Hello,
About half an hour ago, we experienced a problem with our MultiKARL (Ariadne, Oxfam, Privacy) installation that made the application unavailable. The timeline:
* Nagios alerted us that Ariadne, Oxfam, and Privacy were no longer available, and that the KARL process itself was no longer responding to HTTP requests
* supervisorctl showed all processes as running (karl, search, mailin, mailout)
* However, we've seen the following two tracebacks when tailing the log for KARL:
Traceback (most recent call last):
File "/srv/multikarl/production/12/eggs/Paste-1.7.5.1-py2.6.egg/paste/httpserver.py", line 1068, in process_request_in_thread
self.finish_request(request, client_address)
File "/usr/lib/python2.6/SocketServer.py", line 322, in finish_request
self.RequestHandlerClass(request, client_address, self)
File "/usr/lib/python2.6/SocketServer.py", line 617, in __init__
self.handle()
File "/srv/multikarl/production/12/eggs/Paste-1.7.5.1-py2.6.egg/paste/httpserver.py", line 442, in handle
BaseHTTPRequestHandler.handle(self)
File "/usr/lib/python2.6/BaseHTTPServer.py", line 329, in handle
self.handle_one_request()
File "/srv/multikarl/production/12/eggs/Paste-1.7.5.1-py2.6.egg/paste/httpserver.py", line 437, in handle_one_request
self.wsgi_execute()
File "/srv/multikarl/production/12/eggs/Paste-1.7.5.1-py2.6.egg/paste/httpserver.py", line 287, in wsgi_execute
self.wsgi_start_response)
File "/srv/multikarl/production/12/eggs/guillotine-0.1-py2.6.egg/guillotine/__init__.py", line 10, in __call__
return self.app(environ, start_response)
File "/srv/multikarl/production/12/eggs/repoze.vhm-0.13-py2.6.egg/repoze/vhm/middleware.py", line 106, in __call__
return self.application(environ, start_response)
File "/srv/multikarl/production/12/eggs/repoze.browserid-0.3-py2.6.egg/repoze/browserid/middleware.py", line 127, in __call__
return self.app(environ, start_response)
File "/srv/multikarl/production/12/eggs/pyramid-1.2.1-py2.6.egg/pyramid/router.py", line 176, in __call__
response = self.handle_request(request)
File "/srv/multikarl/production/12/eggs/pyramid-1.2.1-py2.6.egg/pyramid/tweens.py", line 17, in excview_tween
response = handler(request)
File "/srv/multikarl/production/12/eggs/pyramid-1.2.1-py2.6.egg/pyramid/router.py", line 153, in handle_request
response = view_callable(context, request)
File "/srv/multikarl/production/12/eggs/pyramid-1.2.1-py2.6.egg/pyramid/config/views.py", line 319, in viewresult_to_response
result = view(context, request)
File "/srv/multikarl/production/12/eggs/pyramid-1.2.1-py2.6.egg/pyramid/config/views.py", line 402, in _requestonly_view
response = view(request)
File "/srv/multikarl/production/12/eggs/karlserve-1.18-py2.6.egg/karlserve/application.py", line 135, in site_dispatch
return request.get_response(instance.pipeline())
File "/srv/multikarl/production/12/eggs/WebOb-1.1.1-py2.6.egg/webob/request.py", line 1086, in get_response
application, catch_exc_info=False)
File "/srv/multikarl/production/12/eggs/WebOb-1.1.1-py2.6.egg/webob/request.py", line 1055, in call_application
app_iter = application(self.environ, start_response)
File "/srv/multikarl/production/12/eggs/repoze.urchin-0.2-py2.6.egg/repoze/urchin/__init__.py", line 53, in __call__
resp = req.get_response(self.app)
File "/srv/multikarl/production/12/eggs/WebOb-1.1.1-py2.6.egg/webob/request.py", line 1086, in get_response
application, catch_exc_info=False)
File "/srv/multikarl/production/12/eggs/WebOb-1.1.1-py2.6.egg/webob/request.py", line 1055, in call_application
app_iter = application(self.environ, start_response)
File "/srv/multikarl/production/12/eggs/pyramid-1.2.1-py2.6.egg/pyramid/router.py", line 176, in __call__
response = self.handle_request(request)
File "/srv/multikarl/production/12/eggs/pyramid-1.2.1-py2.6.egg/pyramid/tweens.py", line 34, in excview_tween
response = view_callable(exc, request)
File "/srv/multikarl/production/12/eggs/pyramid-1.2.1-py2.6.egg/pyramid/config/views.py", line 292, in rendered_view
result = view(context, request)
File "/srv/multikarl/production/12/eggs/karl-3.99-py2.6.egg/karl/errorpage.py", line 75, in errorpage
logging.getLogger('karl').error('\n'.join(message), exc_info=True)
File "/usr/lib/python2.6/logging/__init__.py", line 1082, in error
self._log(ERROR, msg, args, **kwargs)
File "/usr/lib/python2.6/logging/__init__.py", line 1173, in _log
self.handle(record)
File "/usr/lib/python2.6/logging/__init__.py", line 1183, in handle
self.callHandlers(record)
File "/usr/lib/python2.6/logging/__init__.py", line 1220, in callHandlers
hdlr.handle(record)
File "/usr/lib/python2.6/logging/__init__.py", line 679, in handle
self.emit(record)
File "/srv/multikarl/production/12/eggs/karlserve-1.18-py2.6.egg/karlserve/log.py", line 86, in emit
log.log(record.levelname, self.subsystem, message, exc_info)
File "/srv/multikarl/production/12/eggs/karl-3.99-py2.6.egg/karl/redislog.py", line 59, in log
record = [(key if key else NORECORD) for key in tx.execute()]
File "/srv/multikarl/production/12/eggs/redis-2.4.11-py2.6.egg/redis/client.py", line 1528, in execute
return execute(conn, stack)
File "/srv/multikarl/production/12/eggs/redis-2.4.11-py2.6.egg/redis/client.py", line 1453, in _execute_transaction
connection.send_packed_command(all_cmds)
File "/srv/multikarl/production/12/eggs/redis-2.4.11-py2.6.egg/redis/connection.py", line 241, in send_packed_command
self.connect()
File "/srv/multikarl/production/12/eggs/redis-2.4.11-py2.6.egg/redis/connection.py", line 189, in connect
raise ConnectionError(self._error_message(e))
ConnectionError: Error -5 connecting multikarl01.gocept.net:6379. No address associated with hostname.
multikarl01 is the database server (PostgreSQL and Redis). This traceback showed up only *one time*, directly followed by many occurrences of the second traceback:
Traceback (most recent call last):
File "/srv/multikarl/production/12/eggs/Paste-1.7.5.1-py2.6.egg/paste/httpserver.py", line 1068, in process_request_in_thread
self.finish_request(request, client_address)
File "/usr/lib/python2.6/SocketServer.py", line 322, in finish_request
self.RequestHandlerClass(request, client_address, self)
File "/usr/lib/python2.6/SocketServer.py", line 617, in __init__
self.handle()
File "/srv/multikarl/production/12/eggs/Paste-1.7.5.1-py2.6.egg/paste/httpserver.py", line 442, in handle
BaseHTTPRequestHandler.handle(self)
File "/usr/lib/python2.6/BaseHTTPServer.py", line 329, in handle
self.handle_one_request()
File "/srv/multikarl/production/12/eggs/Paste-1.7.5.1-py2.6.egg/paste/httpserver.py", line 437, in handle_one_request
self.wsgi_execute()
File "/srv/multikarl/production/12/eggs/Paste-1.7.5.1-py2.6.egg/paste/httpserver.py", line 287, in wsgi_execute
self.wsgi_start_response)
File "/srv/multikarl/production/12/eggs/guillotine-0.1-py2.6.egg/guillotine/__init__.py", line 10, in __call__
return self.app(environ, start_response)
File "/srv/multikarl/production/12/eggs/repoze.vhm-0.13-py2.6.egg/repoze/vhm/middleware.py", line 106, in __call__
return self.application(environ, start_response)
File "/srv/multikarl/production/12/eggs/repoze.browserid-0.3-py2.6.egg/repoze/browserid/middleware.py", line 127, in __call__
return self.app(environ, start_response)
File "/srv/multikarl/production/12/eggs/pyramid-1.2.1-py2.6.egg/pyramid/router.py", line 176, in __call__
response = self.handle_request(request)
File "/srv/multikarl/production/12/eggs/pyramid-1.2.1-py2.6.egg/pyramid/tweens.py", line 17, in excview_tween
response = handler(request)
File "/srv/multikarl/production/12/eggs/pyramid-1.2.1-py2.6.egg/pyramid/router.py", line 153, in handle_request
response = view_callable(context, request)
File "/srv/multikarl/production/12/eggs/pyramid-1.2.1-py2.6.egg/pyramid/config/views.py", line 319, in viewresult_to_response
result = view(context, request)
File "/srv/multikarl/production/12/eggs/pyramid-1.2.1-py2.6.egg/pyramid/config/views.py", line 402, in _requestonly_view
response = view(request)
File "/srv/multikarl/production/12/eggs/karlserve-1.18-py2.6.egg/karlserve/application.py", line 135, in site_dispatch
return request.get_response(instance.pipeline())
File "/srv/multikarl/production/12/eggs/WebOb-1.1.1-py2.6.egg/webob/request.py", line 1086, in get_response
application, catch_exc_info=False)
File "/srv/multikarl/production/12/eggs/WebOb-1.1.1-py2.6.egg/webob/request.py", line 1055, in call_application
app_iter = application(self.environ, start_response)
File "/srv/multikarl/production/12/eggs/repoze.urchin-0.2-py2.6.egg/repoze/urchin/__init__.py", line 53, in __call__
resp = req.get_response(self.app)
File "/srv/multikarl/production/12/eggs/WebOb-1.1.1-py2.6.egg/webob/request.py", line 1086, in get_response
application, catch_exc_info=False)
File "/srv/multikarl/production/12/eggs/WebOb-1.1.1-py2.6.egg/webob/request.py", line 1055, in call_application
app_iter = application(self.environ, start_response)
File "/srv/multikarl/production/12/eggs/pyramid-1.2.1-py2.6.egg/pyramid/router.py", line 176, in __call__
response = self.handle_request(request)
File "/srv/multikarl/production/12/eggs/pyramid-1.2.1-py2.6.egg/pyramid/tweens.py", line 34, in excview_tween
response = view_callable(exc, request)
File "/srv/multikarl/production/12/eggs/pyramid-1.2.1-py2.6.egg/pyramid/config/views.py", line 292, in rendered_view
result = view(context, request)
File "/srv/multikarl/production/12/eggs/karl-3.99-py2.6.egg/karl/errorpage.py", line 75, in errorpage
logging.getLogger('karl').error('\n'.join(message), exc_info=True)
File "/usr/lib/python2.6/logging/__init__.py", line 1082, in error
self._log(ERROR, msg, args, **kwargs)
File "/usr/lib/python2.6/logging/__init__.py", line 1173, in _log
self.handle(record)
File "/usr/lib/python2.6/logging/__init__.py", line 1183, in handle
self.callHandlers(record)
File "/usr/lib/python2.6/logging/__init__.py", line 1220, in callHandlers
hdlr.handle(record)
File "/usr/lib/python2.6/logging/__init__.py", line 679, in handle
self.emit(record)
File "/srv/multikarl/production/12/eggs/karlserve-1.18-py2.6.egg/karlserve/log.py", line 86, in emit
log.log(record.levelname, self.subsystem, message, exc_info)
File "/srv/multikarl/production/12/eggs/karl-3.99-py2.6.egg/karl/redislog.py", line 59, in log
record = [(key if key else NORECORD) for key in tx.execute()]
File "/srv/multikarl/production/12/eggs/redis-2.4.11-py2.6.egg/redis/client.py", line 1528, in execute
return execute(conn, stack)
File "/srv/multikarl/production/12/eggs/redis-2.4.11-py2.6.egg/redis/client.py", line 1453, in _execute_transaction
connection.send_packed_command(all_cmds)
File "/srv/multikarl/production/12/eggs/redis-2.4.11-py2.6.egg/redis/connection.py", line 241, in send_packed_command
self.connect()
File "/srv/multikarl/production/12/eggs/redis-2.4.11-py2.6.egg/redis/connection.py", line 189, in connect
raise ConnectionError(self._error_message(e))
ConnectionError: Error 24 connecting multikarl01.gocept.net:6379. Too many open files.
* What helped us get the application working again was restarting the karl instance, followed (a few minutes later) by the search instance. The Nagios checks turned green again right after the search instance was restarted. However, we're unsure whether restarting the search instance alone would have been sufficient.
Do you have an idea what might have happened here?
Best regards,
Alex
Handing over to Chris for some brainstorming.