2023-08-03 17:17:23 |
Adam Collard |
description |
[Problem Description]
After applying the fixes proposed in LP#2027735 to MAAS 3.2.8 (taken from ppa:r00ta/maas-2027735), MAAS started to behave well, with the expected improved performance. However, after about 24 hours, provisioning of nodes started to fail, and the following traces were seen in:
rackd.log:
----------
2023-07-31 23:16:36 provisioningserver.rpc.clusterservice: [critical] Failed to contact region. (While requesting RPC info at http://10.217.0.11:5240/MAAS/, http://10.217.0.66:5240/MAAS/).
Traceback (most recent call last):
File "/usr/lib/python3/dist-packages/twisted/internet/defer.py", line 460, in callback
self._startRunCallbacks(result)
File "/usr/lib/python3/dist-packages/twisted/internet/defer.py", line 568, in _startRunCallbacks
self._runCallbacks()
File "/usr/lib/python3/dist-packages/twisted/internet/defer.py", line 654, in _runCallbacks
current.result = callback(current.result, *args, **kw)
File "/usr/lib/python3/dist-packages/twisted/internet/defer.py", line 1475, in gotResult
_inlineCallbacks(r, g, status)
--- <exception caught here> ---
File "/usr/lib/python3/dist-packages/provisioningserver/rpc/clusterservice.py", line 1292, in _doUpdate
eventloops, maas_url = yield self._get_rpc_info(urls)
File "/usr/lib/python3/dist-packages/provisioningserver/rpc/clusterservice.py", line 1549, in _get_rpc_info
raise config_exc
File "/usr/lib/python3/dist-packages/provisioningserver/rpc/clusterservice.py", line 1520, in _get_rpc_info
eventloops, maas_url = yield self._parallel_fetch_rpc_info(urls)
File "/usr/lib/python3/dist-packages/twisted/internet/defer.py", line 654, in _runCallbacks
current.result = callback(current.result, *args, **kw)
File "/usr/lib/python3/dist-packages/provisioningserver/rpc/clusterservice.py", line 1494, in handle_responses
errors[0].raiseException()
File "/usr/lib/python3/dist-packages/twisted/python/failure.py", line 467, in raiseException
raise self.value.with_traceback(self.tb)
File "/usr/lib/python3/dist-packages/provisioningserver/rpc/clusterservice.py", line 1455, in _serial_fetch_rpc_info
raise last_exc
File "/usr/lib/python3/dist-packages/provisioningserver/rpc/clusterservice.py", line 1447, in _serial_fetch_rpc_info
response = yield self._fetch_rpc_info(url, orig_url)
File "/usr/lib/python3/dist-packages/twisted/internet/defer.py", line 1416, in _inlineCallbacks
result = result.throwExceptionIntoGenerator(g)
File "/usr/lib/python3/dist-packages/twisted/python/failure.py", line 491, in throwExceptionIntoGenerator
return g.throw(self.type, self.value, self.tb)
File "/usr/lib/python3/dist-packages/provisioningserver/rpc/clusterservice.py", line 1549, in _get_rpc_info
raise config_exc
File "/usr/lib/python3/dist-packages/provisioningserver/rpc/clusterservice.py", line 1520, in _get_rpc_info
eventloops, maas_url = yield self._parallel_fetch_rpc_info(urls)
File "/usr/lib/python3/dist-packages/twisted/internet/defer.py", line 654, in _runCallbacks
current.result = callback(current.result, *args, **kw)
File "/usr/lib/python3/dist-packages/provisioningserver/rpc/clusterservice.py", line 1494, in handle_responses
errors[0].raiseException()
File "/usr/lib/python3/dist-packages/twisted/python/failure.py", line 467, in raiseException
raise self.value.with_traceback(self.tb)
File "/usr/lib/python3/dist-packages/twisted/internet/defer.py", line 1416, in _inlineCallbacks
result = result.throwExceptionIntoGenerator(g)
File "/usr/lib/python3/dist-packages/twisted/python/failure.py", line 491, in throwExceptionIntoGenerator
return g.throw(self.type, self.value, self.tb)
File "/usr/lib/python3/dist-packages/provisioningserver/rpc/clusterservice.py", line 1455, in _serial_fetch_rpc_info
raise last_exc
File "/usr/lib/python3/dist-packages/provisioningserver/rpc/clusterservice.py", line 1447, in _serial_fetch_rpc_info
response = yield self._fetch_rpc_info(url, orig_url)
twisted.internet.error.ConnectingCancelledError: HostnameAddress(hostname=b'10.217.0.11', port=5240)
2023-07-31 23:16:36 provisioningserver.rpc.common: [debug] [RPC -> sent] AmpBox({b'_command': b'Ping'})
regiond.log:
------------
2023-07-31 23:17:23 maasserver.dhcp: [critical] Error configuring DHCPv6 on rack controller 'pdx01-m01-c34-cpu-01 (xfhrbn)': unable to perform operation on <UVPoll closed=True 0x7f33f5cf0660>; the handler is closed
Traceback (most recent call last):
File "/usr/lib/python3/dist-packages/provisioningserver/prometheus/utils.py", line 127, in wrapper
result = func(*args, **kwargs)
File "/usr/lib/python3/dist-packages/provisioningserver/utils/twisted.py", line 127, in wrapper
return func(*args, **kwargs)
File "/usr/lib/python3/dist-packages/provisioningserver/rpc/common.py", line 176, in __call__
return deferWithTimeout(
File "/usr/lib/python3/dist-packages/provisioningserver/utils/twisted.py", line 325, in deferWithTimeout
d = maybeDeferred(func, *args, **kwargs)
--- <exception caught here> ---
File "/usr/lib/python3/dist-packages/maasserver/dhcp.py", line 898, in configure_dhcp
yield client(
File "/usr/lib/python3/dist-packages/twisted/internet/defer.py", line 151, in maybeDeferred
result = f(*args, **kw)
File "/usr/lib/python3/dist-packages/twisted/protocols/amp.py", line 971, in callRemote
return co._doCommand(self)
File "/usr/lib/python3/dist-packages/twisted/protocols/amp.py", line 2000, in _doCommand
d = proto._sendBoxCommand(self.commandName,
File "/usr/lib/python3/dist-packages/provisioningserver/rpc/common.py", line 261, in _sendBoxCommand
return super()._sendBoxCommand(
File "/usr/lib/python3/dist-packages/twisted/protocols/amp.py", line 902, in _sendBoxCommand
box._sendTo(self.boxSender)
File "/usr/lib/python3/dist-packages/twisted/protocols/amp.py", line 723, in _sendTo
proto.sendBox(self)
File "/usr/lib/python3/dist-packages/twisted/protocols/amp.py", line 2386, in sendBox
self.transport.write(box.serialize())
File "/usr/lib/python3/dist-packages/twisted/internet/_newtls.py", line 191, in write
FileDescriptor.write(self, bytes)
File "/usr/lib/python3/dist-packages/twisted/internet/abstract.py", line 356, in write
self.startWriting()
File "/usr/lib/python3/dist-packages/twisted/internet/abstract.py", line 443, in startWriting
self.reactor.addWriter(self)
File "/usr/lib/python3/dist-packages/twisted/internet/asyncioreactor.py", line 173, in addWriter
self._asyncioEventloop.add_writer(fd, callWithLogger, writer,
File "uvloop/loop.pyx", line 2399, in uvloop.loop.Loop.add_writer
File "uvloop/loop.pyx", line 808, in uvloop.loop.Loop._add_writer
File "uvloop/handles/poll.pyx", line 122, in uvloop.loop.UVPoll.start_writing
File "uvloop/handles/poll.pyx", line 39, in uvloop.loop.UVPoll._poll_start
File "uvloop/handles/handle.pyx", line 159, in uvloop.loop.UVHandle._ensure_alive
builtins.RuntimeError: unable to perform operation on <UVPoll closed=True 0x7f33f5cf0660>; the handler is closed
2023-07-31 23:17:23 maasserver.rack_controller: [critical] Failed configuring DHCP on rack controller 'id:12'.
Traceback (most recent call last):
File "/usr/lib/python3/dist-packages/twisted/internet/defer.py", line 1475, in gotResult
_inlineCallbacks(r, g, status)
File "/usr/lib/python3/dist-packages/twisted/internet/defer.py", line 1464, in _inlineCallbacks
status.deferred.errback()
File "/usr/lib/python3/dist-packages/twisted/internet/defer.py", line 501, in errback
self._startRunCallbacks(fail)
File "/usr/lib/python3/dist-packages/twisted/internet/defer.py", line 568, in _startRunCallbacks
self._runCallbacks()
--- <exception caught here> ---
File "/usr/lib/python3/dist-packages/twisted/internet/defer.py", line 654, in _runCallbacks
current.result = callback(current.result, *args, **kw)
File "/usr/lib/python3/dist-packages/maasserver/rack_controller.py", line 281, in <lambda>
d.addErrback(lambda f: f.trap(NoConnectionsAvailable))
File "/usr/lib/python3/dist-packages/twisted/python/failure.py", line 439, in trap
self.raiseException()
File "/usr/lib/python3/dist-packages/twisted/python/failure.py", line 467, in raiseException
raise self.value.with_traceback(self.tb)
File "/usr/lib/python3/dist-packages/twisted/internet/defer.py", line 1418, in _inlineCallbacks
result = g.send(result)
File "/usr/lib/python3/dist-packages/maasserver/dhcp.py", line 951, in configure_dhcp
raise ipv4_exc
File "/usr/lib/python3/dist-packages/maasserver/dhcp.py", line 869, in configure_dhcp
yield client(
builtins.RuntimeError: unable to perform operation on <UVPoll closed=True 0x7f33f5cf0660>; the handler is closed
Ubuntu version: 20.04
MAAS: 3.2.99 (Interim version from PPA)
Format: Debian
PostgreSQL 12 |
[Problem Description]
After applying the fixes proposed in LP:2027735 to MAAS 3.2.8 (taken from ppa:r00ta/maas-2027735), MAAS started to behave well, with the expected improved performance. However, after about 24 hours, provisioning of nodes started to fail, and the following traces were seen in:
rackd.log:
----------
2023-07-31 23:16:36 provisioningserver.rpc.clusterservice: [critical] Failed to contact region. (While requesting RPC info at http://10.217.0.11:5240/MAAS/, http://10.217.0.66:5240/MAAS/).
Traceback (most recent call last):
File "/usr/lib/python3/dist-packages/twisted/internet/defer.py", line 460, in callback
self._startRunCallbacks(result)
File "/usr/lib/python3/dist-packages/twisted/internet/defer.py", line 568, in _startRunCallbacks
self._runCallbacks()
File "/usr/lib/python3/dist-packages/twisted/internet/defer.py", line 654, in _runCallbacks
current.result = callback(current.result, *args, **kw)
File "/usr/lib/python3/dist-packages/twisted/internet/defer.py", line 1475, in gotResult
_inlineCallbacks(r, g, status)
--- <exception caught here> ---
File "/usr/lib/python3/dist-packages/provisioningserver/rpc/clusterservice.py", line 1292, in _doUpdate
eventloops, maas_url = yield self._get_rpc_info(urls)
File "/usr/lib/python3/dist-packages/provisioningserver/rpc/clusterservice.py", line 1549, in _get_rpc_info
raise config_exc
File "/usr/lib/python3/dist-packages/provisioningserver/rpc/clusterservice.py", line 1520, in _get_rpc_info
eventloops, maas_url = yield self._parallel_fetch_rpc_info(urls)
File "/usr/lib/python3/dist-packages/twisted/internet/defer.py", line 654, in _runCallbacks
current.result = callback(current.result, *args, **kw)
File "/usr/lib/python3/dist-packages/provisioningserver/rpc/clusterservice.py", line 1494, in handle_responses
errors[0].raiseException()
File "/usr/lib/python3/dist-packages/twisted/python/failure.py", line 467, in raiseException
raise self.value.with_traceback(self.tb)
File "/usr/lib/python3/dist-packages/provisioningserver/rpc/clusterservice.py", line 1455, in _serial_fetch_rpc_info
raise last_exc
File "/usr/lib/python3/dist-packages/provisioningserver/rpc/clusterservice.py", line 1447, in _serial_fetch_rpc_info
response = yield self._fetch_rpc_info(url, orig_url)
File "/usr/lib/python3/dist-packages/twisted/internet/defer.py", line 1416, in _inlineCallbacks
result = result.throwExceptionIntoGenerator(g)
File "/usr/lib/python3/dist-packages/twisted/python/failure.py", line 491, in throwExceptionIntoGenerator
return g.throw(self.type, self.value, self.tb)
File "/usr/lib/python3/dist-packages/provisioningserver/rpc/clusterservice.py", line 1549, in _get_rpc_info
raise config_exc
File "/usr/lib/python3/dist-packages/provisioningserver/rpc/clusterservice.py", line 1520, in _get_rpc_info
eventloops, maas_url = yield self._parallel_fetch_rpc_info(urls)
File "/usr/lib/python3/dist-packages/twisted/internet/defer.py", line 654, in _runCallbacks
current.result = callback(current.result, *args, **kw)
File "/usr/lib/python3/dist-packages/provisioningserver/rpc/clusterservice.py", line 1494, in handle_responses
errors[0].raiseException()
File "/usr/lib/python3/dist-packages/twisted/python/failure.py", line 467, in raiseException
raise self.value.with_traceback(self.tb)
File "/usr/lib/python3/dist-packages/twisted/internet/defer.py", line 1416, in _inlineCallbacks
result = result.throwExceptionIntoGenerator(g)
File "/usr/lib/python3/dist-packages/twisted/python/failure.py", line 491, in throwExceptionIntoGenerator
return g.throw(self.type, self.value, self.tb)
File "/usr/lib/python3/dist-packages/provisioningserver/rpc/clusterservice.py", line 1455, in _serial_fetch_rpc_info
raise last_exc
File "/usr/lib/python3/dist-packages/provisioningserver/rpc/clusterservice.py", line 1447, in _serial_fetch_rpc_info
response = yield self._fetch_rpc_info(url, orig_url)
twisted.internet.error.ConnectingCancelledError: HostnameAddress(hostname=b'10.217.0.11', port=5240)
2023-07-31 23:16:36 provisioningserver.rpc.common: [debug] [RPC -> sent] AmpBox({b'_command': b'Ping'})
regiond.log:
------------
2023-07-31 23:17:23 maasserver.dhcp: [critical] Error configuring DHCPv6 on rack controller 'pdx01-m01-c34-cpu-01 (xfhrbn)': unable to perform operation on <UVPoll closed=True 0x7f33f5cf0660>; the handler is closed
Traceback (most recent call last):
File "/usr/lib/python3/dist-packages/provisioningserver/prometheus/utils.py", line 127, in wrapper
result = func(*args, **kwargs)
File "/usr/lib/python3/dist-packages/provisioningserver/utils/twisted.py", line 127, in wrapper
return func(*args, **kwargs)
File "/usr/lib/python3/dist-packages/provisioningserver/rpc/common.py", line 176, in __call__
return deferWithTimeout(
File "/usr/lib/python3/dist-packages/provisioningserver/utils/twisted.py", line 325, in deferWithTimeout
d = maybeDeferred(func, *args, **kwargs)
--- <exception caught here> ---
File "/usr/lib/python3/dist-packages/maasserver/dhcp.py", line 898, in configure_dhcp
yield client(
File "/usr/lib/python3/dist-packages/twisted/internet/defer.py", line 151, in maybeDeferred
result = f(*args, **kw)
File "/usr/lib/python3/dist-packages/twisted/protocols/amp.py", line 971, in callRemote
return co._doCommand(self)
File "/usr/lib/python3/dist-packages/twisted/protocols/amp.py", line 2000, in _doCommand
d = proto._sendBoxCommand(self.commandName,
File "/usr/lib/python3/dist-packages/provisioningserver/rpc/common.py", line 261, in _sendBoxCommand
return super()._sendBoxCommand(
File "/usr/lib/python3/dist-packages/twisted/protocols/amp.py", line 902, in _sendBoxCommand
box._sendTo(self.boxSender)
File "/usr/lib/python3/dist-packages/twisted/protocols/amp.py", line 723, in _sendTo
proto.sendBox(self)
File "/usr/lib/python3/dist-packages/twisted/protocols/amp.py", line 2386, in sendBox
self.transport.write(box.serialize())
File "/usr/lib/python3/dist-packages/twisted/internet/_newtls.py", line 191, in write
FileDescriptor.write(self, bytes)
File "/usr/lib/python3/dist-packages/twisted/internet/abstract.py", line 356, in write
self.startWriting()
File "/usr/lib/python3/dist-packages/twisted/internet/abstract.py", line 443, in startWriting
self.reactor.addWriter(self)
File "/usr/lib/python3/dist-packages/twisted/internet/asyncioreactor.py", line 173, in addWriter
self._asyncioEventloop.add_writer(fd, callWithLogger, writer,
File "uvloop/loop.pyx", line 2399, in uvloop.loop.Loop.add_writer
File "uvloop/loop.pyx", line 808, in uvloop.loop.Loop._add_writer
File "uvloop/handles/poll.pyx", line 122, in uvloop.loop.UVPoll.start_writing
File "uvloop/handles/poll.pyx", line 39, in uvloop.loop.UVPoll._poll_start
File "uvloop/handles/handle.pyx", line 159, in uvloop.loop.UVHandle._ensure_alive
builtins.RuntimeError: unable to perform operation on <UVPoll closed=True 0x7f33f5cf0660>; the handler is closed
2023-07-31 23:17:23 maasserver.rack_controller: [critical] Failed configuring DHCP on rack controller 'id:12'.
Traceback (most recent call last):
File "/usr/lib/python3/dist-packages/twisted/internet/defer.py", line 1475, in gotResult
_inlineCallbacks(r, g, status)
File "/usr/lib/python3/dist-packages/twisted/internet/defer.py", line 1464, in _inlineCallbacks
status.deferred.errback()
File "/usr/lib/python3/dist-packages/twisted/internet/defer.py", line 501, in errback
self._startRunCallbacks(fail)
File "/usr/lib/python3/dist-packages/twisted/internet/defer.py", line 568, in _startRunCallbacks
self._runCallbacks()
--- <exception caught here> ---
File "/usr/lib/python3/dist-packages/twisted/internet/defer.py", line 654, in _runCallbacks
current.result = callback(current.result, *args, **kw)
File "/usr/lib/python3/dist-packages/maasserver/rack_controller.py", line 281, in <lambda>
d.addErrback(lambda f: f.trap(NoConnectionsAvailable))
File "/usr/lib/python3/dist-packages/twisted/python/failure.py", line 439, in trap
self.raiseException()
File "/usr/lib/python3/dist-packages/twisted/python/failure.py", line 467, in raiseException
raise self.value.with_traceback(self.tb)
File "/usr/lib/python3/dist-packages/twisted/internet/defer.py", line 1418, in _inlineCallbacks
result = g.send(result)
File "/usr/lib/python3/dist-packages/maasserver/dhcp.py", line 951, in configure_dhcp
raise ipv4_exc
File "/usr/lib/python3/dist-packages/maasserver/dhcp.py", line 869, in configure_dhcp
yield client(
builtins.RuntimeError: unable to perform operation on <UVPoll closed=True 0x7f33f5cf0660>; the handler is closed
Ubuntu version: 20.04
MAAS: 3.2.99 (Interim version from PPA)
Format: Debian
PostgreSQL 12 |
|