Activity log for bug #1900780

Date Who What changed Old value New value Message
2020-10-21 02:06:23 liujinxin bug added bug
2020-10-21 02:28:19 liujinxin description @retrying.retry(stop_max_delay=timeout * 1000, wait_fixed=RETRY_DELAY, retry_on_result=utils.any_vif_inactive) def wait_for_active(kp_name): return self.registry[kp_name]['vifs'] vifs = wait_for_active(kp_name) #Does it make sense to process the following lines of code? If retry timeout, has returned retrieving.RetryError, the following code will never return an exception for vif in vifs.values(): if not vif.active: LOG.error("Timed out waiting for vifs to become active") raise exceptions.ResourceNotReady(kp_name) @retrying.retry(stop_max_delay=timeout * 1000, wait_fixed=RETRY_DELAY,                         retry_on_result=utils.any_vif_inactive)         def wait_for_active(kp_name):             return self.registry[kp_name]['vifs']         vifs = wait_for_active(kp_name)
2020-10-21 02:33:33 liujinxin description @retrying.retry(stop_max_delay=timeout * 1000, wait_fixed=RETRY_DELAY,                         retry_on_result=utils.any_vif_inactive)         def wait_for_active(kp_name):             return self.registry[kp_name]['vifs']         vifs = wait_for_active(kp_name) ERROR kuryr_kubernetes.cni.daemon.service [-] Error when processing addNetwork request. CNI Params: {'CNI_IFNAME': 'eth0', 'CNI_NETNS': '/proc/3256304/ns/net', 'CNI_PATH': '/opt/cni/bin', 'CNI_COMMAND': 'ADD', 'CNI_CONTAINERID': '0fe17cdbc5ff678b01d797cf8a4e7a4a027356b987681aadc563260fde1a9840', 'CNI_ARGS': 'IgnoreUnknown=1;K8S_POD_NAMESPACE=ljx;K8S_POD_NAME=tomcat-5cf6fccd5d-5jmh6;K8S_POD_INFRA_CONTAINER_ID=0fe17cdbc5ff678b01d797cf8a4e7a4a027356b987681aadc563260fde1a9840'}: BrokenPipeError: [Errno 32] Broken pipe ERROR kuryr_kubernetes.cni.daemon.service Traceback (most recent call last): ERROR kuryr_kubernetes.cni.daemon.service File "/usr/local/lib/python3.6/site-packages/kuryr_kubernetes/cni/daemon/service.py", line 82, in add ERROR kuryr_kubernetes.cni.daemon.service vif = self.plugin.add(params) ERROR kuryr_kubernetes.cni.daemon.service File "/usr/local/lib/python3.6/site-packages/kuryr_kubernetes/cni/plugins/k8s_cni_registry.py", line 83, in add ERROR kuryr_kubernetes.cni.daemon.service d = self.registry[kp_name] ERROR kuryr_kubernetes.cni.daemon.service File "<string>", line 2, in __getitem__ ERROR kuryr_kubernetes.cni.daemon.service File "/usr/lib64/python3.6/multiprocessing/managers.py", line 756, in _callmethod ERROR kuryr_kubernetes.cni.daemon.service conn.send((self._id, methodname, args, kwds)) ERROR kuryr_kubernetes.cni.daemon.service File "/usr/lib64/python3.6/multiprocessing/connection.py", line 210, in send ERROR kuryr_kubernetes.cni.daemon.service self._send_bytes(_ForkingPickler.dumps(obj)) ERROR kuryr_kubernetes.cni.daemon.service File "/usr/lib64/python3.6/multiprocessing/connection.py", line 408, in _send_bytes ERROR kuryr_kubernetes.cni.daemon.service self._send(header + buf) ERROR kuryr_kubernetes.cni.daemon.service File "/usr/lib64/python3.6/multiprocessing/connection.py", line 372, in _send ERROR kuryr_kubernetes.cni.daemon.service n = write(self._handle, buf) ERROR kuryr_kubernetes.cni.daemon.service BrokenPipeError: [Errno 32] Broken pipe ERROR kuryr_kubernetes.cni.daemon.service INFO werkzeug [-] 127.0.0.1 - - [21/Oct/2020 02:31:48] "POST /addNetwork HTTP/1.1" 500 - DEBUG cotyledon._service_manager [-] Shutdown finish _shutdown /usr/local/lib/python3.6/site-packages/cotyledon/_service_manager.py:320
2020-10-21 02:42:15 liujinxin description ERROR kuryr_kubernetes.cni.daemon.service [-] Error when processing addNetwork request. CNI Params: {'CNI_IFNAME': 'eth0', 'CNI_NETNS': '/proc/3256304/ns/net', 'CNI_PATH': '/opt/cni/bin', 'CNI_COMMAND': 'ADD', 'CNI_CONTAINERID': '0fe17cdbc5ff678b01d797cf8a4e7a4a027356b987681aadc563260fde1a9840', 'CNI_ARGS': 'IgnoreUnknown=1;K8S_POD_NAMESPACE=ljx;K8S_POD_NAME=tomcat-5cf6fccd5d-5jmh6;K8S_POD_INFRA_CONTAINER_ID=0fe17cdbc5ff678b01d797cf8a4e7a4a027356b987681aadc563260fde1a9840'}: BrokenPipeError: [Errno 32] Broken pipe ERROR kuryr_kubernetes.cni.daemon.service Traceback (most recent call last): ERROR kuryr_kubernetes.cni.daemon.service File "/usr/local/lib/python3.6/site-packages/kuryr_kubernetes/cni/daemon/service.py", line 82, in add ERROR kuryr_kubernetes.cni.daemon.service vif = self.plugin.add(params) ERROR kuryr_kubernetes.cni.daemon.service File "/usr/local/lib/python3.6/site-packages/kuryr_kubernetes/cni/plugins/k8s_cni_registry.py", line 83, in add ERROR kuryr_kubernetes.cni.daemon.service d = self.registry[kp_name] ERROR kuryr_kubernetes.cni.daemon.service File "<string>", line 2, in __getitem__ ERROR kuryr_kubernetes.cni.daemon.service File "/usr/lib64/python3.6/multiprocessing/managers.py", line 756, in _callmethod ERROR kuryr_kubernetes.cni.daemon.service conn.send((self._id, methodname, args, kwds)) ERROR kuryr_kubernetes.cni.daemon.service File "/usr/lib64/python3.6/multiprocessing/connection.py", line 210, in send ERROR kuryr_kubernetes.cni.daemon.service self._send_bytes(_ForkingPickler.dumps(obj)) ERROR kuryr_kubernetes.cni.daemon.service File "/usr/lib64/python3.6/multiprocessing/connection.py", line 408, in _send_bytes ERROR kuryr_kubernetes.cni.daemon.service self._send(header + buf) ERROR kuryr_kubernetes.cni.daemon.service File "/usr/lib64/python3.6/multiprocessing/connection.py", line 372, in _send ERROR kuryr_kubernetes.cni.daemon.service n = write(self._handle, buf) ERROR kuryr_kubernetes.cni.daemon.service BrokenPipeError: [Errno 32] Broken pipe ERROR kuryr_kubernetes.cni.daemon.service INFO werkzeug [-] 127.0.0.1 - - [21/Oct/2020 02:31:48] "POST /addNetwork HTTP/1.1" 500 - DEBUG cotyledon._service_manager [-] Shutdown finish _shutdown /usr/local/lib/python3.6/site-packages/cotyledon/_service_manager.py:320 When a large number of ports are created in bulk, the port lossup event of ovs will occur, in fact, the state of ovs-port has been up and functionally normal, but the port state of neutron is down, in this case, more than the number of retry exception will cause cni to restart all the time, and this exception and restart can not really solve the corresponding port of the pod. The port down issue is not what we were expecting. Is it possible that after more than the number of retries, cni continues the add process even though the port is not up? ERROR kuryr_kubernetes.cni.daemon.service [-] Error when processing addNetwork request. CNI Params: {'CNI_IFNAME': 'eth0', 'CNI_NETNS': '/proc/3256304/ns/net', 'CNI_PATH': '/opt/cni/bin', 'CNI_COMMAND': 'ADD', 'CNI_CONTAINERID': '0fe17cdbc5ff678b01d797cf8a4e7a4a027356b987681aadc563260fde1a9840', 'CNI_ARGS': 'IgnoreUnknown=1;K8S_POD_NAMESPACE=ljx;K8S_POD_NAME=tomcat-5cf6fccd5d-5jmh6;K8S_POD_INFRA_CONTAINER_ID=0fe17cdbc5ff678b01d797cf8a4e7a4a027356b987681aadc563260fde1a9840'}: BrokenPipeError: [Errno 32] Broken pipe ERROR kuryr_kubernetes.cni.daemon.service Traceback (most recent call last): ERROR kuryr_kubernetes.cni.daemon.service File "/usr/local/lib/python3.6/site-packages/kuryr_kubernetes/cni/daemon/service.py", line 82, in add ERROR kuryr_kubernetes.cni.daemon.service vif = self.plugin.add(params) ERROR kuryr_kubernetes.cni.daemon.service File "/usr/local/lib/python3.6/site-packages/kuryr_kubernetes/cni/plugins/k8s_cni_registry.py", line 83, in add ERROR kuryr_kubernetes.cni.daemon.service d = self.registry[kp_name] ERROR kuryr_kubernetes.cni.daemon.service File "<string>", line 2, in __getitem__ ERROR kuryr_kubernetes.cni.daemon.service File "/usr/lib64/python3.6/multiprocessing/managers.py", line 756, in _callmethod ERROR kuryr_kubernetes.cni.daemon.service conn.send((self._id, methodname, args, kwds)) ERROR kuryr_kubernetes.cni.daemon.service File "/usr/lib64/python3.6/multiprocessing/connection.py", line 210, in send ERROR kuryr_kubernetes.cni.daemon.service self._send_bytes(_ForkingPickler.dumps(obj)) ERROR kuryr_kubernetes.cni.daemon.service File "/usr/lib64/python3.6/multiprocessing/connection.py", line 408, in _send_bytes ERROR kuryr_kubernetes.cni.daemon.service self._send(header + buf) ERROR kuryr_kubernetes.cni.daemon.service File "/usr/lib64/python3.6/multiprocessing/connection.py", line 372, in _send ERROR kuryr_kubernetes.cni.daemon.service n = write(self._handle, buf) ERROR kuryr_kubernetes.cni.daemon.service BrokenPipeError: [Errno 32] Broken pipe ERROR kuryr_kubernetes.cni.daemon.service INFO werkzeug [-] 127.0.0.1 - - [21/Oct/2020 02:31:48] "POST /addNetwork HTTP/1.1" 500 - DEBUG cotyledon._service_manager [-] Shutdown finish _shutdown /usr/local/lib/python3.6/site-packages/cotyledon/_service_manager.py:320
2020-10-21 02:44:38 liujinxin summary RetryError in kuryr-daemon A port state down causes cni-daemon to restart repeatedly.
2020-10-21 07:04:26 liujinxin description When a large number of ports are created in bulk, the port lossup event of ovs will occur, in fact, the state of ovs-port has been up and functionally normal, but the port state of neutron is down, in this case, more than the number of retry exception will cause cni to restart all the time, and this exception and restart can not really solve the corresponding port of the pod. The port down issue is not what we were expecting. Is it possible that after more than the number of retries, cni continues the add process even though the port is not up? ERROR kuryr_kubernetes.cni.daemon.service [-] Error when processing addNetwork request. CNI Params: {'CNI_IFNAME': 'eth0', 'CNI_NETNS': '/proc/3256304/ns/net', 'CNI_PATH': '/opt/cni/bin', 'CNI_COMMAND': 'ADD', 'CNI_CONTAINERID': '0fe17cdbc5ff678b01d797cf8a4e7a4a027356b987681aadc563260fde1a9840', 'CNI_ARGS': 'IgnoreUnknown=1;K8S_POD_NAMESPACE=ljx;K8S_POD_NAME=tomcat-5cf6fccd5d-5jmh6;K8S_POD_INFRA_CONTAINER_ID=0fe17cdbc5ff678b01d797cf8a4e7a4a027356b987681aadc563260fde1a9840'}: BrokenPipeError: [Errno 32] Broken pipe ERROR kuryr_kubernetes.cni.daemon.service Traceback (most recent call last): ERROR kuryr_kubernetes.cni.daemon.service File "/usr/local/lib/python3.6/site-packages/kuryr_kubernetes/cni/daemon/service.py", line 82, in add ERROR kuryr_kubernetes.cni.daemon.service vif = self.plugin.add(params) ERROR kuryr_kubernetes.cni.daemon.service File "/usr/local/lib/python3.6/site-packages/kuryr_kubernetes/cni/plugins/k8s_cni_registry.py", line 83, in add ERROR kuryr_kubernetes.cni.daemon.service d = self.registry[kp_name] ERROR kuryr_kubernetes.cni.daemon.service File "<string>", line 2, in __getitem__ ERROR kuryr_kubernetes.cni.daemon.service File "/usr/lib64/python3.6/multiprocessing/managers.py", line 756, in _callmethod ERROR kuryr_kubernetes.cni.daemon.service conn.send((self._id, methodname, args, kwds)) ERROR kuryr_kubernetes.cni.daemon.service File "/usr/lib64/python3.6/multiprocessing/connection.py", line 210, in send ERROR kuryr_kubernetes.cni.daemon.service self._send_bytes(_ForkingPickler.dumps(obj)) ERROR kuryr_kubernetes.cni.daemon.service File "/usr/lib64/python3.6/multiprocessing/connection.py", line 408, in _send_bytes ERROR kuryr_kubernetes.cni.daemon.service self._send(header + buf) ERROR kuryr_kubernetes.cni.daemon.service File "/usr/lib64/python3.6/multiprocessing/connection.py", line 372, in _send ERROR kuryr_kubernetes.cni.daemon.service n = write(self._handle, buf) ERROR kuryr_kubernetes.cni.daemon.service BrokenPipeError: [Errno 32] Broken pipe ERROR kuryr_kubernetes.cni.daemon.service INFO werkzeug [-] 127.0.0.1 - - [21/Oct/2020 02:31:48] "POST /addNetwork HTTP/1.1" 500 - DEBUG cotyledon._service_manager [-] Shutdown finish _shutdown /usr/local/lib/python3.6/site-packages/cotyledon/_service_manager.py:320 problem phenomenon: NAME READY STATUS RESTARTS AGE kuryr-cni-ds-spq5q 0/1 CrashLoopBackOff 233 16h kuryr-controller-666658496d-hbl4t 0/1 Running 106 16h tomcat-5cf6fccd5d-5jmh6 0/1 ContainerCreating 0 15h # kubectl get kp tomcat-5cf6fccd5d-5jmh6 -oyaml |egrep 'interface_id:|active' active: false interface_id: d223e87d-6c38-45d3-bbe3-af35ea439a60 # openstack port show d223e87d-6c38-45d3-bbe3-af35ea439a60 |grep ' status' | status | DOWN | The ovs port up event is lost, in fact the state of the ovs-port is always up and functional, but the state of the neutron port is down, in this case an exception that exceeds the number of retries will cause cni to reboot all the time, and this exception and reboot does not really solve the corresponding port's pod. It is possible that the state of a particular neutron port port is down, but it is not what we would expect if this causes the entire kuryr component to keep rebooting exceptionally. Is it possible that cni will continue to add processes after more than the number of retries, even if the port is not up? Note: Batch creation of pods on a kuryr-node, port up events are missing for a few ports (the ovs-agent in my environment had an exception in the middle and the port up events were not notified to the neutron-server). kuryr-cni daemon crash log: ERROR kuryr_kubernetes.cni.daemon.service [-] Error when processing addNetwork request. CNI Params: {'CNI_IFNAME': 'eth0', 'CNI_NETNS': '/proc/3256304/ns/net', 'CNI_PATH': '/opt/cni/bin', 'CNI_COMMAND': 'ADD', 'CNI_CONTAINERID': '0fe17cdbc5ff678b01d797cf8a4e7a4a027356b987681aadc563260fde1a9840', 'CNI_ARGS': 'IgnoreUnknown=1;K8S_POD_NAMESPACE=ljx;K8S_POD_NAME=tomcat-5cf6fccd5d-5jmh6;K8S_POD_INFRA_CONTAINER_ID=0fe17cdbc5ff678b01d797cf8a4e7a4a027356b987681aadc563260fde1a9840'}: BrokenPipeError: [Errno 32] Broken pipe ERROR kuryr_kubernetes.cni.daemon.service Traceback (most recent call last): ERROR kuryr_kubernetes.cni.daemon.service File "/usr/local/lib/python3.6/site-packages/kuryr_kubernetes/cni/daemon/service.py", line 82, in add ERROR kuryr_kubernetes.cni.daemon.service vif = self.plugin.add(params) ERROR kuryr_kubernetes.cni.daemon.service File "/usr/local/lib/python3.6/site-packages/kuryr_kubernetes/cni/plugins/k8s_cni_registry.py", line 83, in add ERROR kuryr_kubernetes.cni.daemon.service d = self.registry[kp_name] ERROR kuryr_kubernetes.cni.daemon.service File "<string>", line 2, in __getitem__ ERROR kuryr_kubernetes.cni.daemon.service File "/usr/lib64/python3.6/multiprocessing/managers.py", line 756, in _callmethod ERROR kuryr_kubernetes.cni.daemon.service conn.send((self._id, methodname, args, kwds)) ERROR kuryr_kubernetes.cni.daemon.service File "/usr/lib64/python3.6/multiprocessing/connection.py", line 210, in send ERROR kuryr_kubernetes.cni.daemon.service self._send_bytes(_ForkingPickler.dumps(obj)) ERROR kuryr_kubernetes.cni.daemon.service File "/usr/lib64/python3.6/multiprocessing/connection.py", line 408, in _send_bytes ERROR kuryr_kubernetes.cni.daemon.service self._send(header + buf) ERROR kuryr_kubernetes.cni.daemon.service File "/usr/lib64/python3.6/multiprocessing/connection.py", line 372, in _send ERROR kuryr_kubernetes.cni.daemon.service n = write(self._handle, buf) ERROR kuryr_kubernetes.cni.daemon.service BrokenPipeError: [Errno 32] Broken pipe ERROR kuryr_kubernetes.cni.daemon.service INFO werkzeug [-] 127.0.0.1 - - [21/Oct/2020 02:31:48] "POST /addNetwork HTTP/1.1" 500 - DEBUG cotyledon._service_manager [-] Shutdown finish _shutdown /usr/local/lib/python3.6/site-packages/cotyledon/_service_manager.py:320 kuryr-controller dead log: ERROR kuryr_kubernetes.handlers.logging Traceback (most recent call last): ERROR kuryr_kubernetes.handlers.logging File "/usr/local/lib/python3.6/site-packages/kuryr_kubernetes/handlers/logging.py", line 37, in __call__ ERROR kuryr_kubernetes.handlers.logging self._handler(event, *args, **kwargs) ERROR kuryr_kubernetes.handlers.logging File "/usr/local/lib/python3.6/site-packages/kuryr_kubernetes/handlers/retry.py", line 93, in __call__ ERROR kuryr_kubernetes.handlers.logging self._handler.set_liveness(alive=False) ERROR kuryr_kubernetes.handlers.logging File "/usr/local/lib/python3.6/site-packages/oslo_utils/excutils.py", line 220, in __exit__ ERROR kuryr_kubernetes.handlers.logging self.force_reraise() ERROR kuryr_kubernetes.handlers.logging File "/usr/local/lib/python3.6/site-packages/oslo_utils/excutils.py", line 196, in force_reraise ERROR kuryr_kubernetes.handlers.logging six.reraise(self.type_, self.value, self.tb) ERROR kuryr_kubernetes.handlers.logging File "/usr/local/lib/python3.6/site-packages/six.py", line 703, in reraise ERROR kuryr_kubernetes.handlers.logging raise value ERROR kuryr_kubernetes.handlers.logging File "/usr/local/lib/python3.6/site-packages/kuryr_kubernetes/handlers/retry.py", line 78, in __call__ ERROR kuryr_kubernetes.handlers.logging self._handler(event, *args, **kwargs) ERROR kuryr_kubernetes.handlers.logging File "/usr/local/lib/python3.6/site-packages/kuryr_kubernetes/handlers/k8s_base.py", line 84, in __call__ ERROR kuryr_kubernetes.handlers.logging self.on_present(obj) ERROR kuryr_kubernetes.handlers.logging File "/usr/local/lib/python3.6/site-packages/kuryr_kubernetes/controller/handlers/kuryrport.py", line 88, in on_present ERROR kuryr_kubernetes.handlers.logging self._drv_vif_pool.activate_vif(data['vif']) ERROR kuryr_kubernetes.handlers.logging File "/usr/local/lib/python3.6/site-packages/kuryr_kubernetes/controller/drivers/vif_pool.py", line 1258, in activate_vif ERROR kuryr_kubernetes.handlers.logging self._vif_drvs[vif_drv_alias].activate_vif(vif) ERROR kuryr_kubernetes.handlers.logging File "/usr/local/lib/python3.6/site-packages/kuryr_kubernetes/controller/drivers/vif_pool.py", line 169, in activate_vif ERROR kuryr_kubernetes.handlers.logging self._drv_vif.activate_vif(vif) ERROR kuryr_kubernetes.handlers.logging File "/usr/local/lib/python3.6/site-packages/kuryr_kubernetes/controller/drivers/neutron_vif.py", line 92, in activate_vif ERROR kuryr_kubernetes.handlers.logging raise k_exc.ResourceNotReady(vif) ERROR kuryr_kubernetes.handlers.logging kuryr_kubernetes.exceptions.ResourceNotReady: Resource not ready: VIFOpenVSwitch(active=False,address=fa:16:3e:b6:d2:4a,bridge_name='br-int',has_traffic_filtering=True,id=8d85b962-c75a-4348-b68c-6ff5c0a951a6,network=Network(84f65523-4252-4197-85ec-75e10dc46389),plugin='ovs',port_profile=VIFPortProfileOpenVSwitch,preserve_on_delete=False,vif_name='tap8d85b962-c7') ERROR kuryr_kubernetes.handlers.logging ERROR kuryr_kubernetes.controller.managers.health [-] Component KuryrPortHandler is dead. INFO kuryr_kubernetes.controller.service [-] Service 'KuryrK8sService' stopping INFO kuryr_kubernetes.watcher [-] Stopped watching '/apis/openstack.org/v1/kuryrports' INFO kuryr_kubernetes.watcher [-] Stopped watching '/apis/openstack.org/v1/kuryrnetworks' INFO kuryr_kubernetes.watcher [-] Stopped watching '/api/v1/namespaces' INFO kuryr_kubernetes.watcher [-] Stopped watching '/api/v1/pods' INFO kuryr_kubernetes.watcher [-] Stopped watching '/apis/networking.k8s.io/v1/networkpolicies' INFO kuryr_kubernetes.watcher [-] No remaining active watchers, Exiting... INFO kuryr_kubernetes.controller.service [-] Service 'KuryrK8sService' stopping INFO kuryr_kubernetes.controller.service [-] Service 'KuryrK8sService' stopped