4.15.0-43-generic #46~16.04.1-Ubuntu SMP x86_64
I have the same problem but the process that gets stuck is dockerd not systemd.
3,2077,80354225063,-;INFO: task dockerd:2070 blocked for more than 120 seconds. 3,2078,80354230430,-; Tainted: P O 4.15.0-43-generic #46~16.04.1-Ubuntu 3,2079,80354236566,-;"echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message. 6,2080,80354242978,-;dockerd D 0 2070 1 0x00000000 4,2081,80354242981,-;Call Trace: 4,2082,80354242984,-; __schedule+0x3d6/0x8b0 4,2083,80354242986,-; ? xen_smp_send_reschedule+0x10/0x20 4,2084,80354242989,-; schedule+0x36/0x80 4,2085,80354242991,-; schedule_timeout+0x1db/0x370 4,2086,80354242993,-; ? try_to_wake_up+0x59/0x4a0 4,2087,80354242995,-; wait_for_completion+0xb4/0x140 4,2088,80354242996,-; ? wake_up_q+0x70/0x70 4,2089,80354242998,-; flush_work+0x129/0x1e0 4,2090,80354242999,-; ? worker_detach_from_pool+0xb0/0xb0 4,2091,80354243001,-; flush_delayed_work+0x3f/0x50 4,2092,80354243004,-; fsnotify_wait_marks_destroyed+0x15/0x20 4,2093,80354243005,-; fsnotify_destroy_group+0x48/0xd0 4,2094,80354243008,-; inotify_release+0x1e/0x50 4,2095,80354243011,-; __fput+0xea/0x220 4,2096,80354243013,-; ____fput+0xe/0x10 4,2097,80354243014,-; task_work_run+0x8a/0xb0 4,2098,80354243016,-; exit_to_usermode_loop+0xc4/0xd0 4,2099,80354243018,-; do_syscall_64+0xf4/0x130 4,2100,80354243020,-; entry_SYSCALL_64_after_hwframe+0x3d/0xa2
The tainted marker comes from the zfs module, since I have a ZFS partition mounted on the system. The / partition, however, is EXT4, and docker is running from /.
cat /proc/1/stack [<0>] flush_work+0x129/0x1e0 [<0>] flush_delayed_work+0x3f/0x50 [<0>] fsnotify_wait_marks_destroyed+0x15/0x20 [<0>] fsnotify_destroy_group+0x48/0xd0 [<0>] inotify_release+0x1e/0x50 [<0>] __fput+0xea/0x220 [<0>] ____fput+0xe/0x10 [<0>] task_work_run+0x8a/0xb0 [<0>] exit_to_usermode_loop+0xc4/0xd0 [<0>] do_syscall_64+0xf4/0x130 [<0>] entry_SYSCALL_64_after_hwframe+0x3d/0xa2 [<0>] 0xffffffffffffffff
6,2111,218820611653,-; task PC stack pid father 6,2112,218820614372,-;systemd D 0 1 0 0x00000004 4,2113,218820616975,-;Call Trace: 4,2114,218820618224,-; __schedule+0x3d6/0x8b0 4,2115,218820619977,-; ? xen_smp_send_reschedule+0x10/0x20 4,2116,218820622101,-; schedule+0x36/0x80 4,2117,218820624470,-; schedule_timeout+0x1db/0x370 4,2118,218820627660,-; ? delete_node+0x1a5/0x1f0 4,2119,218820630763,-; wait_for_completion+0xb4/0x140 4,2120,218820634184,-; ? wake_up_q+0x70/0x70 4,2121,218820638148,-; flush_work+0x129/0x1e0 4,2122,218820642490,-; ? worker_detach_from_pool+0xb0/0xb0 4,2123,218820646706,-; flush_delayed_work+0x3f/0x50 4,2124,218820650474,-; fsnotify_wait_marks_destroyed+0x15/0x20 4,2125,218820654789,-; fsnotify_destroy_group+0x48/0xd0 4,2126,218820658838,-; inotify_release+0x1e/0x50 4,2127,218820662400,-; __fput+0xea/0x220 4,2128,218820665486,-; ____fput+0xe/0x10 4,2129,218820668890,-; task_work_run+0x8a/0xb0 4,2130,218820672633,-; exit_to_usermode_loop+0xc4/0xd0 4,2131,218820676852,-; do_syscall_64+0xf4/0x130 4,2132,218820680778,-; entry_SYSCALL_64_after_hwframe+0x3d/0xa2 4,2133,218820686587,-;RIP: 0033:0x7efd22aaf57d 4,2134,218820691555,-;RSP: 002b:00007ffd463e6070 EFLAGS: 00000293 ORIG_RAX: 0000000000000003 4,2135,218820699907,-;RAX: 0000000000000000 RBX: 000000000000000d RCX: 00007efd22aaf57d 4,2136,218820708355,-;RDX: 0000000000000000 RSI: 0000000000000000 RDI: 000000000000000d 4,2137,218820717053,-;RBP: 00007efd241ea708 R08: 00005559085084d0 R09: 0000000000000001 4,2138,218820726163,-;R10: 0000000000000000 R11: 0000000000000293 R12: 0000000000000002 4,2139,218820734459,-;R13: 000055590749d580 R14: 00005559084bd9b8 R15: 000055590749e9c0
Also kworker shows a stack at some point.
3,1861,80233315906,-;INFO: task kworker/u30:3:5705 blocked for more than 120 seconds. 3,1862,80233321444,-; Tainted: P O 4.15.0-43-generic #46~16.04.1-Ubuntu 3,1863,80233327648,-;"echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message. 6,1864,80233333902,-;kworker/u30:3 D 0 5705 2 0x80000000 6,1865,80233333909,-;Workqueue: events_unbound fsnotify_mark_destroy_workfn 4,1866,80233333910,-;Call Trace: 4,1867,80233333914,-; __schedule+0x3d6/0x8b0 4,1868,80233333918,-; schedule+0x36/0x80 4,1869,80233333920,-; schedule_timeout+0x1db/0x370 4,1870,80233333927,-; ? __enqueue_entity+0x5c/0x60 4,1871,80233333932,-; ? enqueue_entity+0x112/0x670 4,1872,80233333937,-; wait_for_completion+0xb4/0x140 4,1873,80233333939,-; ? wake_up_q+0x70/0x70 4,1874,80233333944,-; __synchronize_srcu.part.13+0x85/0xb0 4,1875,80233333947,-; ? trace_raw_output_rcu_utilization+0x50/0x50 4,1876,80233333950,-; synchronize_srcu+0xd3/0xe0 4,1877,80233333956,-; ? synchronize_srcu+0xd3/0xe0 4,1878,80233333962,-; fsnotify_mark_destroy_workfn+0x7c/0xe0 4,1879,80233333966,-; process_one_work+0x14d/0x410 4,1880,80233333968,-; worker_thread+0x22b/0x460 4,1881,80233333971,-; kthread+0x105/0x140 4,1882,80233333974,-; ? process_one_work+0x410/0x410 4,1883,80233333976,-; ? kthread_destroy_worker+0x50/0x50 4,1884,80233333979,-; ret_from_fork+0x35/0x40
Note that the issue reappears after a reboot, either within a day or within a few days.
4.15.0-43-generic #46~16.04.1-Ubuntu SMP x86_64
I have the same problem but the process that gets stuck is dockerd not systemd.
3,2077,80354225063,-;INFO: task dockerd:2070 blocked for more than 120 seconds.
3,2078,80354230430,-; Tainted: P O 4.15.0-43-generic #46~16.04.1-Ubuntu
3,2079,80354236566,-;"echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message.
6,2080,80354242978,-;dockerd D 0 2070 1 0x00000000
4,2081,80354242981,-;Call Trace:
4,2082,80354242984,-; __schedule+0x3d6/0x8b0
4,2083,80354242986,-; ? xen_smp_send_reschedule+0x10/0x20
4,2084,80354242989,-; schedule+0x36/0x80
4,2085,80354242991,-; schedule_timeout+0x1db/0x370
4,2086,80354242993,-; ? try_to_wake_up+0x59/0x4a0
4,2087,80354242995,-; wait_for_completion+0xb4/0x140
4,2088,80354242996,-; ? wake_up_q+0x70/0x70
4,2089,80354242998,-; flush_work+0x129/0x1e0
4,2090,80354242999,-; ? worker_detach_from_pool+0xb0/0xb0
4,2091,80354243001,-; flush_delayed_work+0x3f/0x50
4,2092,80354243004,-; fsnotify_wait_marks_destroyed+0x15/0x20
4,2093,80354243005,-; fsnotify_destroy_group+0x48/0xd0
4,2094,80354243008,-; inotify_release+0x1e/0x50
4,2095,80354243011,-; __fput+0xea/0x220
4,2096,80354243013,-; ____fput+0xe/0x10
4,2097,80354243014,-; task_work_run+0x8a/0xb0
4,2098,80354243016,-; exit_to_usermode_loop+0xc4/0xd0
4,2099,80354243018,-; do_syscall_64+0xf4/0x130
4,2100,80354243020,-; entry_SYSCALL_64_after_hwframe+0x3d/0xa2
The tainted marker comes from the zfs module, since I have a ZFS partition mounted on the system.
The / partition however is EXT4, and docker is running from /.
cat /proc/1/stack
[<0>] flush_work+0x129/0x1e0
[<0>] flush_delayed_work+0x3f/0x50
[<0>] fsnotify_wait_marks_destroyed+0x15/0x20
[<0>] fsnotify_destroy_group+0x48/0xd0
[<0>] inotify_release+0x1e/0x50
[<0>] __fput+0xea/0x220
[<0>] ____fput+0xe/0x10
[<0>] task_work_run+0x8a/0xb0
[<0>] exit_to_usermode_loop+0xc4/0xd0
[<0>] do_syscall_64+0xf4/0x130
[<0>] entry_SYSCALL_64_after_hwframe+0x3d/0xa2
[<0>] 0xffffffffffffffff
6,2111,218820611653,-; task PC stack pid father
6,2112,218820614372,-;systemd D 0 1 0 0x00000004
4,2113,218820616975,-;Call Trace:
4,2114,218820618224,-; __schedule+0x3d6/0x8b0
4,2115,218820619977,-; ? xen_smp_send_reschedule+0x10/0x20
4,2116,218820622101,-; schedule+0x36/0x80
4,2117,218820624470,-; schedule_timeout+0x1db/0x370
4,2118,218820627660,-; ? delete_node+0x1a5/0x1f0
4,2119,218820630763,-; wait_for_completion+0xb4/0x140
4,2120,218820634184,-; ? wake_up_q+0x70/0x70
4,2121,218820638148,-; flush_work+0x129/0x1e0
4,2122,218820642490,-; ? worker_detach_from_pool+0xb0/0xb0
4,2123,218820646706,-; flush_delayed_work+0x3f/0x50
4,2124,218820650474,-; fsnotify_wait_marks_destroyed+0x15/0x20
4,2125,218820654789,-; fsnotify_destroy_group+0x48/0xd0
4,2126,218820658838,-; inotify_release+0x1e/0x50
4,2127,218820662400,-; __fput+0xea/0x220
4,2128,218820665486,-; ____fput+0xe/0x10
4,2129,218820668890,-; task_work_run+0x8a/0xb0
4,2130,218820672633,-; exit_to_usermode_loop+0xc4/0xd0
4,2131,218820676852,-; do_syscall_64+0xf4/0x130
4,2132,218820680778,-; entry_SYSCALL_64_after_hwframe+0x3d/0xa2
4,2133,218820686587,-;RIP: 0033:0x7efd22aaf57d
4,2134,218820691555,-;RSP: 002b:00007ffd463e6070 EFLAGS: 00000293 ORIG_RAX: 0000000000000003
4,2135,218820699907,-;RAX: 0000000000000000 RBX: 000000000000000d RCX: 00007efd22aaf57d
4,2136,218820708355,-;RDX: 0000000000000000 RSI: 0000000000000000 RDI: 000000000000000d
4,2137,218820717053,-;RBP: 00007efd241ea708 R08: 00005559085084d0 R09: 0000000000000001
4,2138,218820726163,-;R10: 0000000000000000 R11: 0000000000000293 R12: 0000000000000002
4,2139,218820734459,-;R13: 000055590749d580 R14: 00005559084bd9b8 R15: 000055590749e9c0
Also kworker shows a stack at some point.
3,1861,80233315906,-;INFO: task kworker/u30:3:5705 blocked for more than 120 seconds.
3,1862,80233321444,-; Tainted: P O 4.15.0-43-generic #46~16.04.1-Ubuntu
3,1863,80233327648,-;"echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message.
6,1864,80233333902,-;kworker/u30:3 D 0 5705 2 0x80000000
6,1865,80233333909,-;Workqueue: events_unbound fsnotify_mark_destroy_workfn
4,1866,80233333910,-;Call Trace:
4,1867,80233333914,-; __schedule+0x3d6/0x8b0
4,1868,80233333918,-; schedule+0x36/0x80
4,1869,80233333920,-; schedule_timeout+0x1db/0x370
4,1870,80233333927,-; ? __enqueue_entity+0x5c/0x60
4,1871,80233333932,-; ? enqueue_entity+0x112/0x670
4,1872,80233333937,-; wait_for_completion+0xb4/0x140
4,1873,80233333939,-; ? wake_up_q+0x70/0x70
4,1874,80233333944,-; __synchronize_srcu.part.13+0x85/0xb0
4,1875,80233333947,-; ? trace_raw_output_rcu_utilization+0x50/0x50
4,1876,80233333950,-; synchronize_srcu+0xd3/0xe0
4,1877,80233333956,-; ? synchronize_srcu+0xd3/0xe0
4,1878,80233333962,-; fsnotify_mark_destroy_workfn+0x7c/0xe0
4,1879,80233333966,-; process_one_work+0x14d/0x410
4,1880,80233333968,-; worker_thread+0x22b/0x460
4,1881,80233333971,-; kthread+0x105/0x140
4,1882,80233333974,-; ? process_one_work+0x410/0x410
4,1883,80233333976,-; ? kthread_destroy_worker+0x50/0x50
4,1884,80233333979,-; ret_from_fork+0x35/0x40
Note that the issue reappears after a reboot, either within a day or within a few days.