Server hangs when copying files to an mdadm RAID 1 array
Affects | Status | Importance | Assigned to | Milestone |
---|---|---|---|---|
mdadm (Ubuntu) | New | Undecided | Unassigned | |
Bug Description
On three occasions the following has now happened:
Whilst uploading files to our server running Ubuntu 10.04 LTS, we find that the server hangs and the file copying process halts. Below is the output from dmesg:
[297600.309244] INFO: task flush-9:0:28496 blocked for more than 120 seconds.
[297600.309511] "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message.
[297600.309769] flush-9:0 D 00002488 0 28496 2 0x00000000
[297600.309787] d289bb54 00000046 de148000 00002488 00000000 c088e5c0 d7a735a4 c088e5c0
[297600.309810] 0531e746 00010e85 c088e5c0 c088e5c0 d7a735a4 c088e5c0 c088e5c0 d8b1e380
[297600.309832] 00000000 00010e85 d7a73300 de335c00 d289bb5c de335dd8 d289bb7c c04a48f5
[297600.309854] Call Trace:
[297600.309892] [<c04a48f5>] md_write_
[297600.309913] [<c0170a30>] ? autoremove_
[297600.309957] [<e084930a>] make_request+
[297600.309973] [<c04a5053>] md_make_
[297600.310003] [<c03467b4>] generic_
[297600.310020] [<c03469e5>] submit_
[297600.310040] [<c023c0ae>] ? bio_alloc_
[297600.310053] [<c0237b66>] submit_
[297600.310066] [<c023992b>] __block_
[297600.310081] [<c023db20>] ? blkdev_
[297600.310094] [<c023a3da>] block_write_
[297600.310107] [<c02390a0>] ? end_buffer_
[297600.310120] [<c023db20>] ? blkdev_
[297600.310132] [<c023a407>] block_write_
[297600.310144] [<c02390a0>] ? end_buffer_
[297600.310159] [<c023e684>] blkdev_
[297600.310181] [<c01daef0>] __writepage+
[297600.310196] [<c01dbf36>] write_cache_
[297600.310209] [<c01daee0>] ? __writepage+
[297600.310225] [<c0150778>] ? dequeue_
[297600.310237] [<c023e670>] ? blkdev_
[297600.310250] [<c01dc0e4>] generic_
[297600.310263] [<c01dc10c>] do_writepages+
[297600.310283] [<c02320b5>] writeback_
[297600.310297] [<c0232899>] writeback_
[297600.310311] [<c0232cea>] writeback_
[297600.310324] [<c0232fa3>] wb_writeback+
[297600.310338] [<c0233121>] wb_do_writeback
[297600.310352] [<c023316b>] bdi_writeback_
[297600.310365] [<c01eb4b0>] ? bdi_start_
[297600.310376] [<c01eb50d>] bdi_start_
[297600.310388] [<c01eb4b0>] ? bdi_start_
[297600.310399] [<c01707a4>] kthread+0x74/0x80
[297600.310411] [<c0170730>] ? kthread+0x0/0x80
[297600.310428] [<c010a447>] kernel_
[307200.308708] INFO: task kjournald:594 blocked for more than 120 seconds.
[307200.308934] "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message.
[307200.309192] kjournald D 0000695b 0 594 2 0x00000000
[307200.309209] d8c25cf0 00000046 de148000 0000695b 00000000 c088e5c0 d791b5a4 c088e5c0
[307200.309232] 59ebb558 00011732 c088e5c0 c088e5c0 d791b5a4 c088e5c0 c088e5c0 d7b041c0
[307200.309254] 00000000 00011732 d791b300 de335c00 d8c25cf8 de335dd8 d8c25d18 c04a48f5
[307200.309276] Call Trace:
[307200.309314] [<c04a48f5>] md_write_
[307200.309335] [<c0170a30>] ? autoremove_
[307200.309378] [<e084930a>] make_request+
[307200.309403] [<c01d9717>] ? get_page_
[307200.309418] [<c04a5053>] md_make_
[307200.309448] [<c03467b4>] generic_
[307200.309465] [<c0274a80>] ? ext3_get_
[307200.309493] [<c02374c3>] ? generic_
[307200.309509] [<c03469e5>] submit_
[307200.309527] [<c023c0ae>] ? bio_alloc_
[307200.309539] [<c0237b66>] submit_
[307200.309561] [<c02cacf5>] journal_
[307200.309579] [<c05b2f2f>] ? _spin_lock_
[307200.309609] [<c0164aa8>] ? try_to_
[307200.309626] [<c02cdd15>] kjournald+
[307200.309640] [<c0170a30>] ? autoremove_
[307200.309653] [<c02cdc60>] ? kjournald+0x0/0x1e0
[307200.309664] [<c01707a4>] kthread+0x74/0x80
[307200.309675] [<c0170730>] ? kthread+0x0/0x80
[307200.309693] [<c010a447>] kernel_
[307800.308249] INFO: task kjournald:594 blocked for more than 120 seconds.
[307800.308666] "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message.
[307800.309272] kjournald D 0001ca8c 0 594 2 0x00000000
[307800.309290] d8c25cf0 00000046 de148000 0001ca8c 00000000 c088e5c0 d791b5a4 c088e5c0
[307800.309313] 1d6caa0c 000117ce c088e5c0 c088e5c0 d791b5a4 c088e5c0 c088e5c0 d88e4e00
[307800.309335] 00000000 000117ce d791b300 de335c00 d8c25cf8 de335dd8 d8c25d18 c04a48f5
[307800.309357] Call Trace:
[307800.309394] [<c04a48f5>] md_write_
[307800.309414] [<c0170a30>] ? autoremove_
[307800.309459] [<e084930a>] make_request+
[307800.309484] [<c01d9717>] ? get_page_
[307800.309499] [<c04a5053>] md_make_
[307800.309529] [<c03467b4>] generic_
[307800.309547] [<c0274a80>] ? ext3_get_
[307800.309575] [<c02374c3>] ? generic_
[307800.309589] [<c01d68c3>] ? mempool_
[307800.309603] [<c03469e5>] submit_
[307800.309621] [<c023c0ae>] ? bio_alloc_
[307800.309634] [<c0237b66>] submit_
[307800.309656] [<c02cacf5>] journal_
[307800.309673] [<c05b2f2f>] ? _spin_lock_
[307800.309704] [<c0164aa8>] ? try_to_
[307800.309721] [<c02cdd15>] kjournald+
[307800.309735] [<c0170a30>] ? autoremove_
[307800.309748] [<c02cdc60>] ? kjournald+0x0/0x1e0
[307800.309759] [<c01707a4>] kthread+0x74/0x80
[307800.309771] [<c0170730>] ? kthread+0x0/0x80
[307800.309788] [<c010a447>] kernel_
[307800.309813] INFO: task flush-9:0:3573 blocked for more than 120 seconds.
[307800.310017] "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message.
[307800.310273] flush-9:0 D 0000864f 0 3573 2 0x00000000
[307800.310288] de7c1b54 00000046 de148000 0000864f 00000000 c088e5c0 d8b05be4 c088e5c0
[307800.310310] ff2d2b01 000117d2 c088e5c0 c088e5c0 d8b05be4 c088e5c0 c088e5c0 de3e6c40
[307800.310331] 00000000 000117d2 d8b05940 de335c00 de7c1b5c de335dd8 de7c1b7c c04a48f5
[307800.310353] Call Trace:
[307800.310368] [<c04a48f5>] md_write_
[307800.310382] [<c0170a30>] ? autoremove_
[307800.310401] [<e084930a>] make_request+
[307800.310428] [<c016cf90>] ? queue_work_
[307800.310442] [<c04a5053>] md_make_
[307800.310456] [<c016490e>] ? run_timer_
[307800.310471] [<c03467b4>] generic_
[307800.310489] [<c01ad597>] ? __rcu_process_
[307800.310503] [<c015bbb0>] ? __do_softirq+
[307800.310526] [<c01310a8>] ? default_
[307800.310541] [<c01d68c3>] ? mempool_
[307800.310555] [<c03469e5>] submit_
[307800.310569] [<c023c0ae>] ? bio_alloc_
[307800.310581] [<c0237b66>] submit_
[307800.310594] [<c023992b>] __block_
[307800.310609] [<c023db20>] ? blkdev_
[307800.310622] [<c023a3da>] block_write_
[307800.310635] [<c02390a0>] ? end_buffer_
[307800.310648] [<c023db20>] ? blkdev_
[307800.310660] [<c023a407>] block_write_
[307800.310673] [<c02390a0>] ? end_buffer_
[307800.310687] [<c023e684>] blkdev_
[307800.310702] [<c01daef0>] __writepage+
[307800.310717] [<c01dbf36>] write_cache_
[307800.310730] [<c01daee0>] ? __writepage+
[307800.310745] [<c0150778>] ? dequeue_
[307800.310758] [<c023e670>] ? blkdev_
[307800.310771] [<c01dc0e4>] generic_
[307800.310783] [<c01dc10c>] do_writepages+
[307800.310797] [<c02320b5>] writeback_
[307800.310811] [<c0232899>] writeback_
[307800.310825] [<c0232cea>] writeback_
[307800.310838] [<c0232fa3>] wb_writeback+
[307800.310852] [<c0233088>] ? wb_do_writeback
[307800.310864] [<c0233121>] wb_do_writeback
[307800.310878] [<c023316b>] bdi_writeback_
[307800.310891] [<c01eb4b0>] ? bdi_start_
[307800.310902] [<c01eb50d>] bdi_start_
[307800.310914] [<c01eb4b0>] ? bdi_start_
[307800.310925] [<c01707a4>] kthread+0x74/0x80
[307800.310937] [<c0170730>] ? kthread+0x0/0x80
[307800.310949] [<c010a447>] kernel_
[307800.310960] INFO: task smbd:3616 blocked for more than 120 seconds.
[307800.311151] "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message.
[307800.311407] smbd D 0005c497 0 3616 630 0x00000000
[307800.311422] c853fcf4 00000082 d7914000 0005c497 00000000 c088e5c0 d9d682a4 c088e5c0
[307800.311444] f293f2e9 000117cc c088e5c0 c088e5c0 d9d682a4 c088e5c0 c088e5c0 d7b04380
[307800.311466] 00000000 000117cc d9d68000 c1401f38 d642eb40 c853fd30 c853fd50 c02c9f45
[307800.311487] Call Trace:
[307800.311502] [<c02c9f45>] do_get_
[307800.311516] [<c02c82ff>] ? __journal_
[307800.311530] [<c0170a80>] ? wake_bit_
[307800.311543] [<c02ca2a8>] journal_
[307800.311564] [<c0280c24>] __ext3_
[307800.311578] [<c0273d69>] ext3_reserve_
[307800.311590] [<c0273dbb>] ext3_mark_
[307800.311603] [<c027754d>] add_dirent_
[307800.311617] [<c0278532>] ext3_add_
[307800.311630] [<c0278650>] ext3_add_
[307800.311642] [<c0278825>] ext3_create+
[307800.311665] [<c021e0a6>] vfs_create+
[307800.311678] [<c021e159>] __open_
[307800.311691] [<c0220fee>] do_filp_
[307800.311703] [<c022060a>] ? user_path_
[307800.311718] [<c0360829>] ? copy_to_
[307800.311747] [<c0212005>] do_sys_
[307800.311760] [<c021217e>] sys_open+0x2e/0x40
[307800.311772] [<c01097ac>] syscall_
[308040.308923] INFO: task flush-9:0:3573 blocked for more than 120 seconds.
[308040.309149] "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message.
[308040.309406] flush-9:0 D 00002d8e 0 3573 2 0x00000000
[308040.309423] de7c1b54 00000046 d7914000 00002d8e 00000000 c088e5c0 d8b05be4 c088e5c0
[308040.309447] 50453c84 00011802 c088e5c0 c088e5c0 d8b05be4 c088e5c0 c088e5c0 d8b1e380
[308040.309468] 00000000 00011802 d8b05940 de335c00 de7c1b5c de335dd8 de7c1b7c c04a48f5
[308040.309490] Call Trace:
[308040.309529] [<c04a48f5>] md_write_
[308040.309549] [<c0170a30>] ? autoremove_
[308040.309593] [<e084930a>] make_request+
[308040.309621] [<c016cf90>] ? queue_work_
[308040.309635] [<c04a5053>] md_make_
[308040.309665] [<c016490e>] ? run_timer_
[308040.309694] [<c03467b4>] generic_
[308040.309713] [<c01ad597>] ? __rcu_process_
[308040.309727] [<c015bbb0>] ? __do_softirq+
[308040.309751] [<c01310a8>] ? default_
[308040.309770] [<c01d68c3>] ? mempool_
[308040.309785] [<c03469e5>] submit_
[308040.309804] [<c023c0ae>] ? bio_alloc_
[308040.309817] [<c0237b66>] submit_
[308040.309830] [<c023992b>] __block_
[308040.309845] [<c023db20>] ? blkdev_
[308040.309858] [<c023a3da>] block_write_
[308040.309871] [<c02390a0>] ? end_buffer_
[308040.309884] [<c023db20>] ? blkdev_
[308040.309896] [<c023a407>] block_write_
[308040.309908] [<c02390a0>] ? end_buffer_
[308040.309923] [<c023e684>] blkdev_
[308040.309940] [<c01daef0>] __writepage+
[308040.309955] [<c01dbf36>] write_cache_
[308040.309968] [<c01daee0>] ? __writepage+
[308040.309983] [<c0150778>] ? dequeue_
[308040.309996] [<c023e670>] ? blkdev_
[308040.310009] [<c01dc0e4>] generic_
[308040.310021] [<c01dc10c>] do_writepages+
[308040.310041] [<c02320b5>] writeback_
[308040.310055] [<c0232899>] writeback_
[308040.310069] [<c0232cea>] writeback_
[308040.310082] [<c0232fa3>] wb_writeback+
[308040.310096] [<c0233088>] ? wb_do_writeback
[308040.310109] [<c0233121>] wb_do_writeback
[308040.310123] [<c023316b>] bdi_writeback_
[308040.310136] [<c01eb4b0>] ? bdi_start_
[308040.310147] [<c01eb50d>] bdi_start_
[308040.310159] [<c01eb4b0>] ? bdi_start_
[308040.310170] [<c01707a4>] kthread+0x74/0x80
[308040.310182] [<c0170730>] ? kthread+0x0/0x80
[308040.310198] [<c010a447>] kernel_
[312356.174944] md: md0: data-check done.
After the file copy process halts, we log on to the server via Webmin and find that the RAID 1 array has to rebuild each time.