# git rev-parse -q --verify ed16e99272308afe183d983523bf995e8a517026^{commit} ed16e99272308afe183d983523bf995e8a517026 already have revision, skipping fetch # git checkout -q -f -B kisskb ed16e99272308afe183d983523bf995e8a517026 # git clean -qxdf # < git log -1 # commit ed16e99272308afe183d983523bf995e8a517026 # Author: Michael Neuling # Date: Mon Mar 26 15:17:07 2018 +1100 # # powerpc/eeh: Fix race with driver un/bind # # The current EEH callbacks can race with a driver unbind. This can # result in a backtraces like this: # # EEH: Frozen PHB#0-PE#1fc detected # EEH: PE location: S000009, PHB location: N/A # CPU: 2 PID: 2312 Comm: kworker/u258:3 Not tainted 4.15.6-openpower1 #2 # Workqueue: nvme-wq nvme_reset_work [nvme] # Call Trace: # dump_stack+0x9c/0xd0 (unreliable) # eeh_dev_check_failure+0x420/0x470 # eeh_check_failure+0xa0/0xa4 # nvme_reset_work+0x138/0x1414 [nvme] # process_one_work+0x1ec/0x328 # worker_thread+0x2e4/0x3a8 # kthread+0x14c/0x154 # ret_from_kernel_thread+0x5c/0xc8 # nvme nvme1: Removing after probe failure status: -19 # # cpu 0x23: Vector: 300 (Data Access) at [c000000ff50f3800] # pc: c0080000089a0eb0: nvme_error_detected+0x4c/0x90 [nvme] # lr: c000000000026564: eeh_report_error+0xe0/0x110 # sp: c000000ff50f3a80 # msr: 9000000000009033 # dar: 400 # dsisr: 40000000 # current = 0xc000000ff507c000 # paca = 0xc00000000fdc9d80 softe: 0 irq_happened: 0x01 # pid = 782, comm = eehd # Linux version 4.15.6-openpower1 (smc@smc-desktop) (gcc version 6.4.0 (Buildroot 2017.11.2-00008-g4b6188e)) #2 SM P Tue Feb 27 12:33:27 PST 2018 # enter ? for help # eeh_report_error+0xe0/0x110 # eeh_pe_dev_traverse+0xc0/0xdc # eeh_handle_normal_event+0x184/0x4c4 # eeh_handle_event+0x30/0x288 # eeh_event_handler+0x124/0x170 # kthread+0x14c/0x154 # ret_from_kernel_thread+0x5c/0xc8 # # The first part is an EEH (on boot), the second half is the resulting # crash. nvme probe starts the nvme_reset_work() worker thread. This # worker thread starts touching the device which see a device error # (EEH) and hence queues up an event in the powerpc EEH worker # thread. nvme_reset_work() then continues and runs # nvme_remove_dead_ctrl_work() which results in unbinding the driver # from the device and hence releases all resources. At the same time, # the EEH worker thread starts doing the EEH .error_detected() driver # callback, which no longer works since the resources have been freed. # # This fixes the problem in the same way the generic PCIe AER code (in # drivers/pci/pcie/aer/aerdrv_core.c) does. It makes the EEH code hold # the device_lock() while performing the driver EEH callbacks and # associated code. This ensures either the callbacks are no longer # register, or if they are registered the driver will not be removed # from underneath us. # # This has been broken forever. The EEH call backs were first introduced # in 2005 (in 77bd7415610) but it's not clear if a lock was needed back # then. # # Fixes: 77bd74156101 ("[PATCH] powerpc: PCI Error Recovery: PPC64 core recovery routines") # Cc: stable@vger.kernel.org # v2.6.16+ # Signed-off-by: Michael Neuling # Reviewed-by: Benjamin Herrenschmidt # Signed-off-by: Michael Ellerman # < /opt/cross/kisskb/gcc-4.6.3-nolibc/powerpc-linux/bin/powerpc-linux-gcc --version # < git log --format=%s --max-count=1 ed16e99272308afe183d983523bf995e8a517026 # < make -s -j 48 ARCH=powerpc O=/kisskb/build/powerpc-next_44x_akebono_defconfig_powerpc CROSS_COMPILE=/opt/cross/kisskb/gcc-4.6.3-nolibc/powerpc-linux/bin/powerpc-linux- 44x/akebono_defconfig # make -s -j 48 ARCH=powerpc O=/kisskb/build/powerpc-next_44x_akebono_defconfig_powerpc CROSS_COMPILE=/opt/cross/kisskb/gcc-4.6.3-nolibc/powerpc-linux/bin/powerpc-linux- /kisskb/src/kernel/printk/printk.c: In function 'devkmsg_sysctl_set_loglvl': /kisskb/src/kernel/printk/printk.c:183:16: warning: 'old' may be used uninitialized in this function [-Wuninitialized] /kisskb/src/kernel/rcu/srcutree.c: In function 'init_srcu_struct_fields': /kisskb/src/kernel/rcu/srcutree.c:146:32: warning: 'levelspread[]' may be used uninitialized in this function [-Wuninitialized] /kisskb/src/kernel/rcu/srcutree.c:94:6: note: 'levelspread[]' was declared here /kisskb/src/drivers/tty/serial/8250/8250_core.c: In function 'univ8250_release_irq': /kisskb/src/drivers/tty/serial/8250/8250_core.c:251:18: warning: 'i' may be used uninitialized in this function [-Wuninitialized] /kisskb/src/drivers/tty/serial/8250/8250_core.c:231:19: note: 'i' was declared here WARNING: modpost: Found 1 section mismatch(es). To see full details build your kernel with: 'make CONFIG_DEBUG_SECTION_MISMATCH=y' Completed OK # rm -rf /kisskb/build/powerpc-next_44x_akebono_defconfig_powerpc # Build took: 0:00:45.667813