# git rev-parse -q --verify a279b25e6a61401a95641f2c0ac72ae6b07757fe^{commit} a279b25e6a61401a95641f2c0ac72ae6b07757fe already have revision, skipping fetch # git checkout -q -f -B kisskb a279b25e6a61401a95641f2c0ac72ae6b07757fe # git clean -qxdf # < git log -1 # commit a279b25e6a61401a95641f2c0ac72ae6b07757fe # Author: Michael Neuling # Date: Mon Mar 26 15:17:07 2018 +1100 # # powerpc/eeh: Fix race with driver un/bind # # The current EEH callbacks can race with a driver unbind. This can # result in a backtraces like this: # # EEH: Frozen PHB#0-PE#1fc detected # EEH: PE location: S000009, PHB location: N/A # CPU: 2 PID: 2312 Comm: kworker/u258:3 Not tainted 4.15.6-openpower1 #2 # Workqueue: nvme-wq nvme_reset_work [nvme] # Call Trace: # dump_stack+0x9c/0xd0 (unreliable) # eeh_dev_check_failure+0x420/0x470 # eeh_check_failure+0xa0/0xa4 # nvme_reset_work+0x138/0x1414 [nvme] # process_one_work+0x1ec/0x328 # worker_thread+0x2e4/0x3a8 # kthread+0x14c/0x154 # ret_from_kernel_thread+0x5c/0xc8 # nvme nvme1: Removing after probe failure status: -19 # # cpu 0x23: Vector: 300 (Data Access) at [c000000ff50f3800] # pc: c0080000089a0eb0: nvme_error_detected+0x4c/0x90 [nvme] # lr: c000000000026564: eeh_report_error+0xe0/0x110 # sp: c000000ff50f3a80 # msr: 9000000000009033 # dar: 400 # dsisr: 40000000 # current = 0xc000000ff507c000 # paca = 0xc00000000fdc9d80 softe: 0 irq_happened: 0x01 # pid = 782, comm = eehd # Linux version 4.15.6-openpower1 (smc@smc-desktop) (gcc version 6.4.0 (Buildroot 2017.11.2-00008-g4b6188e)) #2 SM P Tue Feb 27 12:33:27 PST 2018 # enter ? for help # eeh_report_error+0xe0/0x110 # eeh_pe_dev_traverse+0xc0/0xdc # eeh_handle_normal_event+0x184/0x4c4 # eeh_handle_event+0x30/0x288 # eeh_event_handler+0x124/0x170 # kthread+0x14c/0x154 # ret_from_kernel_thread+0x5c/0xc8 # # The first part is an EEH (on boot), the second half is the resulting # crash. nvme probe starts the nvme_reset_work() worker thread. This # worker thread starts touching the device which see a device error # (EEH) and hence queues up an event in the powerpc EEH worker # thread. nvme_reset_work() then continues and runs # nvme_remove_dead_ctrl_work() which results in unbinding the driver # from the device and hence releases all resources. At the same time, # the EEH worker thread starts doing the EEH .error_detected() driver # callback, which no longer works since the resources have been freed. # # This fixes the problem in the same way the generic PCIe AER code (in # drivers/pci/pcie/aer/aerdrv_core.c) does. It makes the EEH code hold # the device_lock() while performing the driver EEH callbacks and # associated code. This ensures either the callbacks are no longer # register, or if they are registered the driver will not be removed # from underneath us. # # This has been broken forever. The EEH call backs were first introduced # in 2005 (in 77bd7415610) but it's not clear if a lock was needed back # then. # # Fixes: 77bd74156101 ("[PATCH] powerpc: PCI Error Recovery: PPC64 core recovery routines") # Cc: stable@vger.kernel.org # v2.6.16+ # Signed-off-by: Michael Neuling # Reviewed-by: Benjamin Herrenschmidt # Signed-off-by: Michael Ellerman # < /opt/cross/kisskb/gcc-5.3.0-nolibc/powerpc64-linux/bin/powerpc64-linux-gcc --version # < git log --format=%s --max-count=1 a279b25e6a61401a95641f2c0ac72ae6b07757fe # < make -s -j 48 ARCH=powerpc O=/kisskb/build/powerpc-next_powerpc-allyesconfig_powerpc-5.3 CROSS_COMPILE=/opt/cross/kisskb/gcc-5.3.0-nolibc/powerpc64-linux/bin/powerpc64-linux- allyesconfig # Added to kconfig CONFIG_PROFILE_ALL_BRANCHES=n # Added to kconfig CONFIG_ENABLE_WARN_DEPRECATED=n # Added to kconfig CONFIG_ENABLE_MUST_CHECK=n # Added to kconfig CONFIG_DEBUG_INFO=n # Added to kconfig CONFIG_BUILD_DOCSRC=n # Added to kconfig CONFIG_MODULE_SIG=n # yes \n | make -s -j 48 ARCH=powerpc O=/kisskb/build/powerpc-next_powerpc-allyesconfig_powerpc-5.3 CROSS_COMPILE=/opt/cross/kisskb/gcc-5.3.0-nolibc/powerpc64-linux/bin/powerpc64-linux- oldconfig yes: standard output: Broken pipe yes: write error # make -s -j 48 ARCH=powerpc O=/kisskb/build/powerpc-next_powerpc-allyesconfig_powerpc-5.3 CROSS_COMPILE=/opt/cross/kisskb/gcc-5.3.0-nolibc/powerpc64-linux/bin/powerpc64-linux- /kisskb/src/sound/aoa/codecs/onyx.c: In function 'onyx_snd_single_bit_get': /kisskb/src/sound/aoa/codecs/onyx.c:380:37: warning: 'c' may be used uninitialized in this function [-Wmaybe-uninitialized] ucontrol->value.integer.value[0] = !!(c & mask) ^ polarity; ^ In file included from /kisskb/src/include/linux/io.h:25:0, from /kisskb/src/include/linux/of_address.h:7, from /kisskb/src/drivers/ata/ahci_qoriq.c:18: /kisskb/src/drivers/ata/ahci_qoriq.c: In function 'ahci_qoriq_hardreset': /kisskb/src/arch/powerpc/include/asm/io.h:625:3: warning: 'px_is' may be used uninitialized in this function [-Wmaybe-uninitialized] ppc_pci_io.name al; \ ^ /kisskb/src/drivers/ata/ahci_qoriq.c:86:14: note: 'px_is' was declared here u32 px_cmd, px_is, px_val; ^ In file included from /kisskb/src/include/linux/io.h:25:0, from /kisskb/src/include/linux/of_address.h:7, from /kisskb/src/drivers/ata/ahci_qoriq.c:18: /kisskb/src/arch/powerpc/include/asm/io.h:144:2: warning: 'px_cmd' may be used uninitialized in this function [-Wmaybe-uninitialized] __asm__ __volatile__("sync;"#insn" %1,%y0" \ ^ /kisskb/src/drivers/ata/ahci_qoriq.c:86:6: note: 'px_cmd' was declared here u32 px_cmd, px_is, px_val; ^ /kisskb/src/drivers/input/joystick/analog.c:176:2: warning: #warning Precise timer not defined for this architecture. [-Wcpp] #warning Precise timer not defined for this architecture. ^ WARNING: vmlinux.o(.text+0x27b40): Section mismatch in reference from the function .setup_rfi_flush() to the function .init.text:.ppc64_bolted_size() The function .setup_rfi_flush() references the function __init .ppc64_bolted_size(). This is often because .setup_rfi_flush lacks a __init annotation or the annotation of .ppc64_bolted_size is wrong. WARNING: vmlinux.o(.text+0x27b70): Section mismatch in reference from the function .setup_rfi_flush() to the function .init.text:.memblock_alloc_base() The function .setup_rfi_flush() references the function __init .memblock_alloc_base(). This is often because .setup_rfi_flush lacks a __init annotation or the annotation of .memblock_alloc_base is wrong. WARNING: vmlinux.o(.text+0x3d8f7c): Section mismatch in reference from the function .devm_memremap_pages() to the function .meminit.text:.arch_add_memory() The function .devm_memremap_pages() references the function __meminit .arch_add_memory(). This is often because .devm_memremap_pages lacks a __meminit annotation or the annotation of .arch_add_memory is wrong. WARNING: vmlinux.o(.text+0x3d9878): Section mismatch in reference from the function .devm_memremap_pages_release() to the function .meminit.text:.arch_remove_memory() The function .devm_memremap_pages_release() references the function __meminit .arch_remove_memory(). This is often because .devm_memremap_pages_release lacks a __meminit annotation or the annotation of .arch_remove_memory is wrong. WARNING: vmlinux.o(.text+0x500f5c): Section mismatch in reference from the function .hmm_devmem_release() to the function .meminit.text:.arch_remove_memory() The function .hmm_devmem_release() references the function __meminit .arch_remove_memory(). This is often because .hmm_devmem_release lacks a __meminit annotation or the annotation of .arch_remove_memory is wrong. WARNING: vmlinux.o(.text+0x501438): Section mismatch in reference from the function .hmm_devmem_pages_create() to the function .meminit.text:.arch_add_memory() The function .hmm_devmem_pages_create() references the function __meminit .arch_add_memory(). This is often because .hmm_devmem_pages_create lacks a __meminit annotation or the annotation of .arch_add_memory is wrong. Completed OK # rm -rf /kisskb/build/powerpc-next_powerpc-allyesconfig_powerpc-5.3 # Build took: 0:14:13.409688