# git rev-parse -q --verify ac1788cc7da4ce54edcfd2e499afdb0a23d5c41d^{commit} ac1788cc7da4ce54edcfd2e499afdb0a23d5c41d already have revision, skipping fetch # git checkout -q -f -B kisskb ac1788cc7da4ce54edcfd2e499afdb0a23d5c41d # git clean -qxdf # < git log -1 # commit ac1788cc7da4ce54edcfd2e499afdb0a23d5c41d # Author: Srikar Dronamraju # Date: Fri Sep 28 09:17:32 2018 +0530 # # powerpc/numa: Skip onlining a offline node in kdump path # # With commit 2ea626306810 ("powerpc/topology: Get topology for shared # processors at boot"), kdump kernel on shared LPAR may crash. # # The necessary conditions are # - Shared LPAR with at least 2 nodes having memory and CPUs. # - Memory requirement for kdump kernel must be met by the first N-1 # nodes where there are at least N nodes with memory and CPUs. # # Example numactl of such a machine. # $ numactl -H # available: 5 nodes (0,2,5-7) # node 0 cpus: # node 0 size: 0 MB # node 0 free: 0 MB # node 2 cpus: # node 2 size: 255 MB # node 2 free: 189 MB # node 5 cpus: 24 25 26 27 28 29 30 31 # node 5 size: 4095 MB # node 5 free: 4024 MB # node 6 cpus: 0 1 2 3 4 5 6 7 16 17 18 19 20 21 22 23 # node 6 size: 6353 MB # node 6 free: 5998 MB # node 7 cpus: 8 9 10 11 12 13 14 15 32 33 34 35 36 37 38 39 # node 7 size: 7640 MB # node 7 free: 7164 MB # node distances: # node 0 2 5 6 7 # 0: 10 40 40 40 40 # 2: 40 10 40 40 40 # 5: 40 40 10 40 40 # 6: 40 40 40 10 20 # 7: 40 40 40 20 10 # # Steps to reproduce. # 1. Load / start kdump service. # 2. Trigger a kdump (for example : echo c > /proc/sysrq-trigger) # # When booting a kdump kernel with 2048M: # # kexec: Starting switchover sequence. # I'm in purgatory # Using 1TB segments # hash-mmu: Initializing hash mmu with SLB # Linux version 4.19.0-rc5-master+ (srikar@linux-xxu6) (gcc version 4.8.5 (SUSE Linux)) #1 SMP Thu Sep 27 19:45:00 IST 2018 # Found initrd at 0xc000000009e70000:0xc00000000ae554b4 # Using pSeries machine description # ----------------------------------------------------- # ppc64_pft_size = 0x1e # phys_mem_size = 0x88000000 # dcache_bsize = 0x80 # icache_bsize = 0x80 # cpu_features = 0x000000ff8f5d91a7 # possible = 0x0000fbffcf5fb1a7 # always = 0x0000006f8b5c91a1 # cpu_user_features = 0xdc0065c2 0xef000000 # mmu_features = 0x7c006001 # firmware_features = 0x00000007c45bfc57 # htab_hash_mask = 0x7fffff # physical_start = 0x8000000 # ----------------------------------------------------- # numa: NODE_DATA [mem 0x87d5e300-0x87d67fff] # numa: NODE_DATA(0) on node 6 # numa: NODE_DATA [mem 0x87d54600-0x87d5e2ff] # Top of RAM: 0x88000000, Total RAM: 0x88000000 # Memory hole size: 0MB # Zone ranges: # DMA [mem 0x0000000000000000-0x0000000087ffffff] # DMA32 empty # Normal empty # Movable zone start for each node # Early memory node ranges # node 6: [mem 0x0000000000000000-0x0000000087ffffff] # Could not find start_pfn for node 0 # Initmem setup node 0 [mem 0x0000000000000000-0x0000000000000000] # On node 0 totalpages: 0 # Initmem setup node 6 [mem 0x0000000000000000-0x0000000087ffffff] # On node 6 totalpages: 34816 # # Unable to handle kernel paging request for data at address 0x00000060 # Faulting instruction address: 0xc000000008703a54 # Oops: Kernel access of bad area, sig: 11 [#1] # LE SMP NR_CPUS=2048 NUMA pSeries # Modules linked in: # CPU: 11 PID: 1 Comm: swapper/11 Not tainted 4.19.0-rc5-master+ #1 # NIP: c000000008703a54 LR: c000000008703a38 CTR: 0000000000000000 # REGS: c00000000b673440 TRAP: 0380 Not tainted (4.19.0-rc5-master+) # MSR: 8000000002009033 CR: 24022022 XER: 20000002 # CFAR: c0000000086fc238 IRQMASK: 0 # GPR00: c000000008703a38 c00000000b6736c0 c000000009281900 0000000000000000 # GPR04: 0000000000000000 0000000000000000 fffffffffffff001 c00000000b660080 # GPR08: 0000000000000000 0000000000000000 0000000000000000 0000000000000220 # GPR12: 0000000000002200 c000000009e51400 0000000000000000 0000000000000008 # GPR16: 0000000000000000 c000000008c152e8 c000000008c152a8 0000000000000000 # GPR20: c000000009422fd8 c000000009412fd8 c000000009426040 0000000000000008 # GPR24: 0000000000000000 0000000000000000 c000000009168bc8 c000000009168c78 # GPR28: c00000000b126410 0000000000000000 c00000000916a0b8 c00000000b126400 # NIP [c000000008703a54] bus_add_device+0x84/0x1e0 # LR [c000000008703a38] bus_add_device+0x68/0x1e0 # Call Trace: # [c00000000b6736c0] [c000000008703a38] bus_add_device+0x68/0x1e0 (unreliable) # [c00000000b673740] [c000000008700194] device_add+0x454/0x7c0 # [c00000000b673800] [c00000000872e660] __register_one_node+0xb0/0x240 # [c00000000b673860] [c00000000839a6bc] __try_online_node+0x12c/0x180 # [c00000000b673900] [c00000000839b978] try_online_node+0x58/0x90 # [c00000000b673930] [c0000000080846d8] find_and_online_cpu_nid+0x158/0x190 # [c00000000b673a10] [c0000000080848a0] numa_update_cpu_topology+0x190/0x580 # [c00000000b673c00] [c000000008d3f2e4] smp_cpus_done+0x94/0x108 # [c00000000b673c70] [c000000008d5c00c] smp_init+0x174/0x19c # [c00000000b673d00] [c000000008d346b8] kernel_init_freeable+0x1e0/0x450 # [c00000000b673dc0] [c0000000080102e8] kernel_init+0x28/0x160 # [c00000000b673e30] [c00000000800b65c] ret_from_kernel_thread+0x5c/0x80 # Instruction dump: # 60000000 60000000 e89e0020 7fe3fb78 4bff87d5 60000000 7c7d1b79 4082008c # e8bf0050 e93e0098 3b9f0010 2fa50000 38630018 419e0114 7f84e378 # ---[ end trace 593577668c2daa65 ]--- # # However a regular kernel with 4096M (2048 gets reserved for crash # kernel) boots properly. # # Unlike regular kernels, which mark all available nodes as online, # kdump kernel only marks just enough nodes as online and marks the rest # as offline at boot. However kdump kernel boots with all available # CPUs. With Commit 2ea626306810 ("powerpc/topology: Get topology for # shared processors at boot"), all CPUs are onlined on their respective # nodes at boot time. try_online_node() tries to online the offline # nodes but fails as all needed subsystems are not yet initialized. # # As part of fix, detect and skip early onlining of a offline node. # # Fixes: 2ea626306810 ("powerpc/topology: Get topology for shared processors at boot") # Reported-by: Pavithra Prakash # Signed-off-by: Srikar Dronamraju # Tested-by: Hari Bathini # Signed-off-by: Michael Ellerman # < /opt/cross/kisskb/korg/gcc-5.5.0-nolibc/powerpc64-linux/bin/powerpc64-linux-gcc --version # < git log --format=%s --max-count=1 ac1788cc7da4ce54edcfd2e499afdb0a23d5c41d # < make -s -j 48 ARCH=powerpc O=/kisskb/build/powerpc-fixes_chrp32_defconfig_powerpc-gcc5 CROSS_COMPILE=/opt/cross/kisskb/korg/gcc-5.5.0-nolibc/powerpc64-linux/bin/powerpc64-linux- chrp32_defconfig # make -s -j 48 ARCH=powerpc O=/kisskb/build/powerpc-fixes_chrp32_defconfig_powerpc-gcc5 CROSS_COMPILE=/opt/cross/kisskb/korg/gcc-5.5.0-nolibc/powerpc64-linux/bin/powerpc64-linux- Completed OK # rm -rf /kisskb/build/powerpc-fixes_chrp32_defconfig_powerpc-gcc5 # Build took: 0:00:41.896243