# git rev-parse -q --verify 8fc63a91e785ef06fb7f1aba59297793d85095f7^{commit}
8fc63a91e785ef06fb7f1aba59297793d85095f7
already have revision, skipping fetch
# git checkout -q -f -B kisskb 8fc63a91e785ef06fb7f1aba59297793d85095f7
# git clean -qxdf
# < git log -1
# commit 8fc63a91e785ef06fb7f1aba59297793d85095f7
# Merge: 6f4b7052daa0 c46975715f5a
# Author: Michael Ellerman
# Date:   Fri Dec 15 13:51:56 2023 +1100
#
# Merge branch 'smp-topo' into next
#
# Merge a branch containing SMP topology updates from Srikar, purely so we can
# include the cover letter, which has a lot of good detail, here:
#
# PowerVM systems configured in shared processors mode have some unique
# challenges. Some device-tree properties will be missing on a shared
# processor system. Hence, some sched domains may not make sense for shared
# processor systems.
#
# Most shared processor systems are over-provisioned. The underlying PowerVM
# hypervisor schedules at a Big Core (SMT8) granularity. The most recent Power
# processors support two almost independent cores within each Big Core. In a
# lightly loaded condition, it helps overall system performance if we pack
# onto a smaller number of Big Cores.
#
# Since each thread-group is independent, running threads on both thread-groups
# of an SMT8 core should have minimal adverse impact in non-over-provisioned
# scenarios. The changes in this patchset have no effect in the over-provisioned
# scenario; if there are more threads than SMT domains, asym_packing will not
# kick in.
#
# System Configuration
# type=Shared mode=Uncapped smt=8 lcpu=96 mem=1066409344 kB cpus=96 ent=64.00
# i.e. a *64 entitled cores / 96 virtual processors* scenario
#
# lscpu
# Architecture:          ppc64le
# Byte Order:            Little Endian
# CPU(s):                768
# On-line CPU(s) list:   0-767
# Model name:            POWER10 (architected), altivec supported
# Model:                 2.0 (pvr 0080 0200)
# Thread(s) per core:    8
# Core(s) per socket:    16
# Socket(s):             6
# Hypervisor vendor:     pHyp
# Virtualization type:   para
# L1d cache:             6 MiB (192 instances)
# L1i cache:             9 MiB (192 instances)
# NUMA node(s):          6
# NUMA node0 CPU(s):     0-7,32-39,80-87,128-135,176-183,224-231,272-279,320-327,368-375,416-423,464-471,512-519,560-567,608-615,656-663,704-711,752-759
# NUMA node1 CPU(s):     8-15,40-47,88-95,136-143,184-191,232-239,280-287,328-335,376-383,424-431,472-479,520-527,568-575,616-623,664-671,712-719,760-767
# NUMA node4 CPU(s):     64-71,112-119,160-167,208-215,256-263,304-311,352-359,400-407,448-455,496-503,544-551,592-599,640-647,688-695,736-743
# NUMA node5 CPU(s):     16-23,48-55,96-103,144-151,192-199,240-247,288-295,336-343,384-391,432-439,480-487,528-535,576-583,624-631,672-679,720-727
# NUMA node6 CPU(s):     72-79,120-127,168-175,216-223,264-271,312-319,360-367,408-415,456-463,504-511,552-559,600-607,648-655,696-703,744-751
# NUMA node7 CPU(s):     24-31,56-63,104-111,152-159,200-207,248-255,296-303,344-351,392-399,440-447,488-495,536-543,584-591,632-639,680-687,728-735
#
# ebizzy -t 32 -S 200 (5 iterations). Records per second (higher is better)
# Kernel     N  Min      Max      Median   Avg        Stddev     %Change
# 6.6.0-rc3  5  3840178  4059268  3978042  3973936.6  84264.456
# +patch     5  3768393  3927901  3874994  3854046    71532.926  -3.01692
#
# From lparstat (when the workload stabilized)
# Kernel     %user  %sys  %wait  %idle  physc  %entc  lbusy  app    vcsw       phint
# 6.6.0-rc3  4.16   0.00  0.00   95.84  26.06  40.72  4.16   69.88  276906989  578
# +patch     4.16   0.00  0.00   95.83  17.70  27.66  4.17   78.26  70436663   119
#
# ebizzy -t 128 -S 200 (5 iterations). Records per second (higher is better)
# Kernel     N  Min      Max      Median   Avg        Stddev     %Change
# 6.6.0-rc3  5  5520692  5981856  5717709  5727053.2  176093.2
# +patch     5  5305888  6259610  5854590  5843311    375917.03  2.02998
#
# From lparstat (when the workload stabilized)
# Kernel     %user  %sys  %wait  %idle  physc  %entc  lbusy  app    vcsw       phint
# 6.6.0-rc3  16.66  0.00  0.00   83.33  45.49  71.08  16.67  50.50  288778533  581
# +patch     16.65  0.00  0.00   83.35  30.15  47.11  16.65  65.76  85196150   133
#
# ebizzy -t 512 -S 200 (5 iterations). Records per second (higher is better)
# Kernel     N  Min       Max       Median    Avg       Stddev     %Change
# 6.6.0-rc3  5  19563921  20049955  19701510  19728733  198295.18
# +patch     5  19455992  20176445  19718427  19832017  304094.05  0.523521
#
# From lparstat (when the workload stabilized)
# Kernel     %user  %sys  %wait  %idle  physc  %entc   lbusy  app   vcsw       phint
# 6.6.0-rc3  66.44  0.01  0.00   33.55  94.14  147.09  66.45  1.33  313345175  621
# +patch     66.44  0.01  0.00   33.55  94.15  147.11  66.45  1.33  109193889  309
#
# System Configuration
# type=Shared mode=Uncapped smt=8 lcpu=40 mem=1067539392 kB cpus=96 ent=40.00
# i.e. a *40 entitled cores / 40 virtual processors* scenario
#
# lscpu
# Architecture:          ppc64le
# Byte Order:            Little Endian
# CPU(s):                320
# On-line CPU(s) list:   0-319
# Model name:            POWER10 (architected), altivec supported
# Model:                 2.0 (pvr 0080 0200)
# Thread(s) per core:    8
# Core(s) per socket:    10
# Socket(s):             4
# Hypervisor vendor:     pHyp
# Virtualization type:   para
# L1d cache:             2.5 MiB (80 instances)
# L1i cache:             3.8 MiB (80 instances)
# NUMA node(s):          4
# NUMA node0 CPU(s):     0-7,32-39,64-71,96-103,128-135,160-167,192-199,224-231,256-263,288-295
# NUMA node1 CPU(s):     8-15,40-47,72-79,104-111,136-143,168-175,200-207,232-239,264-271,296-303
# NUMA node4 CPU(s):     16-23,48-55,80-87,112-119,144-151,176-183,208-215,240-247,272-279,304-311
# NUMA node5 CPU(s):     24-31,56-63,88-95,120-127,152-159,184-191,216-223,248-255,280-287,312-319
#
# ebizzy -t 32 -S 200 (5 iterations). Records per second (higher is better)
# Kernel     N  Min      Max      Median   Avg        Stddev     %Change
# 6.6.0-rc3  5  3535518  3864532  3745967  3704233.2  130216.76
# +patch     5  3608385  3708026  3649379  3651596.6  37862.163  -1.42099
#
# Kernel     %user  %sys  %wait  %idle  physc  %entc  lbusy  app    vcsw     phint
# 6.6.0-rc3  10.00  0.01  0.00   89.99  22.98  57.45  10.01  41.01  1135139  262
# +patch     10.00  0.00  0.00   90.00  16.95  42.37  10.00  47.05  925561   19
#
# ebizzy -t 64 -S 200 (5 iterations). Records per second (higher is better)
# Kernel     N  Min      Max      Median   Avg        Stddev     %Change
# 6.6.0-rc3  5  4434984  4957281  4548786  4591298.2  211770.2
# +patch     5  4461115  4835167  4544716  4607795.8  151474.85  0.359323
#
# Kernel     %user  %sys  %wait  %idle  physc  %entc  lbusy  app    vcsw     phint
# 6.6.0-rc3  20.01  0.00  0.00   79.99  38.22  95.55  20.01  25.77  1287553  265
# +patch     19.99  0.00  0.00   80.01  25.55  63.88  19.99  38.44  1077341  20
#
# ebizzy -t 256 -S 200 (5 iterations). Records per second (higher is better)
# Kernel     N  Min      Max      Median   Avg        Stddev     %Change
# 6.6.0-rc3  5  8850648  8982659  8951911  8936869.2  52278.031
# +patch     5  8751038  9060510  8981409  8942268.4  117070.6   0.0604149
#
# Kernel     %user  %sys  %wait  %idle  physc  %entc   lbusy  app    vcsw     phint
# 6.6.0-rc3  80.02  0.01  0.01   19.96  40.00  100.00  80.03  24.00  1597665  276
# +patch     80.02  0.01  0.01   19.96  40.00  100.00  80.03  23.99  1383921  63
#
# Observation:
# We see an improvement in ebizzy throughput even with lower core utilization
# (almost half the core utilization) in the low-utilization scenarios, while
# still retaining throughput in the mid and high utilization scenarios.
# Note: The numbers are with the Uncapped + no-noise case.
# In the Capped and/or noise case, due to contention on the Cores, the numbers
# are expected to further improve.
#
# Note: The numbers above include the patch
# (sched/fair: Enable group_asym_packing in find_idlest_group)
# https://lore.kernel.org/all/20231018155036.2314342-1-srikar@linux.vnet.ibm.com/
# < /opt/cross/kisskb/korg/gcc-5.5.0-nolibc/powerpc64-linux/bin/powerpc64-linux-gcc --version
# < /opt/cross/kisskb/korg/gcc-5.5.0-nolibc/powerpc64-linux/bin/powerpc64-linux-ld --version
# < git log --format=%s --max-count=1 8fc63a91e785ef06fb7f1aba59297793d85095f7
# make -s -j 40 ARCH=powerpc O=/kisskb/build/powerpc-next_mpc85xx_smp_defconfig_powerpc-gcc5 CROSS_COMPILE=/opt/cross/kisskb/korg/gcc-5.5.0-nolibc/powerpc64-linux/bin/powerpc64-linux- mpc85xx_smp_defconfig
Using /kisskb/src/arch/powerpc/configs/mpc85xx_base.config as base
Merging /kisskb/src/arch/powerpc/configs/85xx-32bit.config
Merging /kisskb/src/arch/powerpc/configs/85xx-smp.config
Merging /kisskb/src/arch/powerpc/configs/85xx-hw.config
Merging /kisskb/src/arch/powerpc/configs/fsl-emb-nonhw.config
#
# merged configuration written to .config (needs make)
#
# < make -s -j 40 ARCH=powerpc O=/kisskb/build/powerpc-next_mpc85xx_smp_defconfig_powerpc-gcc5 CROSS_COMPILE=/opt/cross/kisskb/korg/gcc-5.5.0-nolibc/powerpc64-linux/bin/powerpc64-linux- help
# make -s -j 40 ARCH=powerpc O=/kisskb/build/powerpc-next_mpc85xx_smp_defconfig_powerpc-gcc5 CROSS_COMPILE=/opt/cross/kisskb/korg/gcc-5.5.0-nolibc/powerpc64-linux/bin/powerpc64-linux- olddefconfig
# make -s -j 40 ARCH=powerpc O=/kisskb/build/powerpc-next_mpc85xx_smp_defconfig_powerpc-gcc5 CROSS_COMPILE=/opt/cross/kisskb/korg/gcc-5.5.0-nolibc/powerpc64-linux/bin/powerpc64-linux-
In file included from /kisskb/src/include/linux/build_bug.h:5:0,
                 from /kisskb/src/include/linux/container_of.h:5,
                 from /kisskb/src/include/linux/list.h:5,
                 from /kisskb/src/include/linux/module.h:12,
                 from /kisskb/src/drivers/net/ethernet/freescale/fs_enet/mac-scc.c:15:
/kisskb/src/drivers/net/ethernet/freescale/fs_enet/mac-scc.c: In function 'allocate_bd':
/kisskb/src/include/linux/err.h:28:49: warning: cast to pointer from integer of different size [-Wint-to-pointer-cast]
 #define IS_ERR_VALUE(x) unlikely((unsigned long)(void *)(x) >= (unsigned long)-MAX_ERRNO)
                                                 ^
/kisskb/src/include/linux/compiler.h:77:42: note: in definition of macro 'unlikely'
 # define unlikely(x) __builtin_expect(!!(x), 0)
                                          ^
/kisskb/src/drivers/net/ethernet/freescale/fs_enet/mac-scc.c:138:6: note: in expansion of macro 'IS_ERR_VALUE'
  if (IS_ERR_VALUE(fep->ring_mem_addr))
      ^
/kisskb/src/drivers/net/ethernet/freescale/fs_enet/mac-scc.c: At top level:
cc1: warning: unrecognized command line option '-Wno-shift-negative-value'
Completed OK
# rm -rf /kisskb/build/powerpc-next_mpc85xx_smp_defconfig_powerpc-gcc5
# Build took: 0:01:22.400354
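
Editor's note on the -Wint-to-pointer-cast warning above: it is emitted where IS_ERR_VALUE() casts its argument through (void *) and that argument is wider than a pointer on this 32-bit mpc85xx target. The standalone C sketch below is only an illustration of that warning class under stated assumptions, not the driver's actual code: it assumes the tested field behaves like a 64-bit integer (for example a 64-bit dma_addr_t) on a 32-bit build, and it reuses only the macro shape quoted in the log.

/*
 * Minimal sketch (not kernel code). Assumption: the value handed to an
 * IS_ERR_VALUE()-style macro is a 64-bit integer (a stand-in for something
 * like a 64-bit dma_addr_t) while pointers are 32 bits wide, so the
 * intermediate (void *) cast narrows it and GCC reports
 * "cast to pointer from integer of different size".
 */
#include <stdint.h>
#include <stdio.h>

#define MAX_ERRNO 4095

/* Same shape as the macro quoted in the log: cast via (void *), then compare. */
#define IS_ERR_VALUE(x) \
	((unsigned long)(void *)(x) >= (unsigned long)-MAX_ERRNO)

int main(void)
{
	/* Hypothetical stand-in for a wide DMA/bus address on a 32-bit build;
	 * the low 32 bits look like -ENOMEM (-12) after truncation. */
	uint64_t ring_mem_addr = 0xfffffffffffffff4ULL;

	if (IS_ERR_VALUE(ring_mem_addr))
		printf("treated as an error value\n");
	else
		printf("treated as a valid address\n");

	return 0;
}

Building this sketch for a 32-bit target (e.g. gcc -m32 -Wall, where a 32-bit multilib is available) should reproduce the same diagnostic at the (void *) cast; a 64-bit build compiles silently because the integer and pointer widths match.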