# git rev-parse -q --verify 535a265d7f0dd50d8c3a4f8b4f3a452d56bd160f^{commit} 535a265d7f0dd50d8c3a4f8b4f3a452d56bd160f already have revision, skipping fetch # git checkout -q -f -B kisskb 535a265d7f0dd50d8c3a4f8b4f3a452d56bd160f # git clean -qxdf # < git log -1 # commit 535a265d7f0dd50d8c3a4f8b4f3a452d56bd160f # Merge: fd3a5940e66d 45fc4628c15a # Author: Linus Torvalds # Date: Sat Sep 9 20:06:17 2023 -0700 # # Merge tag 'perf-tools-for-v6.6-1-2023-09-05' of git://git.kernel.org/pub/scm/linux/kernel/git/perf/perf-tools # # Pull perf tools updates from Arnaldo Carvalho de Melo: # "perf tools maintainership: # # - Add git information for perf-tools and perf-tools-next trees and # branches to the MAINTAINERS file. That is where development now # takes place and myself and Namhyung Kim have write access, more # people to come as we emulate other maintainer groups. # # perf record: # # - Record kernel data maps when 'perf record --data' is used, so that # global variables can be resolved and used in tools that do data # profiling. # # perf trace: # # - Remove the old, experimental support for BPF events in which a .c # file was passed as an event: "perf trace -e hello.c" to then get # compiled and loaded. # # The only known usage for that, that shipped with the kernel as an # example for such events, augmented the raw_syscalls tracepoints and # was converted to a libbpf skeleton, reusing all the user space # components and the BPF code connected to the syscalls. # # In the end just the way to glue the BPF part and the user space # type beautifiers changed, now being performed by libbpf skeletons. # # The next step is to use BTF to do pretty printing of all syscall # types, as discussed with Alan Maguire and others. # # Now, on a perf built with BUILD_BPF_SKEL=1 we get most if not all # path/filenames/strings, some of the networking data structures, # perf_event_attr, etc, i.e. 
systemwide tracing of nanosleep calls # and perf_event_open syscalls while 'perf stat' runs 'sleep' for 5 # seconds: # # # perf trace -a -e *nanosleep,perf* perf stat -e cycles,instructions sleep 5 # 0.000 ( 9.034 ms): perf/327641 perf_event_open(attr_uptr: { type: 0 (PERF_TYPE_HARDWARE), size: 136, config: 0 (PERF_COUNT_HW_CPU_CYCLES), sample_type: IDENTIFIER, read_format: TOTAL_TIME_ENABLED|TOTAL_TIME_RUNNING, disabled: 1, inherit: 1, enable_on_exec: 1, exclude_guest: 1 }, pid: 327642 (perf), cpu: -1, group_fd: -1, flags: FD_CLOEXEC) = 3 # 9.039 ( 0.006 ms): perf/327641 perf_event_open(attr_uptr: { type: 0 (PERF_TYPE_HARDWARE), size: 136, config: 0x1 (PERF_COUNT_HW_INSTRUCTIONS), sample_type: IDENTIFIER, read_format: TOTAL_TIME_ENABLED|TOTAL_TIME_RUNNING, disabled: 1, inherit: 1, enable_on_exec: 1, exclude_guest: 1 }, pid: 327642 (perf-exec), cpu: -1, group_fd: -1, flags: FD_CLOEXEC) = 4 # ? ( ): gpm/991 ... [continued]: clock_nanosleep()) = 0 # 10.133 ( ): sleep/327642 clock_nanosleep(rqtp: { .tv_sec: 5, .tv_nsec: 0 }, rmtp: 0x7ffd36f83ed0) ... # ? ( ): pool-gsd-smart/3051 ... [continued]: clock_nanosleep()) = 0 # 30.276 ( ): gpm/991 clock_nanosleep(rqtp: { .tv_sec: 2, .tv_nsec: 0 }, rmtp: 0x7ffcc6f73710) ... # 223.215 (1000.430 ms): pool-gsd-smart/3051 clock_nanosleep(rqtp: { .tv_sec: 1, .tv_nsec: 0 }, rmtp: 0x7f6e7fffec90) = 0 # 30.276 (2000.394 ms): gpm/991 ... [continued]: clock_nanosleep()) = 0 # 1230.814 ( ): pool-gsd-smart/3051 clock_nanosleep(rqtp: { .tv_sec: 1, .tv_nsec: 0 }, rmtp: 0x7f6e7fffec90) ... # 1230.814 (1000.404 ms): pool-gsd-smart/3051 ... [continued]: clock_nanosleep()) = 0 # 2030.886 ( ): gpm/991 clock_nanosleep(rqtp: { .tv_sec: 2, .tv_nsec: 0 }, rmtp: 0x7ffcc6f73710) ... # 2237.709 (1000.153 ms): pool-gsd-smart/3051 clock_nanosleep(rqtp: { .tv_sec: 1, .tv_nsec: 0 }, rmtp: 0x7f6e7fffec90) = 0 # ? ( ): crond/1172 ... 
[continued]: clock_nanosleep()) = 0 # 3242.699 ( ): pool-gsd-smart/3051 clock_nanosleep(rqtp: { .tv_sec: 1, .tv_nsec: 0 }, rmtp: 0x7f6e7fffec90) ... # 2030.886 (2000.385 ms): gpm/991 ... [continued]: clock_nanosleep()) = 0 # 3728.078 ( ): crond/1172 clock_nanosleep(rqtp: { .tv_sec: 60, .tv_nsec: 0 }, rmtp: 0x7ffe0971dcf0) ... # 3242.699 (1000.158 ms): pool-gsd-smart/3051 ... [continued]: clock_nanosleep()) = 0 # 4031.409 ( ): gpm/991 clock_nanosleep(rqtp: { .tv_sec: 2, .tv_nsec: 0 }, rmtp: 0x7ffcc6f73710) ... # 10.133 (5000.375 ms): sleep/327642 ... [continued]: clock_nanosleep()) = 0 # # Performance counter stats for 'sleep 5': # # 2,617,347 cycles # 1,855,997 instructions # 0.71 insn per cycle # # 5.002282128 seconds time elapsed # # 0.000855000 seconds user # 0.000852000 seconds sys # # perf annotate: # # - Building with binutils' libopcode now is opt-in (BUILD_NONDISTRO=1) # for licensing reasons, and we missed a build test on # tools/perf/tests makefile. # # Since we now default to NDEBUG=1, we ended up segfaulting when # building with BUILD_NONDISTRO=1 because a needed initialization # routine was being "error checked" via an assert. # # Fix it by explicitly checking the result and aborting instead if it # fails. # # We better back propagate the error, but at least 'perf annotate' on # samples collected for a BPF program is back working when perf is # built with BUILD_NONDISTRO=1. # # perf report/top: # # - Add back TUI hierarchy mode header, that is seen when using 'perf # report/top --hierarchy'. # # - Fix the number of entries for 'e' key in the TUI that was # preventing navigation of lines when expanding an entry. # # perf report/script: # # - Support cross platform register handling, allowing a perf.data file # collected on one architecture to have registers sampled correctly # displayed when analysis tools such as 'perf report' and 'perf # script' are used on a different architecture. # # - Fix handling of event attributes in pipe mode, i.e. 
when one uses: # # perf record -o - | perf report -i - # # When no perf.data files are used. # # - Handle files generated via pipe mode with a version of perf and # then read also via pipe mode with a different version of perf, # where the event attr record may have changed, use the record size # field to properly support this version mismatch. # # perf probe: # # - Accessing global variables from uprobes isn't supported, make the # error message state that instead of stating that some minimal # kernel version is needed to have that feature. This seems just a # tool limitation, the kernel probably has all that is needed. # # perf tests: # # - Fix a reference count related leak in the dlfilter v0 API where the # result of a thread__find_symbol_fb() is not matched with an # addr_location__exit() to drop the reference counts of the resolved # components (machine, thread, map, symbol, etc). Add a dlfilter test # to make sure that doesn't regress. # # - Lots of fixes for the 'perf test' written in shell script related # to problems found with the shellcheck utility. # # - Fixes for 'perf test' shell scripts testing features enabled when # perf is built with BUILD_BPF_SKEL=1, such as 'perf stat' bpf # counters. # # - Add perf record sample filtering test, things like the following # example, that gets implemented as a BPF filter attached to the # event: # # # perf record -e task-clock -c 10000 --filter 'ip < 0xffffffff00000000' # # - Improve the way the task_analyzer test checks if libtraceevent is # linked, using 'perf version --build-options' instead of the more # expensive 'perf record -e "sched:sched_switch"'. # # - Add support for riscv in the mmap-basic test. (This went as well # via the RiscV tree, same contents). # # libperf: # # - Implement riscv mmap support (This went as well via the RiscV tree, # same contents). 
# # perf script: # # - New tool that converts perf.data files to the firefox profiler # format so that one can use the visualizer at # https://profiler.firefox.com/. Done by Anup Sharma as part of this # year's Google Summer of Code. # # One can generate the output and upload it to the web interface but # Anup also automated everything: # # perf script gecko -F 99 -a sleep 60 # # - Support syscall name parsing on arm64. # # - Print "cgroup" field on the same line as "comm". # # perf bench: # # - Add new 'uprobe' benchmark to measure the overhead of uprobes # with/without BPF programs attached to it. # # - breakpoints are not available on power9, skip that test. # # perf stat: # # - Add #num_cpus_online literal to be used in 'perf stat' metrics, and # add this extra 'perf test' check that exemplifies its purpose: # # TEST_ASSERT_VAL("#num_cpus_online", # expr__parse(&num_cpus_online, ctx, "#num_cpus_online") == 0); # TEST_ASSERT_VAL("#num_cpus", expr__parse(&num_cpus, ctx, "#num_cpus") == 0); # TEST_ASSERT_VAL("#num_cpus >= #num_cpus_online", num_cpus >= num_cpus_online); # # Miscellaneous: # # - Improve tool startup time by lazily reading PMU, JSON, sysfs data. # # - Improve error reporting in the parsing of events, passing YYLTYPE # to error routines, so that the output can show where the parsing # error was found. # # - Add 'perf test' entries to check the parsing of events # improvements. # # - Fix various leaks for things detected by -fsanitize=address, mostly # things that would be freed at tool exit, including: # # - Free evsel->filter on the destructor. # # - Allow tools to register a thread->priv destructor and use it in # 'perf trace'. # # - Free evsel->priv in 'perf trace'. # # - Free string returned by synthesize_perf_probe_point() when the # caller fails to do all it needs. 
# # - Adjust various compiler options to not consider errors some # warnings when building with broken headers found in things like # python, flex, bison, as we otherwise build with -Werror. Some for # gcc, some for clang, some for some specific version of those, some # for some specific version of flex or bison, or some specific # combination of these components, bah. # # - Allow customization of clang options for BPF target, this helps # building on gentoo where there are other oddities where BPF targets # gets passed some compiler options intended for the native build, so # building with WERROR=0 helps while these oddities are fixed. # # - Don't pass ERR_PTR() values to perf_session__delete() in 'perf top' # and 'perf lock', fixing some segfaults when handling some odd # failures. # # - Add LTO build option. # # - Fix format of unordered lists in the perf docs # (tools/perf/Documentation) # # - Overhaul the bison files, using constructs such as YYNOMEM. # # - Remove unused tokens from the bison .y files. # # - Add more comments to various structs. # # - A few LoongArch enablement patches. # # Vendor events (JSON): # # - Add JSON metrics for Yitian 710 DDR (aarch64). Things like: # # EventName, BriefDescription # visible_window_limit_reached_rd, "At least one entry in read queue reaches the visible window limit.", # visible_window_limit_reached_wr, "At least one entry in write queue reaches the visible window limit.", # op_is_dqsosc_mpc , "A DQS Oscillator MPC command to DRAM.", # op_is_dqsosc_mrr , "A DQS Oscillator MRR command to DRAM.", # op_is_tcr_mrr , "A Temperature Compensated Refresh(TCR) MRR command to DRAM.", # # - Add AmpereOne metrics (aarch64). # # - Update N2 and V2 metrics (aarch64) and events using Arm telemetry # repo. # # - Update scale units and descriptions of common topdown metrics on # aarch64. 
Things like: # - "MetricExpr": "stall_slot_frontend / (#slots * cpu_cycles)", # - "BriefDescription": "Frontend bound L1 topdown metric", # + "MetricExpr": "100 * (stall_slot_frontend / (#slots * cpu_cycles))", # + "BriefDescription": "This metric is the percentage of total slots that were stalled due to resource constraints in the frontend of the processor.", # # - Update events for intel: meteorlake to 1.04, sapphirerapids to # 1.15, Icelake+ metric constraints. # # - Update files for the power10 platform" # # * tag 'perf-tools-for-v6.6-1-2023-09-05' of git://git.kernel.org/pub/scm/linux/kernel/git/perf/perf-tools: (217 commits) # perf parse-events: Fix driver config term # perf parse-events: Fixes relating to no_value terms # perf parse-events: Fix propagation of term's no_value when cloning # perf parse-events: Name the two term enums # perf list: Don't print Unit for "default_core" # perf vendor events intel: Fix modifier in tma_info_system_mem_parallel_reads for skylake # perf dlfilter: Avoid leak in v0 API test use of resolve_address() # perf metric: Add #num_cpus_online literal # perf pmu: Remove str from perf_pmu_alias # perf parse-events: Make common term list to strbuf helper # perf parse-events: Minor help message improvements # perf pmu: Avoid uninitialized use of alias->str # perf jevents: Use "default_core" for events with no Unit # perf test stat_bpf_counters_cgrp: Enhance perf stat cgroup BPF counter test # perf test shell stat_bpf_counters: Fix test on Intel # perf test shell record_bpf_filter: Skip 6.2 kernel # libperf: Get rid of attr.id field # perf tools: Convert to perf_record_header_attr_id() # libperf: Add perf_record_header_attr_id() # perf tools: Handle old data in PERF_RECORD_ATTR # ... 
# < /opt/cross/kisskb/korg/gcc-8.5.0-nolibc/x86_64-linux/bin/x86_64-linux-gcc --version # < /opt/cross/kisskb/korg/gcc-8.5.0-nolibc/x86_64-linux/bin/x86_64-linux-ld --version # < git log --format=%s --max-count=1 535a265d7f0dd50d8c3a4f8b4f3a452d56bd160f # make -s -j 40 ARCH=x86 O=/kisskb/build/linus_x86_64_defconfig_x86_64-gcc8 CROSS_COMPILE=/opt/cross/kisskb/korg/gcc-8.5.0-nolibc/x86_64-linux/bin/x86_64-linux- x86_64_defconfig # < make -s -j 40 ARCH=x86 O=/kisskb/build/linus_x86_64_defconfig_x86_64-gcc8 CROSS_COMPILE=/opt/cross/kisskb/korg/gcc-8.5.0-nolibc/x86_64-linux/bin/x86_64-linux- help # make -s -j 40 ARCH=x86 O=/kisskb/build/linus_x86_64_defconfig_x86_64-gcc8 CROSS_COMPILE=/opt/cross/kisskb/korg/gcc-8.5.0-nolibc/x86_64-linux/bin/x86_64-linux- olddefconfig # make -s -j 40 ARCH=x86 O=/kisskb/build/linus_x86_64_defconfig_x86_64-gcc8 CROSS_COMPILE=/opt/cross/kisskb/korg/gcc-8.5.0-nolibc/x86_64-linux/bin/x86_64-linux- Completed OK # rm -rf /kisskb/build/linus_x86_64_defconfig_x86_64-gcc8 # Build took: 0:01:33.317395