# git rev-parse -q --verify 8cb1ae19bfae92def42c985417cd6e894ddaa047^{commit} 8cb1ae19bfae92def42c985417cd6e894ddaa047 already have revision, skipping fetch # git checkout -q -f -B kisskb 8cb1ae19bfae92def42c985417cd6e894ddaa047 # git clean -qxdf # < git log -1 # commit 8cb1ae19bfae92def42c985417cd6e894ddaa047 # Merge: 7d20dd3294b3 d7a9590f608d # Author: Linus Torvalds # Date: Mon Nov 1 14:03:56 2021 -0700 # # Merge tag 'x86-fpu-2021-11-01' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip # # Pull x86 fpu updates from Thomas Gleixner: # # - Cleanup of extable fixup handling to be more robust, which in turn # allows to make the FPU exception fixups more robust as well. # # - Change the return code for signal frame related failures from # explicit error codes to a boolean fail/success as that's all what the # calling code evaluates. # # - A large refactoring of the FPU code to prepare for adding AMX # support: # # - Disentangle the public header maze and remove especially the # misnamed kitchen sink internal.h which is despite its name # included all over the place. # # - Add a proper abstraction for the register buffer storage (struct # fpstate) which allows to dynamically size the buffer at runtime # by flipping the pointer to the buffer container from the default # container which is embedded in task_struct::thread::fpu to a # dynamically allocated container with a larger register buffer. # # - Convert the code over to the new fpstate mechanism. # # - Consolidate the KVM FPU handling by moving the FPU related code # into the FPU core which reduces the number of exports and avoids # adding even more exports when AMX has to be supported in KVM. # This also removes duplicated code which was of course # unnecessarily different and incomplete in the KVM copy. 
# # - Simplify the KVM FPU buffer handling by utilizing the new # fpstate container and just switching the buffer pointer from the # user space buffer to the KVM guest buffer when entering # vcpu_run() and flipping it back when leaving the function. This # cuts the memory requirements of a vCPU for FPU buffers in half # and avoids pointless memory copy operations. # # This also solves the so far unresolved problem of adding AMX # support because the current FPU buffer handling of KVM inflicted # a circular dependency between adding AMX support to the core and # to KVM. With the new scheme of switching fpstate AMX support can # be added to the core code without affecting KVM. # # - Replace various variables with proper data structures so the # extra information required for adding dynamically enabled FPU # features (AMX) can be added in one place # # - Add AMX (Advanced Matrix eXtensions) support (finally): # # AMX is a large XSTATE component which is going to be available with # Sapphire Rapids XEON CPUs. The feature comes with an extra MSR # (MSR_XFD) which allows to trap the (first) use of an AMX related # instruction, which has two benefits: # # 1) It allows the kernel to control access to the feature # # 2) It allows the kernel to dynamically allocate the large register # state buffer instead of burdening every task with the extra # 8K or larger state storage. # # It would have been great to gain this kind of control already with # AVX512. # # The support comes with the following infrastructure components: # # 1) arch_prctl() to # - read the supported features (equivalent to XGETBV(0)) # - read the permitted features for a task # - request permission for a dynamically enabled feature # # Permission is granted per process, inherited on fork() and # cleared on exec(). The permission policy of the kernel is # restricted to sigaltstack size validation, but the syscall # obviously allows further restrictions via seccomp etc. 
# # 2) A stronger sigaltstack size validation for sys_sigaltstack(2) # which takes granted permissions and the potentially resulting # larger signal frame into account. This mechanism can also be used # to enforce factual sigaltstack validation independent of dynamic # features to help with finding potential victims of the 2K # sigaltstack size constant which is broken since AVX512 support # was added. # # 3) Exception handling for #NM traps to catch first use of an extended # feature via a new cause MSR. If the exception was caused by the # use of such a feature, the handler checks permission for that # feature. If permission has not been granted, the handler sends a # SIGILL like the #UD handler would do if the feature would have # been disabled in XCR0. If permission has been granted, then a new # fpstate which fits the larger buffer requirement is allocated. # # In the unlikely case that this allocation fails, the handler # sends SIGSEGV to the task. That's not elegant, but unavoidable as # the other discussed options of preallocation or full per task # permissions come with their own set of horrors for kernel and/or # userspace. So this is the lesser of the evils and SIGSEGV caused # by unexpected memory allocation failures is not a fundamentally # new concept either. # # When allocation succeeds, the fpstate properties are filled in to # reflect the extended feature set and the resulting sizes, the # fpu::fpstate pointer is updated accordingly and the trap is # disarmed for this task permanently. # # 4) Enumeration and size calculations # # 5) Trap switching via MSR_XFD # # The XFD (eXtended Feature Disable) MSR is context switched with # the same life time rules as the FPU register state itself. The # mechanism is keyed off with a static key which is default # disabled so !AMX equipped CPUs have zero overhead. On AMX enabled # CPUs the overhead is limited by comparing the tasks XFD value # with a per CPU shadow variable to avoid redundant MSR writes. 
In # case of switching from an AMX using task to a non AMX using task # or vice versa, the extra MSR write is obviously inevitable. # # All other places which need to be aware of the variable feature # sets and resulting variable sizes are not affected at all because # they retrieve the information (feature set, sizes) unconditionally # from the fpstate properties. # # 6) Enable the new AMX states # # Note, this is relatively new code despite the fact that AMX support # is in the works for more than a year now. # # The big refactoring of the FPU code, which allowed to do a proper # integration has been started exactly 3 weeks ago. Refactoring of the # existing FPU code and of the original AMX patches took a week and has # been subject to extensive review and testing. The only fallout which # has not been caught in review and testing right away was restricted # to AMX enabled systems, which is completely irrelevant for anyone # outside Intel and their early access program. There might be dragons # lurking as usual, but so far the fine grained refactoring has held up # and eventual yet undetected fallout is bisectable and should be # easily addressable before the 5.16 release. Famous last words... 
# # Many thanks to Chang Bae and Dave Hansen for working hard on this and # also to the various test teams at Intel who reserved extra capacity # to follow the rapid development of this closely which provides the # confidence level required to offer this rather large update for # inclusion into 5.16-rc1 # # * tag 'x86-fpu-2021-11-01' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (110 commits) # Documentation/x86: Add documentation for using dynamic XSTATE features # x86/fpu: Include vmalloc.h for vzalloc() # selftests/x86/amx: Add context switch test # selftests/x86/amx: Add test cases for AMX state management # x86/fpu/amx: Enable the AMX feature in 64-bit mode # x86/fpu: Add XFD handling for dynamic states # x86/fpu: Calculate the default sizes independently # x86/fpu/amx: Define AMX state components and have it used for boot-time checks # x86/fpu/xstate: Prepare XSAVE feature table for gaps in state component numbers # x86/fpu/xstate: Add fpstate_realloc()/free() # x86/fpu/xstate: Add XFD #NM handler # x86/fpu: Update XFD state where required # x86/fpu: Add sanity checks for XFD # x86/fpu: Add XFD state to fpstate # x86/msr-index: Add MSRs for XFD # x86/cpufeatures: Add eXtended Feature Disabling (XFD) feature bit # x86/fpu: Reset permission and fpstate on exec() # x86/fpu: Prepare fpu_clone() for dynamically enabled features # x86/fpu/signal: Prepare for variable sigframe length # x86/signal: Use fpu::__state_user_size for sigalt stack validation # ... 
# < /opt/cross/kisskb/korg/gcc-11.1.0-nolibc/powerpc64-linux/bin/powerpc64-linux-gcc --version # < /opt/cross/kisskb/korg/gcc-11.1.0-nolibc/powerpc64-linux/bin/powerpc64-linux-ld --version # < git log --format=%s --max-count=1 8cb1ae19bfae92def42c985417cd6e894ddaa047 # < make -s -j 24 ARCH=powerpc O=/kisskb/build/linus-rand_powerpc-randconfig_powerpc-gcc11 CROSS_COMPILE=/opt/cross/kisskb/korg/gcc-11.1.0-nolibc/powerpc64-linux/bin/powerpc64-linux- randconfig # Added to kconfig CONFIG_STANDALONE=y # Added to kconfig CONFIG_BUILD_DOCSRC=n # Added to kconfig CONFIG_MODULE_SIG=n # Added to kconfig CONFIG_CPU_BIG_ENDIAN=y # Added to kconfig CONFIG_PPC64=y # Added to kconfig CONFIG_PPC_DISABLE_WERROR=y # Added to kconfig CONFIG_SECTION_MISMATCH_WARN_ONLY=y # Added to kconfig CONFIG_PREVENT_FIRMWARE_BUILD=y # Added to kconfig CONFIG_CC_STACKPROTECTOR_STRONG=n # Added to kconfig CONFIG_GCC_PLUGINS=n # Added to kconfig CONFIG_LD_HEAD_STUB_CATCH=y # Added to kconfig CONFIG_TRIM_UNUSED_KSYMS=n # Added to kconfig CONFIG_UBSAN=n # < make -s -j 24 ARCH=powerpc O=/kisskb/build/linus-rand_powerpc-randconfig_powerpc-gcc11 CROSS_COMPILE=/opt/cross/kisskb/korg/gcc-11.1.0-nolibc/powerpc64-linux/bin/powerpc64-linux- help # make -s -j 24 ARCH=powerpc O=/kisskb/build/linus-rand_powerpc-randconfig_powerpc-gcc11 CROSS_COMPILE=/opt/cross/kisskb/korg/gcc-11.1.0-nolibc/powerpc64-linux/bin/powerpc64-linux- olddefconfig .config:5858:warning: override: reassigning to symbol CPU_BIG_ENDIAN .config:5858:warning: override: CPU_BIG_ENDIAN changes choice state .config:5860:warning: override: reassigning to symbol PPC_DISABLE_WERROR .config:5862:warning: override: reassigning to symbol PREVENT_FIRMWARE_BUILD .config:5864:warning: override: reassigning to symbol GCC_PLUGINS .config:5865:warning: override: reassigning to symbol LD_HEAD_STUB_CATCH .config:5867:warning: override: reassigning to symbol UBSAN # make -s -j 24 ARCH=powerpc O=/kisskb/build/linus-rand_powerpc-randconfig_powerpc-gcc11 
CROSS_COMPILE=/opt/cross/kisskb/korg/gcc-11.1.0-nolibc/powerpc64-linux/bin/powerpc64-linux- In file included from /kisskb/src/include/linux/mmzone.h:22, from /kisskb/src/include/linux/gfp.h:6, from /kisskb/src/include/linux/xarray.h:14, from /kisskb/src/include/linux/radix-tree.h:19, from /kisskb/src/include/linux/fs.h:15, from /kisskb/src/include/linux/compat.h:17, from /kisskb/src/arch/powerpc/kernel/asm-offsets.c:14: /kisskb/src/include/linux/page-flags.h:806:29: error: macro "PAGEFLAG_FALSE" requires 2 arguments, but only 1 given 806 | PAGEFLAG_FALSE(HasHWPoisoned) | ^ /kisskb/src/include/linux/page-flags.h:411: note: macro "PAGEFLAG_FALSE" defined here 411 | #define PAGEFLAG_FALSE(uname, lname) TESTPAGEFLAG_FALSE(uname, lname) \ | /kisskb/src/include/linux/page-flags.h:807:39: error: macro "TESTSCFLAG_FALSE" requires 2 arguments, but only 1 given 807 | TESTSCFLAG_FALSE(HasHWPoisoned) | ^ /kisskb/src/include/linux/page-flags.h:414: note: macro "TESTSCFLAG_FALSE" defined here 414 | #define TESTSCFLAG_FALSE(uname, lname) \ | /kisskb/src/include/linux/page-flags.h:806:1: error: unknown type name 'PAGEFLAG_FALSE' 806 | PAGEFLAG_FALSE(HasHWPoisoned) | ^~~~~~~~~~~~~~ /kisskb/src/include/linux/page-flags.h:807:25: error: expected ';' before 'static' 807 | TESTSCFLAG_FALSE(HasHWPoisoned) | ^ | ; ...... 815 | static inline bool is_page_hwpoison(struct page *page) | ~~~~~~ make[2]: *** [/kisskb/src/scripts/Makefile.build:121: arch/powerpc/kernel/asm-offsets.s] Error 1 make[1]: *** [/kisskb/src/Makefile:1220: prepare0] Error 2 make: *** [Makefile:219: __sub-make] Error 2 Command 'make -s -j 24 ARCH=powerpc O=/kisskb/build/linus-rand_powerpc-randconfig_powerpc-gcc11 CROSS_COMPILE=/opt/cross/kisskb/korg/gcc-11.1.0-nolibc/powerpc64-linux/bin/powerpc64-linux- ' returned non-zero exit status 2 # rm -rf /kisskb/build/linus-rand_powerpc-randconfig_powerpc-gcc11 # Build took: 0:00:12.959991