# git rev-parse -q --verify c228d294f2040c3a5f5965ff04d4947d0bf6e7da^{commit} c228d294f2040c3a5f5965ff04d4947d0bf6e7da already have revision, skipping fetch # git checkout -q -f -B kisskb c228d294f2040c3a5f5965ff04d4947d0bf6e7da # git clean -qxdf # < git log -1 # commit c228d294f2040c3a5f5965ff04d4947d0bf6e7da # Author: Linus Torvalds # Date: Thu Jan 31 11:10:20 2019 -0800 # # x86: explicitly align IO accesses in memcpy_{to,from}io # # In commit 170d13ca3a2f ("x86: re-introduce non-generic memcpy_{to,from}io") # I made our copy from IO space use a separate copy routine rather than # rely on the generic memcpy. I did that because our generic memory copy # isn't actually well-defined when it comes to internal access ordering or # alignment, and will in fact depend on various CPUID flags. # # In particular, the default memcpy() for a modern Intel CPU will # generally be just a "rep movsb", which works reasonably well for # medium-sized memory copies of regular RAM, since the CPU will turn it # into fairly optimized microcode. # # However, for non-cached memory and IO, "rep movs" ends up being # horrendously slow and will just do the architectural "one byte at a # time" accesses implied by the movsb. # # At the other end of the spectrum, if you _don't_ end up using the "rep # movsb" code, you'd likely fall back to the software copy, which does # overlapping accesses for the tail, and may copy things backwards. # Again, for regular memory that's fine, for IO memory not so much. # # The thinking was that clearly nobody really cared (because things # worked), but some people had seen horrible performance due to the byte # accesses, so let's just revert back to our long ago version that did # "rep movsl" for the bulk of the copy, and then fixed up the potentially # last few bytes of the tail with "movsw/b". 
# # Interestingly (and perhaps not entirely surprisingly), while that was # our original memory copy implementation, and had been used before for # IO, in the meantime many new users of memcpy_*io() had come about. And # while the access patterns for the memory copy weren't well-defined (so # arguably _any_ access pattern should work), in practice the "rep movsb" # case had been very common for the last several years. # # In particular Jarkko Sakkinen reported that the memcpy_*io() change # resulted in weird errors from his Geminilake NUC TPM module. # # And it turns out that the TPM TCG accesses according to spec require # that the accesses be # # (a) done strictly sequentially # # (b) be naturally aligned # # otherwise the TPM chip will abort the PCI transaction. # # And, in fact, the tpm_crb.c driver did this: # # memcpy_fromio(buf, priv->rsp, 6); # ... # memcpy_fromio(&buf[6], &priv->rsp[6], expected - 6); # # which really should never have worked in the first place, but back # before commit 170d13ca3a2f it *happened* to work, because the # memcpy_fromio() would be expanded to a regular memcpy, and # # (a) gcc would expand the first memcpy in-line, and turn it into a # 4-byte and a 2-byte read, and they happened to be in the right # order, and the alignment was right. # # (b) gcc would call "memcpy()" for the second one, and the machines that # had this TPM chip also apparently ended up always having ERMS # ("Enhanced REP MOVSB/STOSB instructions"), so we'd use the "rep # movsb" for that copy. # # In other words, basically by pure luck, the code happened to use the # right access sizes in the (two different!) memcpy() implementations to # make it all work. # # But after commit 170d13ca3a2f, both of the memcpy_fromio() calls # resulted in a call to the routine with the consistent memory accesses, # and in both cases it started out transferring with 4-byte accesses. 
# Which worked for the first copy, but resulted in the second copy doing a # 32-bit read at an address that was only 2-byte aligned. # # Jarkko is actually fixing the fragile code in the TPM driver, but since # this is an excellent example of why we absolutely must not use a generic # memcpy for IO accesses, _and_ an IO-specific one really should strive to # align the IO accesses, let's do exactly that. # # Side note: Jarkko also noted that the driver had been used on ARM # platforms, and had worked. That was because on 32-bit ARM, memcpy_*io() # ends up always doing byte accesses, and on 64-bit ARM it first does byte # accesses to align to 8-byte boundaries, and then does 8-byte accesses # for the bulk. # # So ARM actually worked by design, and the x86 case worked by pure luck. # # We *might* want to make x86-64 do the 8-byte case too. That should be a # pretty straightforward extension, but let's do one thing at a time. And # generally MMIO accesses aren't really all that performance-critical, as # shown by the fact that for a long time we just did them a byte at a # time, and very few people ever noticed. 
# # Reported-and-tested-by: Jarkko Sakkinen # Tested-by: Jerry Snitselaar # Cc: David Laight # Fixes: 170d13ca3a2f ("x86: re-introduce non-generic memcpy_{to,from}io") # Signed-off-by: Linus Torvalds # < /opt/cross/kisskb/gcc-4.6.3-nolibc/m68k-linux/bin/m68k-linux-gcc --version # < /opt/cross/kisskb/gcc-4.6.3-nolibc/m68k-linux/bin/m68k-linux-ld --version # < git log --format=%s --max-count=1 c228d294f2040c3a5f5965ff04d4947d0bf6e7da # < make -s -j 48 ARCH=m68k O=/kisskb/build/linus_m68k-defconfig_m68k CROSS_COMPILE=/opt/cross/kisskb/gcc-4.6.3-nolibc/m68k-linux/bin/m68k-linux- defconfig # make -s -j 48 ARCH=m68k O=/kisskb/build/linus_m68k-defconfig_m68k CROSS_COMPILE=/opt/cross/kisskb/gcc-4.6.3-nolibc/m68k-linux/bin/m68k-linux- :1240:2: warning: #warning syscall seccomp not implemented [-Wcpp] :1318:2: warning: #warning syscall pkey_mprotect not implemented [-Wcpp] :1321:2: warning: #warning syscall pkey_alloc not implemented [-Wcpp] :1324:2: warning: #warning syscall pkey_free not implemented [-Wcpp] :1333:2: warning: #warning syscall io_pgetevents not implemented [-Wcpp] :1336:2: warning: #warning syscall rseq not implemented [-Wcpp] /kisskb/src/arch/m68k/kernel/signal.c: In function 'mangle_kernel_stack': /kisskb/src/arch/m68k/kernel/signal.c:654:3: warning: variable length array 'buf' is used [-Wvla] /kisskb/src/arch/m68k/atari/config.c: In function 'atari_switches_setup': /kisskb/src/arch/m68k/atari/config.c:151:2: warning: variable length array 'switches' is used [-Wvla] /kisskb/src/arch/m68k/mvme147/config.c: In function 'mvme147_hwclk': /kisskb/src/arch/m68k/mvme147/config.c:150:2: warning: #warning check me! [-Wcpp] /kisskb/src/arch/m68k/mvme16x/config.c: In function 'mvme16x_hwclk': /kisskb/src/arch/m68k/mvme16x/config.c:397:2: warning: #warning check me! 
[-Wcpp] /kisskb/src/kernel/printk/printk.c: In function 'devkmsg_sysctl_set_loglvl': /kisskb/src/kernel/printk/printk.c:186:16: warning: 'old' may be used uninitialized in this function [-Wuninitialized] /kisskb/src/ipc/shm.c: In function 'ksys_shmdt': /kisskb/src/ipc/shm.c:1686:59: warning: 'file' may be used uninitialized in this function [-Wuninitialized] /kisskb/src/lib/mpi/mpicoder.c: In function 'mpi_read_raw_from_sgl': /kisskb/src/lib/mpi/mpicoder.c:336:12: warning: 'buff' may be used uninitialized in this function [-Wuninitialized] /kisskb/src/kernel/acct.c: In function 'acct_pin_kill': /kisskb/src/kernel/acct.c:177:2: warning: value computed is not used [-Wunused-value] /kisskb/src/net/core/gen_stats.c: In function '__gnet_stats_copy_basic': /kisskb/src/net/core/gen_stats.c:161:19: warning: 'seq' may be used uninitialized in this function [-Wuninitialized] /kisskb/src/lib/rhashtable.c: In function 'rht_deferred_worker': /kisskb/src/lib/rhashtable.c:264:2: warning: 'next' may be used uninitialized in this function [-Wuninitialized] /kisskb/src/lib/rhashtable.c:229:28: note: 'next' was declared here /kisskb/src/net/core/filter.c: In function 'bpf_clear_redirect_map': /kisskb/src/net/core/filter.c:3470:4: warning: value computed is not used [-Wunused-value] /kisskb/src/net/core/dev.c: In function 'validate_xmit_skb_list': /kisskb/src/net/core/dev.c:3405:15: warning: 'tail' may be used uninitialized in this function [-Wuninitialized] /kisskb/src/fs/proc/inode.c: In function 'proc_reg_open': /kisskb/src/include/linux/list.h:65:12: warning: 'pdeo' may be used uninitialized in this function [-Wuninitialized] /kisskb/src/fs/proc/inode.c:339:21: note: 'pdeo' was declared here /kisskb/src/drivers/net/ethernet/8390/lib8390.c:201:12: warning: '__ei_open' defined but not used [-Wunused-function] /kisskb/src/drivers/net/ethernet/8390/lib8390.c:230:12: warning: '__ei_close' defined but not used [-Wunused-function] /kisskb/src/drivers/net/ethernet/8390/lib8390.c:254:13: 
warning: '__ei_tx_timeout' defined but not used [-Wunused-function] /kisskb/src/drivers/net/ethernet/8390/lib8390.c:300:20: warning: '__ei_start_xmit' defined but not used [-Wunused-function] /kisskb/src/drivers/net/ethernet/8390/lib8390.c:509:13: warning: '__ei_poll' defined but not used [-Wunused-function] /kisskb/src/drivers/net/ethernet/8390/lib8390.c:850:33: warning: '__ei_get_stats' defined but not used [-Wunused-function] /kisskb/src/drivers/net/ethernet/8390/lib8390.c:950:13: warning: '__ei_set_multicast_list' defined but not used [-Wunused-function] /kisskb/src/drivers/net/ethernet/8390/lib8390.c:988:27: warning: '____alloc_ei_netdev' defined but not used [-Wunused-function] /kisskb/src/drivers/net/macsec.c: In function 'macsec_del_rxsa': /kisskb/src/drivers/net/macsec.c:1926:2: warning: 'assoc_num' may be used uninitialized in this function [-Wuninitialized] /kisskb/src/drivers/net/macsec.c:1926:2: warning: 'rx_sc' may be used uninitialized in this function [-Wuninitialized] /kisskb/src/drivers/net/macsec.c: In function 'macsec_del_txsa': /kisskb/src/drivers/net/macsec.c:2003:2: warning: 'assoc_num' may be used uninitialized in this function [-Wuninitialized] /kisskb/src/drivers/net/macsec.c:2003:2: warning: 'tx_sc' may be used uninitialized in this function [-Wuninitialized] /kisskb/src/drivers/net/macsec.c: In function 'macsec_upd_txsa': /kisskb/src/drivers/net/macsec.c:2068:5: warning: 'assoc_num' may be used uninitialized in this function [-Wuninitialized] /kisskb/src/drivers/net/macsec.c:2068:24: warning: 'tx_sc' may be used uninitialized in this function [-Wuninitialized] /kisskb/src/drivers/net/macsec.c:2069:21: warning: 'secy' may be used uninitialized in this function [-Wuninitialized] /kisskb/src/drivers/net/tun.c: In function 'tun_get_user': /kisskb/src/drivers/net/tun.c:1846:30: warning: 'copylen' may be used uninitialized in this function [-Wuninitialized] /kisskb/src/drivers/net/tun.c:1536:31: warning: 'linear' may be used uninitialized in 
this function [-Wuninitialized] /kisskb/src/drivers/net/tun.c:1756:46: note: 'linear' was declared here /kisskb/src/fs/ocfs2/file.c: In function 'ocfs2_file_write_iter': /kisskb/src/fs/ocfs2/file.c:2386:3: warning: value computed is not used [-Wunused-value] /kisskb/src/net/ipv6/ip6_output.c: In function '__ip6_append_data.isra.37': /kisskb/src/include/linux/skbuff.h:1338:6: warning: 'extra_uref' may be used uninitialized in this function [-Wuninitialized] /kisskb/src/net/ipv6/ip6_output.c:1270:14: note: 'extra_uref' was declared here /kisskb/src/fs/udf/unicode.c: In function 'udf_name_conv_char': /kisskb/src/fs/udf/unicode.c:132:8: warning: 'c' may be used uninitialized in this function [-Wuninitialized] /kisskb/src/fs/posix_acl.c: In function 'get_acl': /kisskb/src/fs/posix_acl.c:147:3: warning: value computed is not used [-Wunused-value] /kisskb/src/net/sctp/output.c: In function 'sctp_packet_config': /kisskb/src/include/net/sock.h:1942:19: warning: 'sk' may be used uninitialized in this function [-Wuninitialized] /kisskb/src/net/sctp/output.c:94:15: note: 'sk' was declared here /kisskb/src/net/ipv4/ip_output.c: In function '__ip_append_data.isra.40': /kisskb/src/include/linux/skbuff.h:1338:6: warning: 'extra_uref' may be used uninitialized in this function [-Wuninitialized] /kisskb/src/net/ipv4/ip_output.c:885:14: note: 'extra_uref' was declared here Completed OK # rm -rf /kisskb/build/linus_m68k-defconfig_m68k # Build took: 0:01:14.192652