# git rev-parse -q --verify d3110f256d126b44d34c1f662310cd295877c447^{commit} d3110f256d126b44d34c1f662310cd295877c447 already have revision, skipping fetch # git checkout -q -f -B kisskb d3110f256d126b44d34c1f662310cd295877c447 # git clean -qxdf # < git log -1 # commit d3110f256d126b44d34c1f662310cd295877c447 # Merge: d0df9aabefda ee2e3f50629f # Author: Linus Torvalds # Date: Wed Mar 10 10:01:35 2021 -0800 # # Merge tag 'for-linus-2021-03-10' of git://git.kernel.org/pub/scm/linux/kernel/git/brauner/linux # # Pull detached mounts fix from Christian Brauner: # "Creating a series of detached mounts, attaching them to the # filesystem, and unmounting them can be used to trigger an integer # overflow in ns->mounts causing the kernel to block any new mounts in # count_mounts() and returning ENOSPC because it falsely assumes that # the maximum number of mounts in the mount namespace has been reached, # i.e. it thinks it can't fit the new mounts into the mount namespace # anymore. # # Without this fix heavy use of the new mount API with move_mount() will # cause the host to become unusable and thus blocks some xfstest # patches I want to resend. # # Depending on the number of mounts in your system, this can be # reproduced on any kernel that supports open_tree() and move_mount(). # # A reproducer has been sent for inclusion with xfstests. It takes care # to do this in another mount namespace, not in the host's mount # namespace so there shouldn't be any risk in running it but if one did # run it on the host it would require a reboot in order to be able to # mount again. See # # https://lore.kernel.org/fstests/20210309121041.753359-1-christian.brauner@ubuntu.com # # The root cause of this is that detached mounts aren't handled # correctly when source and target mount are identical and reside on a # shared mount causing a broken mount tree where the detached source # itself is propagated which propagation prevents for regular # bind-mounts and new mounts. 
# # This ultimately leads to a miscalculation of the number of mounts in # the mount namespace. # # Detached mounts created via 'open_tree(fd, path, OPEN_TREE_CLONE)' are # essentially like an unattached bind-mount. They can then later on be # attached to the filesystem via move_mount() which calls into # attach_recursive_mount(). # # Part of attaching it to the filesystem is making sure that mounts get # correctly propagated in case the destination mountpoint is MS_SHARED, # i.e. is a shared mountpoint. This is done by calling into # propagate_mnt() which walks the list of peers calling propagate_one() # on each mount in this list making sure it receives the propagation # event. The propagate_one() function thereby skips both new mounts and # bind mounts to not propagate them "into themselves". Both are # identified by checking whether the mount is already attached to any # mount namespace in mnt->mnt_ns. This is what the IS_MNT_NEW() helper is # responsible for. # # However, detached mounts have an anonymous mount namespace attached to # them stashed in mnt->mnt_ns which means that IS_MNT_NEW() doesn't # realize they need to be skipped causing the mount to propagate "into # itself" breaking the mount table and causing a disconnect between the # number of mounts recorded as being beneath or reachable from the # target mountpoint and the number of mounts actually recorded/counted # in ns->mounts ultimately causing an overflow which in turn prevents # any new mounts via the ENOSPC issue. # # So teach propagation to handle detached mounts by making it aware of # them. I've been tracking this issue down for the last couple of days # and then verifying that the fix is correct by unmounting everything in # my current mount table leaving only /proc and /sys mounted and running # the reproducer above overnight verifying the number of mounts counted # in ns->mounts. With this fix the counts are correct and the ENOSPC # issue can't be reproduced. 
# # This change will only have an effect on mounts created with the new # mount API since detached mounts cannot be created with the old mount # API so regressions are extremely unlikely. # # Here's an illustration: # # #### mount(): # ubuntu@f1-vm:~$ sudo mount --bind /mnt/ /mnt/ # ubuntu@f1-vm:~$ findmnt | grep -i mnt # ├─/mnt /dev/sda2[/mnt] ext4 rw,relatime # # #### open_tree(OPEN_TREE_CLONE) + move_mount() with bug: # ubuntu@f1-vm:~$ sudo ./mount-new /mnt/ /mnt/ # ubuntu@f1-vm:~$ findmnt | grep -i mnt # ├─/mnt /dev/sda2[/mnt] ext4 rw,relatime # │ └─/mnt /dev/sda2[/mnt] ext4 rw,relatime # # #### open_tree(OPEN_TREE_CLONE) + move_mount() with the fix: # ubuntu@f1-vm:~$ sudo ./mount-new /mnt /mnt # ubuntu@f1-vm:~$ findmnt | grep -i mnt # └─/mnt /dev/sda2[/mnt] ext4 rw,relatime" # # * tag 'for-linus-2021-03-10' of git://git.kernel.org/pub/scm/linux/kernel/git/brauner/linux: # mount: fix mounting of detached mounts onto targets that reside on shared mounts # < /opt/cross/kisskb/korg/gcc-8.1.0-nolibc/m68k-linux/bin/m68k-linux-gcc --version # < /opt/cross/kisskb/korg/gcc-8.1.0-nolibc/m68k-linux/bin/m68k-linux-ld --version # < git log --format=%s --max-count=1 d3110f256d126b44d34c1f662310cd295877c447 # < make -s -j 48 ARCH=m68k O=/kisskb/build/linus_m68k-defconfig_m68k-gcc8 CROSS_COMPILE=/opt/cross/kisskb/korg/gcc-8.1.0-nolibc/m68k-linux/bin/m68k-linux- defconfig # < make -s -j 48 ARCH=m68k O=/kisskb/build/linus_m68k-defconfig_m68k-gcc8 CROSS_COMPILE=/opt/cross/kisskb/korg/gcc-8.1.0-nolibc/m68k-linux/bin/m68k-linux- help # make -s -j 48 ARCH=m68k O=/kisskb/build/linus_m68k-defconfig_m68k-gcc8 CROSS_COMPILE=/opt/cross/kisskb/korg/gcc-8.1.0-nolibc/m68k-linux/bin/m68k-linux- olddefconfig # make -s -j 48 ARCH=m68k O=/kisskb/build/linus_m68k-defconfig_m68k-gcc8 CROSS_COMPILE=/opt/cross/kisskb/korg/gcc-8.1.0-nolibc/m68k-linux/bin/m68k-linux- /kisskb/src/arch/m68k/mvme16x/config.c: In function 'mvme16x_hwclk': /kisskb/src/arch/m68k/mvme16x/config.c:437:2: warning: #warning 
check me! [-Wcpp] #warning check me! ^~~~~~~ /kisskb/src/arch/m68k/mvme147/config.c: In function 'mvme147_hwclk': /kisskb/src/arch/m68k/mvme147/config.c:172:2: warning: #warning check me! [-Wcpp] #warning check me! ^~~~~~~ In file included from /kisskb/src/drivers/net/ethernet/8390/xsurf100.c:48: /kisskb/src/drivers/net/ethernet/8390/lib8390.c:995:27: warning: '____alloc_ei_netdev' defined but not used [-Wunused-function] static struct net_device *____alloc_ei_netdev(int size) ^~~~~~~~~~~~~~~~~~~ /kisskb/src/drivers/net/ethernet/8390/lib8390.c:957:13: warning: '__ei_set_multicast_list' defined but not used [-Wunused-function] static void __ei_set_multicast_list(struct net_device *dev) ^~~~~~~~~~~~~~~~~~~~~~~ /kisskb/src/drivers/net/ethernet/8390/lib8390.c:857:33: warning: '__ei_get_stats' defined but not used [-Wunused-function] static struct net_device_stats *__ei_get_stats(struct net_device *dev) ^~~~~~~~~~~~~~ /kisskb/src/drivers/net/ethernet/8390/lib8390.c:512:13: warning: '__ei_poll' defined but not used [-Wunused-function] static void __ei_poll(struct net_device *dev) ^~~~~~~~~ /kisskb/src/drivers/net/ethernet/8390/lib8390.c:303:20: warning: '__ei_start_xmit' defined but not used [-Wunused-function] static netdev_tx_t __ei_start_xmit(struct sk_buff *skb, ^~~~~~~~~~~~~~~ /kisskb/src/drivers/net/ethernet/8390/lib8390.c:257:13: warning: '__ei_tx_timeout' defined but not used [-Wunused-function] static void __ei_tx_timeout(struct net_device *dev, unsigned int txqueue) ^~~~~~~~~~~~~~~ /kisskb/src/drivers/net/ethernet/8390/lib8390.c:233:12: warning: '__ei_close' defined but not used [-Wunused-function] static int __ei_close(struct net_device *dev) ^~~~~~~~~~ /kisskb/src/drivers/net/ethernet/8390/lib8390.c:204:12: warning: '__ei_open' defined but not used [-Wunused-function] static int __ei_open(struct net_device *dev) ^~~~~~~~~ Completed OK # rm -rf /kisskb/build/linus_m68k-defconfig_m68k-gcc8 # Build took: 0:02:06.698134