kernel_optimize_test/fs/mount.h
Eric W. Biederman 99b19d1647 mnt: In propgate_umount handle visiting mounts in any order
While investigating some poor umount performance I realized that in
the case of overlapping mount trees where some of the mounts are locked
the code has been failing to unmount all of the mounts it should
have been unmounting.

This failure to unmount all of the necessary
mounts can be reproduced with:

$ cat locked_mounts_test.sh

mount -t tmpfs test-base /mnt
mount --make-shared /mnt
mkdir -p /mnt/b

mount -t tmpfs test1 /mnt/b
mount --make-shared /mnt/b
mkdir -p /mnt/b/10

mount -t tmpfs test2 /mnt/b/10
mount --make-shared /mnt/b/10
mkdir -p /mnt/b/10/20

mount --rbind /mnt/b /mnt/b/10/20

unshare -Urm --propagation unchaged /bin/sh -c 'sleep 5; if [ $(grep test /proc/self/mountinfo | wc -l) -eq 1 ] ; then echo SUCCESS ; else echo FAILURE ; fi'
sleep 1
umount -l /mnt/b
wait %%

$ unshare -Urm ./locked_mounts_test.sh

This failure is corrected by removing the prepass that marks mounts
that may be umounted.

A first pass is added that umounts mounts if possible and if not sets
mount mark if they could be unmounted if they weren't locked and adds
them to a list to umount possibilities.  This first pass reconsiders
the mounts parent if it is on the list of umount possibilities, ensuring
that information of umoutability will pass from child to mount parent.

A second pass then walks through all mounts that are umounted and processes
their children unmounting them or marking them for reparenting.

A last pass cleans up the state on the mounts that could not be umounted
and if applicable reparents them to their first parent that remained
mounted.

While a bit longer than the old code this code is much more robust
as it allows information to flow up from the leaves and down
from the trunk making the order in which mounts are encountered
in the umount propgation tree irrelevant.

Cc: stable@vger.kernel.org
Fixes: 0c56fe3142 ("mnt: Don't propagate unmounts to locked mounts")
Reviewed-by: Andrei Vagin <avagin@virtuozzo.com>
Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com>
2017-05-23 08:41:16 -05:00

148 lines
3.8 KiB
C

#include <linux/mount.h>
#include <linux/seq_file.h>
#include <linux/poll.h>
#include <linux/ns_common.h>
#include <linux/fs_pin.h>
struct mnt_namespace {
atomic_t count;
struct ns_common ns;
struct mount * root;
struct list_head list;
struct user_namespace *user_ns;
struct ucounts *ucounts;
u64 seq; /* Sequence number to prevent loops */
wait_queue_head_t poll;
u64 event;
unsigned int mounts; /* # of mounts in the namespace */
unsigned int pending_mounts;
};
struct mnt_pcp {
int mnt_count;
int mnt_writers;
};
struct mountpoint {
struct hlist_node m_hash;
struct dentry *m_dentry;
struct hlist_head m_list;
int m_count;
};
struct mount {
struct hlist_node mnt_hash;
struct mount *mnt_parent;
struct dentry *mnt_mountpoint;
struct vfsmount mnt;
union {
struct rcu_head mnt_rcu;
struct llist_node mnt_llist;
};
#ifdef CONFIG_SMP
struct mnt_pcp __percpu *mnt_pcp;
#else
int mnt_count;
int mnt_writers;
#endif
struct list_head mnt_mounts; /* list of children, anchored here */
struct list_head mnt_child; /* and going through their mnt_child */
struct list_head mnt_instance; /* mount instance on sb->s_mounts */
const char *mnt_devname; /* Name of device e.g. /dev/dsk/hda1 */
struct list_head mnt_list;
struct list_head mnt_expire; /* link in fs-specific expiry list */
struct list_head mnt_share; /* circular list of shared mounts */
struct list_head mnt_slave_list;/* list of slave mounts */
struct list_head mnt_slave; /* slave list entry */
struct mount *mnt_master; /* slave is on master->mnt_slave_list */
struct mnt_namespace *mnt_ns; /* containing namespace */
struct mountpoint *mnt_mp; /* where is it mounted */
struct hlist_node mnt_mp_list; /* list mounts with the same mountpoint */
struct list_head mnt_umounting; /* list entry for umount propagation */
#ifdef CONFIG_FSNOTIFY
struct fsnotify_mark_connector __rcu *mnt_fsnotify_marks;
__u32 mnt_fsnotify_mask;
#endif
int mnt_id; /* mount identifier */
int mnt_group_id; /* peer group identifier */
int mnt_expiry_mark; /* true if marked for expiry */
struct hlist_head mnt_pins;
struct fs_pin mnt_umount;
struct dentry *mnt_ex_mountpoint;
};
#define MNT_NS_INTERNAL ERR_PTR(-EINVAL) /* distinct from any mnt_namespace */
static inline struct mount *real_mount(struct vfsmount *mnt)
{
return container_of(mnt, struct mount, mnt);
}
static inline int mnt_has_parent(struct mount *mnt)
{
return mnt != mnt->mnt_parent;
}
static inline int is_mounted(struct vfsmount *mnt)
{
/* neither detached nor internal? */
return !IS_ERR_OR_NULL(real_mount(mnt)->mnt_ns);
}
extern struct mount *__lookup_mnt(struct vfsmount *, struct dentry *);
extern int __legitimize_mnt(struct vfsmount *, unsigned);
extern bool legitimize_mnt(struct vfsmount *, unsigned);
static inline bool __path_is_mountpoint(const struct path *path)
{
struct mount *m = __lookup_mnt(path->mnt, path->dentry);
return m && likely(!(m->mnt.mnt_flags & MNT_SYNC_UMOUNT));
}
extern void __detach_mounts(struct dentry *dentry);
static inline void detach_mounts(struct dentry *dentry)
{
if (!d_mountpoint(dentry))
return;
__detach_mounts(dentry);
}
static inline void get_mnt_ns(struct mnt_namespace *ns)
{
atomic_inc(&ns->count);
}
extern seqlock_t mount_lock;
static inline void lock_mount_hash(void)
{
write_seqlock(&mount_lock);
}
static inline void unlock_mount_hash(void)
{
write_sequnlock(&mount_lock);
}
struct proc_mounts {
struct mnt_namespace *ns;
struct path root;
int (*show)(struct seq_file *, struct vfsmount *);
void *cached_mount;
u64 cached_event;
loff_t cached_index;
};
extern const struct seq_operations mounts_op;
extern bool __is_local_mountpoint(struct dentry *dentry);
static inline bool is_local_mountpoint(struct dentry *dentry)
{
if (!d_mountpoint(dentry))
return false;
return __is_local_mountpoint(dentry);
}