summaryrefslogtreecommitdiff
path: root/kernel/nsproxy.c
diff options
context:
space:
mode:
authorChristian Brauner <brauner@kernel.org>2026-03-06 17:28:37 +0100
committerChristian Brauner <brauner@kernel.org>2026-03-12 13:33:55 +0100
commit9d4e752a24f740b31ca827bfab07010e4e7f34b0 (patch)
tree870d6a4e6493541f3db525eccb9c4b5648d0a0dc /kernel/nsproxy.c
parent0209e31659d6908c6d0788c8a495b43d0a1f6f6c (diff)
namespace: allow creating empty mount namespaces
Add support for creating a mount namespace that contains only a copy of the root mount from the caller's mount namespace, with none of the child mounts. This is useful for containers and sandboxes that want to start with a minimal mount table and populate it from scratch rather than inheriting and then tearing down the full mount tree. Two new flags are introduced: - CLONE_EMPTY_MNTNS for clone3(), using the 64-bit flag space. - UNSHARE_EMPTY_MNTNS for unshare(), reusing the CLONE_PARENT_SETTID bit which has no meaning for unshare. Both flags imply CLONE_NEWNS. For the unshare path, UNSHARE_EMPTY_MNTNS is converted to CLONE_EMPTY_MNTNS in unshare_nsproxy_namespaces() before it reaches copy_mnt_ns(), so the mount namespace code only needs to handle a single flag. In copy_mnt_ns(), when CLONE_EMPTY_MNTNS is set, clone_mnt() is used instead of copy_tree() to clone only the root mount. The caller's root and working directory are both reset to the root dentry of the new mount. The cleanup variables are changed from vfsmount pointers with __free(mntput) to struct path with __free(path_put) because the empty mount namespace path needs to release both mount and dentry references when replacing the caller's root and pwd. In the normal (non-empty) path only the mount component is set, and dput(NULL) is a no-op so path_put remains correct there as well. Link: https://patch.msgid.link/20260306-work-empty-mntns-consolidated-v1-1-6eb30529bbb0@kernel.org Signed-off-by: Christian Brauner <brauner@kernel.org>
Diffstat (limited to 'kernel/nsproxy.c')
-rw-r--r--kernel/nsproxy.c21
1 files changed, 16 insertions, 5 deletions
diff --git a/kernel/nsproxy.c b/kernel/nsproxy.c
index 259c4b4f1eeb..1bdc5be2dd20 100644
--- a/kernel/nsproxy.c
+++ b/kernel/nsproxy.c
@@ -95,7 +95,8 @@ static struct nsproxy *create_new_namespaces(u64 flags,
if (!new_nsp)
return ERR_PTR(-ENOMEM);
- new_nsp->mnt_ns = copy_mnt_ns(flags, tsk->nsproxy->mnt_ns, user_ns, new_fs);
+ new_nsp->mnt_ns = copy_mnt_ns(flags, tsk->nsproxy->mnt_ns,
+ user_ns, new_fs);
if (IS_ERR(new_nsp->mnt_ns)) {
err = PTR_ERR(new_nsp->mnt_ns);
goto out_ns;
@@ -212,18 +213,28 @@ int unshare_nsproxy_namespaces(unsigned long unshare_flags,
struct nsproxy **new_nsp, struct cred *new_cred, struct fs_struct *new_fs)
{
struct user_namespace *user_ns;
+ u64 flags = unshare_flags;
int err = 0;
- if (!(unshare_flags & (CLONE_NEWNS | CLONE_NEWUTS | CLONE_NEWIPC |
- CLONE_NEWNET | CLONE_NEWPID | CLONE_NEWCGROUP |
- CLONE_NEWTIME)))
+ if (!(flags & (CLONE_NEWNS | CLONE_NEWUTS | CLONE_NEWIPC |
+ CLONE_NEWNET | CLONE_NEWPID | CLONE_NEWCGROUP |
+ CLONE_NEWTIME)))
return 0;
user_ns = new_cred ? new_cred->user_ns : current_user_ns();
if (!ns_capable(user_ns, CAP_SYS_ADMIN))
return -EPERM;
- *new_nsp = create_new_namespaces(unshare_flags, current, user_ns,
+ /*
+ * Convert the 32-bit UNSHARE_EMPTY_MNTNS (which aliases
+ * CLONE_PARENT_SETTID) to the unique 64-bit CLONE_EMPTY_MNTNS.
+ */
+ if (flags & UNSHARE_EMPTY_MNTNS) {
+ flags &= ~(u64)UNSHARE_EMPTY_MNTNS;
+ flags |= CLONE_EMPTY_MNTNS;
+ }
+
+ *new_nsp = create_new_namespaces(flags, current, user_ns,
new_fs ? new_fs : current->fs);
if (IS_ERR(*new_nsp)) {
err = PTR_ERR(*new_nsp);