summaryrefslogtreecommitdiff
path: root/kernel
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2026-06-15 03:54:54 +0530
committerLinus Torvalds <torvalds@linux-foundation.org>2026-06-15 03:54:54 +0530
commitff8747aacaff8266dd751b8a8648fb728dcc3b21 (patch)
tree2eb6f46a6c2f904de59b37d9edf5c2f1d2386010 /kernel
parentec5d1ae94e99d8831427d00973da5620c7fb4368 (diff)
parent9722955b54307e9070994f2382ec06af3d7405e0 (diff)
Merge tag 'vfs-7.2-rc1.xattr' of git://git.kernel.org/pub/scm/linux/kernel/git/vfs/vfs
Pull simple_xattr updates from Christian Brauner: "This reworks the simple xattr api to make it more efficient and easier to use for all consumers. The simple_xattr hash table moves from the inode into a per-superblock cache, removing the per-inode overhead for the common case of few or no xattrs. The interface now passes struct simple_xattrs ** so lazy allocation is handled internally instead of by every caller, kernfs xattr operations on kernfs nodes shared between multiple superblocks are properly serialized, and tmpfs constructs "security.foo" xattr names with kasprintf() instead of kmalloc() plus two memcpy()s. A follow-up fix links kernfs nodes to their parent before the LSM init hook runs: with the per-sb cache kernfs_xattr_set() computes the cache via kernfs_root(kn), which faulted on a freshly allocated node when selinux_kernfs_init_security() called into it - reproducible as a NULL pointer dereference on the first cgroup mkdir on SELinux-enabled systems. On top of this bpffs gains support for trusted.* and security.* xattrs so that user space and BPF LSM programs can attach metadata - for example a content hash or a security label - to pinned objects and directories and inspect it uniformly like on other filesystems. The store is in-memory and non-persistent, living only for the lifetime of the mount like everything else in bpffs" * tag 'vfs-7.2-rc1.xattr' of git://git.kernel.org/pub/scm/linux/kernel/git/vfs/vfs: bpf: Add simple xattr support to bpffs kernfs: link kn to its parent before the LSM init hook simpe_xattr: use per-sb cache simple_xattr: change interface to pass struct simple_xattrs ** tmpfs: simplify constructing "security.foo" xattr names kernfs: fix xattr race condition with multiple superblocks
Diffstat (limited to 'kernel')
-rw-r--r--kernel/bpf/inode.c256
1 files changed, 237 insertions, 19 deletions
diff --git a/kernel/bpf/inode.c b/kernel/bpf/inode.c
index 25c06a011825..c3f79b5a2f8c 100644
--- a/kernel/bpf/inode.c
+++ b/kernel/bpf/inode.c
@@ -21,6 +21,9 @@
#include <linux/bpf.h>
#include <linux/bpf_trace.h>
#include <linux/kstrtox.h>
+#include <linux/xattr.h>
+#include <linux/security.h>
+
#include "preload/bpf_preload.h"
enum bpf_type {
@@ -30,6 +33,23 @@ enum bpf_type {
BPF_TYPE_LINK,
};
+struct bpf_fs_inode {
+ struct list_head xattrs;
+ struct simple_xattr_limits xlimits;
+ struct inode vfs_inode;
+};
+
+static inline struct bpf_fs_inode *BPF_FS_I(struct inode *inode)
+{
+ return container_of(inode, struct bpf_fs_inode, vfs_inode);
+}
+
+static struct kmem_cache *bpf_fs_inode_cachep __ro_after_init;
+
+static int bpf_fs_initxattrs(struct inode *inode,
+ const struct xattr *xattr_array, void *fs_info);
+static ssize_t bpf_fs_listxattr(struct dentry *dentry, char *buf, size_t size);
+
static void *bpf_any_get(void *raw, enum bpf_type type)
{
switch (type) {
@@ -94,10 +114,17 @@ static void *bpf_fd_probe_obj(u32 ufd, enum bpf_type *type)
}
static const struct inode_operations bpf_dir_iops;
+static const struct inode_operations bpf_symlink_iops;
-static const struct inode_operations bpf_prog_iops = { };
-static const struct inode_operations bpf_map_iops = { };
-static const struct inode_operations bpf_link_iops = { };
+static const struct inode_operations bpf_prog_iops = {
+ .listxattr = bpf_fs_listxattr,
+};
+static const struct inode_operations bpf_map_iops = {
+ .listxattr = bpf_fs_listxattr,
+};
+static const struct inode_operations bpf_link_iops = {
+ .listxattr = bpf_fs_listxattr,
+};
struct inode *bpf_get_inode(struct super_block *sb,
const struct inode *dir,
@@ -153,11 +180,19 @@ static struct dentry *bpf_mkdir(struct mnt_idmap *idmap, struct inode *dir,
struct dentry *dentry, umode_t mode)
{
struct inode *inode;
+ int ret;
inode = bpf_get_inode(dir->i_sb, dir, mode | S_IFDIR);
if (IS_ERR(inode))
return ERR_CAST(inode);
+ ret = security_inode_init_security(inode, dir, &dentry->d_name,
+ bpf_fs_initxattrs, NULL);
+ if (ret && ret != -EOPNOTSUPP) {
+ iput(inode);
+ return ERR_PTR(ret);
+ }
+
inode->i_op = &bpf_dir_iops;
inode->i_fop = &simple_dir_operations;
@@ -330,10 +365,20 @@ static int bpf_mkobj_ops(struct dentry *dentry, umode_t mode, void *raw,
const struct file_operations *fops)
{
struct inode *dir = dentry->d_parent->d_inode;
- struct inode *inode = bpf_get_inode(dir->i_sb, dir, mode);
+ struct inode *inode;
+ int ret;
+
+ inode = bpf_get_inode(dir->i_sb, dir, mode);
if (IS_ERR(inode))
return PTR_ERR(inode);
+ ret = security_inode_init_security(inode, dir, &dentry->d_name,
+ bpf_fs_initxattrs, NULL);
+ if (ret && ret != -EOPNOTSUPP) {
+ iput(inode);
+ return ret;
+ }
+
inode->i_op = iops;
inode->i_fop = fops;
inode->i_private = raw;
@@ -382,9 +427,11 @@ bpf_lookup(struct inode *dir, struct dentry *dentry, unsigned flags)
static int bpf_symlink(struct mnt_idmap *idmap, struct inode *dir,
struct dentry *dentry, const char *target)
{
- char *link = kstrdup(target, GFP_USER | __GFP_NOWARN);
struct inode *inode;
+ char *link;
+ int ret;
+ link = kstrdup(target, GFP_KERNEL_ACCOUNT | __GFP_NOWARN);
if (!link)
return -ENOMEM;
@@ -394,13 +441,25 @@ static int bpf_symlink(struct mnt_idmap *idmap, struct inode *dir,
return PTR_ERR(inode);
}
- inode->i_op = &simple_symlink_inode_operations;
+ inode->i_op = &bpf_symlink_iops;
inode->i_link = link;
+ ret = security_inode_init_security(inode, dir, &dentry->d_name,
+ bpf_fs_initxattrs, NULL);
+ if (ret && ret != -EOPNOTSUPP) {
+ iput(inode);
+ return ret;
+ }
+
bpf_dentry_finalize(dentry, inode, dir);
return 0;
}
+static const struct inode_operations bpf_symlink_iops = {
+ .get_link = simple_get_link,
+ .listxattr = bpf_fs_listxattr,
+};
+
static const struct inode_operations bpf_dir_iops = {
.lookup = bpf_lookup,
.mkdir = bpf_mkdir,
@@ -409,6 +468,7 @@ static const struct inode_operations bpf_dir_iops = {
.rename = simple_rename,
.link = simple_link,
.unlink = simple_unlink,
+ .listxattr = bpf_fs_listxattr,
};
/* pin iterator link into bpffs */
@@ -762,22 +822,147 @@ static int bpf_show_options(struct seq_file *m, struct dentry *root)
return 0;
}
+static struct inode *bpf_fs_alloc_inode(struct super_block *sb)
+{
+ struct bpf_fs_inode *bi;
+
+ bi = alloc_inode_sb(sb, bpf_fs_inode_cachep, GFP_KERNEL);
+ if (!bi)
+ return NULL;
+ INIT_LIST_HEAD_RCU(&bi->xattrs);
+ simple_xattr_limits_init(&bi->xlimits);
+ return &bi->vfs_inode;
+}
+
static void bpf_destroy_inode(struct inode *inode)
{
+ struct bpf_mount_opts *opts = inode->i_sb->s_fs_info;
+ struct bpf_fs_inode *bi = BPF_FS_I(inode);
enum bpf_type type;
- if (S_ISLNK(inode->i_mode))
- kfree(inode->i_link);
if (!bpf_inode_type(inode, &type))
bpf_any_put(inode->i_private, type);
- free_inode_nonrcu(inode);
+ simple_xattrs_free(&opts->xa_cache, &bi->xattrs, NULL);
+}
+
+static void bpf_free_inode(struct inode *inode)
+{
+ if (S_ISLNK(inode->i_mode))
+ kfree(inode->i_link);
+ kmem_cache_free(bpf_fs_inode_cachep, BPF_FS_I(inode));
+}
+
+static int bpf_fs_xattr_get(const struct xattr_handler *handler,
+ struct dentry *unused, struct inode *inode,
+ const char *name, void *value, size_t size)
+{
+ struct bpf_mount_opts *opts = inode->i_sb->s_fs_info;
+ struct bpf_fs_inode *bi = BPF_FS_I(inode);
+
+ name = xattr_full_name(handler, name);
+ return simple_xattr_get(&opts->xa_cache, &bi->xattrs, name, value, size);
+}
+
+enum {
+ BPF_FS_XATTR_UNSPEC,
+ BPF_FS_XATTR_SECURITY,
+ BPF_FS_XATTR_TRUSTED,
+};
+
+static int bpf_fs_xattr_set(const struct xattr_handler *handler,
+ struct mnt_idmap *idmap, struct dentry *unused,
+ struct inode *inode, const char *name,
+ const void *value, size_t size, int flags)
+{
+ struct bpf_mount_opts *opts = inode->i_sb->s_fs_info;
+ struct bpf_fs_inode *bi = BPF_FS_I(inode);
+ struct simple_xattr *old;
+ int err = -EINVAL;
+
+ name = xattr_full_name(handler, name);
+ switch (handler->flags) {
+ case BPF_FS_XATTR_SECURITY:
+ err = simple_xattr_set_limited(&opts->xa_cache, &bi->xattrs,
+ &bi->xlimits, name, value, size,
+ flags);
+ break;
+ case BPF_FS_XATTR_TRUSTED:
+ old = simple_xattr_set(&opts->xa_cache, &bi->xattrs, name,
+ value, size, flags);
+ err = IS_ERR(old) ? PTR_ERR(old) : 0;
+ if (!err)
+ simple_xattr_free_rcu(old);
+ break;
+ }
+ if (err)
+ return err;
+ inode_set_ctime_current(inode);
+ return 0;
+}
+
+static const struct xattr_handler bpf_fs_trusted_xattr_handler = {
+ .prefix = XATTR_TRUSTED_PREFIX,
+ .flags = BPF_FS_XATTR_TRUSTED,
+ .get = bpf_fs_xattr_get,
+ .set = bpf_fs_xattr_set,
+};
+
+static const struct xattr_handler bpf_fs_security_xattr_handler = {
+ .prefix = XATTR_SECURITY_PREFIX,
+ .flags = BPF_FS_XATTR_SECURITY,
+ .get = bpf_fs_xattr_get,
+ .set = bpf_fs_xattr_set,
+};
+
+static const struct xattr_handler * const bpf_fs_xattr_handlers[] = {
+ &bpf_fs_trusted_xattr_handler,
+ &bpf_fs_security_xattr_handler,
+ NULL,
+};
+
+static ssize_t bpf_fs_listxattr(struct dentry *dentry, char *buf, size_t size)
+{
+ struct inode *inode = d_inode(dentry);
+
+ return simple_xattr_list(inode, &BPF_FS_I(inode)->xattrs, buf, size);
+}
+
+static int bpf_fs_initxattrs(struct inode *inode,
+ const struct xattr *xattr_array, void *fs_info)
+{
+ struct bpf_mount_opts *opts = inode->i_sb->s_fs_info;
+ struct bpf_fs_inode *bi = BPF_FS_I(inode);
+ const struct xattr *xattr;
+ int err;
+
+ for (xattr = xattr_array; xattr->name != NULL; xattr++) {
+ CLASS(simple_xattr, new_xattr)(xattr->value, xattr->value_len);
+ if (IS_ERR(new_xattr))
+ return PTR_ERR(new_xattr);
+
+ new_xattr->name = kasprintf(GFP_KERNEL_ACCOUNT,
+ XATTR_SECURITY_PREFIX "%s",
+ xattr->name);
+ if (!new_xattr->name)
+ return -ENOMEM;
+
+ err = simple_xattr_add_limited(&opts->xa_cache, &bi->xattrs,
+ &bi->xlimits, new_xattr);
+ if (err)
+ return err;
+
+ retain_and_null_ptr(new_xattr);
+ }
+ return 0;
}
const struct super_operations bpf_super_ops = {
.statfs = simple_statfs,
.drop_inode = inode_just_drop,
.show_options = bpf_show_options,
+ .alloc_inode = bpf_fs_alloc_inode,
.destroy_inode = bpf_destroy_inode,
+ .free_inode = bpf_free_inode,
};
enum {
@@ -996,25 +1181,38 @@ out:
static int bpf_fill_super(struct super_block *sb, struct fs_context *fc)
{
- static const struct tree_descr bpf_rfiles[] = { { "" } };
struct bpf_mount_opts *opts = sb->s_fs_info;
struct inode *inode;
- int ret;
/* Mounting an instance of BPF FS requires privileges */
if (fc->user_ns != &init_user_ns && !capable(CAP_SYS_ADMIN))
return -EPERM;
- ret = simple_fill_super(sb, BPF_FS_MAGIC, bpf_rfiles);
- if (ret)
- return ret;
-
+ sb->s_blocksize = PAGE_SIZE;
+ sb->s_blocksize_bits = PAGE_SHIFT;
+ sb->s_magic = BPF_FS_MAGIC;
sb->s_op = &bpf_super_ops;
+ sb->s_xattr = bpf_fs_xattr_handlers;
+ sb->s_iflags |= SB_I_NOEXEC;
+ sb->s_iflags |= SB_I_NODEV;
+ sb->s_time_gran = 1;
+
+ inode = bpf_get_inode(sb, NULL, S_IFDIR | 0777);
+ if (IS_ERR(inode))
+ return PTR_ERR(inode);
+
+ inode->i_ino = 1;
+ inode->i_op = &bpf_dir_iops;
+ inode->i_fop = &simple_dir_operations;
+ set_nlink(inode, 2);
+
+ sb->s_root = d_make_root(inode);
+ if (!sb->s_root)
+ return -ENOMEM;
- inode = sb->s_root->d_inode;
+ inode = d_inode(sb->s_root);
inode->i_uid = opts->uid;
inode->i_gid = opts->gid;
- inode->i_op = &bpf_dir_iops;
inode->i_mode &= ~S_IALLUGO;
populate_bpffs(sb->s_root);
inode->i_mode |= S_ISVTX | opts->mode;
@@ -1068,6 +1266,7 @@ static void bpf_kill_super(struct super_block *sb)
struct bpf_mount_opts *opts = sb->s_fs_info;
kill_anon_super(sb);
+ simple_xattr_cache_cleanup(&opts->xa_cache);
kfree(opts);
}
@@ -1080,18 +1279,37 @@ static struct file_system_type bpf_fs_type = {
.fs_flags = FS_USERNS_MOUNT,
};
+static void bpf_fs_inode_init_once(void *foo)
+{
+ struct bpf_fs_inode *bi = foo;
+
+ inode_init_once(&bi->vfs_inode);
+}
+
static int __init bpf_init(void)
{
int ret;
+ bpf_fs_inode_cachep = kmem_cache_create("bpf_fs_inode_cache",
+ sizeof(struct bpf_fs_inode),
+ 0, SLAB_ACCOUNT,
+ bpf_fs_inode_init_once);
+ if (!bpf_fs_inode_cachep)
+ return -ENOMEM;
+
ret = sysfs_create_mount_point(fs_kobj, "bpf");
if (ret)
- return ret;
+ goto out_cache;
ret = register_filesystem(&bpf_fs_type);
- if (ret)
+ if (ret) {
sysfs_remove_mount_point(fs_kobj, "bpf");
+ goto out_cache;
+ }
+ return 0;
+out_cache:
+ kmem_cache_destroy(bpf_fs_inode_cachep);
return ret;
}
fs_initcall(bpf_init);