From a6f0643e4f63cfaa0d5d4a69de4f132eac4b8fe4 Mon Sep 17 00:00:00 2001 From: Matt Bobrowski Date: Sun, 28 Jun 2026 20:11:03 +0000 Subject: bpf: Reject BPF_MAP_TYPE_INODE_STORAGE creation if BPF LSM is uninitialized When CONFIG_BPF_LSM=y is set, BPF inode storage maps (BPF_MAP_TYPE_INODE_STORAGE) are compiled into the kernel. However, if the BPF LSM is not explicitly enabled at boot time (e.g. omitted from the "lsm=" boot parameter), lsm_prepare() is never executed for the BPF LSM. Consequently, the BPF inode security blob offset (bpf_lsm_blob_sizes.lbs_inode) is never initialized and remains at its default compiled size of 8 bytes instead of being updated to a valid offset past the reserved struct rcu_head (typically 16 bytes or more). When a privileged user creates and updates a BPF_MAP_TYPE_INODE_STORAGE map, bpf_inode() evaluates inode->i_security + 8. This erroneously aliases the struct rcu_head.func callback pointer at the beginning of the inode->i_security blob. During subsequent map element cleanup or inode destruction, writing NULL to owner_storage clears the queued RCU callback pointer. When rcu_do_batch() later executes the queued callback, it attempts an instruction fetch at address 0x0, triggering an immediate kernel panic. Fix this by introducing a global bpf_lsm_initialized boolean flag marked with __ro_after_init. Set this flag to true inside bpf_lsm_init() when the LSM framework successfully registers the BPF LSM. Gate map allocation in inode_storage_map_alloc() on this flag, returning -EOPNOTSUPP if the BPF LSM is in turn uninitialized. This fail-fast approach prevents userspace from allocating inode storage maps when the supporting BPF LSM infrastructure is absent, avoiding zombie map states. Fixes: 8ea636848aca ("bpf: Implement bpf_local_storage for inodes") Reported-by: oxsignal Signed-off-by: Matt Bobrowski Signed-off-by: Daniel Borkmann Reviewed-by: Emil Tsalapatis Reviewed-by: Amery Hung Link: https://lore.kernel.org/bpf/20260628201103.3624525-1-mattbobrowski@google.com --- include/linux/bpf_lsm.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/bpf_lsm.h b/include/linux/bpf_lsm.h index 143775a27a2a..dda272d78f01 100644 --- a/include/linux/bpf_lsm.h +++ b/include/linux/bpf_lsm.h @@ -14,6 +14,8 @@ #ifdef CONFIG_BPF_LSM +extern bool bpf_lsm_initialized __ro_after_init; + #define LSM_HOOK(RET, DEFAULT, NAME, ...) \ RET bpf_lsm_##NAME(__VA_ARGS__); #include @@ -56,6 +58,8 @@ bool bpf_lsm_hook_returns_errno(u32 btf_id); #else /* !CONFIG_BPF_LSM */ +#define bpf_lsm_initialized false + static inline bool bpf_lsm_is_sleepable_hook(u32 btf_id) { return false; -- cgit v1.2.3 From 96cce16e26dd02a8678f1e87f88a4b5cdb63b995 Mon Sep 17 00:00:00 2001 From: Pawan Gupta Date: Mon, 29 Jun 2026 22:37:52 -0700 Subject: bpf: Support for hardening against JIT spraying The BPF JIT allocator packs many small programs into larger executable allocations and reuses space within those allocations as programs are loaded and freed. When fresh code is written into space that a previous program occupied, an indirect jump into the new program can reuse a branch prediction left behind by the old one. Flush the indirect branch predictors before reusing JIT memory so that indirect jumps into a newly written program don't reuse predictions from an old program that occupied the same space. Introduce bpf_arch_pred_flush_enabled static key and bpf_arch_pred_flush static call for flushing the branch predictors on JIT memory reuse. Architectures that need a flush, can update it to a predictor flush function. By default, its a NOP and does not emit any CALL. Allocations larger than a pack are not covered by this flush. That is safe because cBPF programs (the unprivileged attack surface) are bounded well below a pack size. Issue a warning if this assumption is ever violated while the flush is active. Signed-off-by: Pawan Gupta Acked-by: Daniel Borkmann Signed-off-by: Daniel Borkmann --- include/linux/filter.h | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'include/linux') diff --git a/include/linux/filter.h b/include/linux/filter.h index 67d337ede91b..f68694f94ee7 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -21,6 +21,7 @@ #include #include #include +#include #include #include @@ -1314,6 +1315,15 @@ extern long bpf_jit_limit_max; typedef void (*bpf_jit_fill_hole_t)(void *area, unsigned int size); +/* + * Flush the indirect branch predictors before reusing JIT memory, so that + * indirect jumps into a newly written program don't reuse predictions left + * behind by an old program that occupied the same space. + */ +void bpf_arch_pred_flush(void); +DECLARE_STATIC_CALL(bpf_arch_pred_flush, bpf_arch_pred_flush); +DECLARE_STATIC_KEY_FALSE(bpf_pred_flush_enabled); + void bpf_jit_fill_hole_with_zero(void *area, unsigned int size); struct bpf_binary_header * -- cgit v1.2.3 From 0bb99f2cfaae6822d734d69722de30af823efdf3 Mon Sep 17 00:00:00 2001 From: Pawan Gupta Date: Mon, 29 Jun 2026 22:38:23 -0700 Subject: bpf: Restrict JIT predictor flush to cBPF Currently predictor flush on memory reuse is done for all BPF JIT allocations, but only cBPF programs can be loaded by an unprivileged user. eBPF is privileged by default, and flushing predictors for all CPUs on every eBPF reuse penalizes the common case for no security benefit. eBPF allocations can be frequent on busy systems, only flush predictors for cBPF programs. Trampoline and dispatcher allocations also skip the flush as they are eBPF-only. Signed-off-by: Pawan Gupta Acked-by: Daniel Borkmann Signed-off-by: Daniel Borkmann --- include/linux/filter.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/filter.h b/include/linux/filter.h index f68694f94ee7..14acb2455746 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -1338,7 +1338,7 @@ void bpf_jit_free(struct bpf_prog *fp); struct bpf_binary_header * bpf_jit_binary_pack_hdr(const struct bpf_prog *fp); -void *bpf_prog_pack_alloc(u32 size, bpf_jit_fill_hole_t bpf_fill_ill_insns); +void *bpf_prog_pack_alloc(u32 size, bpf_jit_fill_hole_t bpf_fill_ill_insns, bool was_classic); void bpf_prog_pack_free(void *ptr, u32 size); static inline bool bpf_prog_kallsyms_verify_off(const struct bpf_prog *fp) @@ -1352,7 +1352,8 @@ bpf_jit_binary_pack_alloc(unsigned int proglen, u8 **ro_image, unsigned int alignment, struct bpf_binary_header **rw_hdr, u8 **rw_image, - bpf_jit_fill_hole_t bpf_fill_ill_insns); + bpf_jit_fill_hole_t bpf_fill_ill_insns, + bool was_classic); int bpf_jit_binary_pack_finalize(struct bpf_binary_header *ro_header, struct bpf_binary_header *rw_header); void bpf_jit_binary_pack_free(struct bpf_binary_header *ro_header, -- cgit v1.2.3