summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Linus Torvalds <torvalds@linux-foundation.org> 2026-07-02 16:39:28 -1000
committer Linus Torvalds <torvalds@linux-foundation.org> 2026-07-02 16:39:28 -1000
commit 51512e22efe813d8223de27f6fd02a8a48ea2323 (patch)
tree 71652a4126cce9f0529d265519df11fb118af40b
parent 826eec5b5efd785dc87638a54d5ecc9f88e5afce (diff)
parent b72e29e0f7ee329d89f86db8700c8ea99b4a370a (diff)
Merge tag 'bpf-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf
Pull BPF fixes from Daniel Borkmann:

 - Initialize task local storage before fork bails out to free the task (Jann Horn)

 - Fix insn_aux_data leak on verifier error path (KaFai Wan)

 - Reject BPF inode storage map creation when BPF LSM is uninitialized (Matt Bobrowski)

 - Mask pseudo pointer values in verifier logs when pointer leaks are not allowed (Nuoqi Gui)

 - Harden BPF JIT against spraying via IBPB flush (Pawan Gupta)

 - Reject a skb-modifying SK_SKB stream parser since the latter is only meant to measure the next message (Sechang Lim)

 - Fix bpf_refcount_acquire to reject refcounted allocation arguments with a non-zero fixed offset (Yiyang Chen)

* tag 'bpf-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf:
  bpf: Prefer dirty packs for eBPF allocations
  bpf: Prefer packs that won't trigger an IBPB flush on allocation
  bpf: Skip redundant IBPB in pack allocator
  bpf: Restrict JIT predictor flush to cBPF
  x86/bugs: Enable IBPB flush on BPF JIT allocation
  bpf: Support for hardening against JIT spraying
  bpf: Reject BPF_MAP_TYPE_INODE_STORAGE creation if BPF LSM is uninitialized
  bpf,fork: wipe ->bpf_storage before bailouts that access it
  bpf: Fix insn_aux_data leak on verifier err_free_env path
  selftests/bpf: Cover pseudo-BTF ksym log masking
  bpf: Mask pseudo pointer values in verifier logs
  selftests/bpf: Cover refcount acquire node offsets
  bpf: Reject offset refcount acquire arguments
  selftests/bpf: test rejection of a packet-modifying SK_SKB stream parser
  bpf, sockmap: reject a packet-modifying SK_SKB stream parser
  selftests/bpf: don't modify the skb in the strparser parser prog
-rw-r--r-- arch/arm64/net/bpf_jit_comp.c 4
-rw-r--r-- arch/loongarch/net/bpf_jit.c 5
-rw-r--r-- arch/powerpc/net/bpf_jit_comp.c 4
-rw-r--r-- arch/riscv/net/bpf_jit_comp64.c 2
-rw-r--r-- arch/riscv/net/bpf_jit_core.c 3
-rw-r--r-- arch/x86/include/asm/nospec-branch.h 4
-rw-r--r-- arch/x86/kernel/cpu/bugs.c 50
-rw-r--r-- arch/x86/net/bpf_jit_comp.c 5
-rw-r--r-- include/linux/bpf_lsm.h 4
-rw-r--r-- include/linux/filter.h 15
-rw-r--r-- kernel/bpf/bpf_inode_storage.c 9
-rw-r--r-- kernel/bpf/core.c 68
-rw-r--r-- kernel/bpf/disasm.c 5
-rw-r--r-- kernel/bpf/dispatcher.c 2
-rw-r--r-- kernel/bpf/verifier.c 34
-rw-r--r-- kernel/fork.c 9
-rw-r--r-- net/core/sock_map.c 20
-rw-r--r-- security/bpf/hooks.c 3
-rw-r--r-- tools/testing/selftests/bpf/prog_tests/sockmap_strp.c 31
-rw-r--r-- tools/testing/selftests/bpf/progs/refcounted_kptr_fail.c 34
-rw-r--r-- tools/testing/selftests/bpf/progs/sockmap_parse_prog.c 22
-rw-r--r-- tools/testing/selftests/bpf/progs/test_sockmap_strp.c 7
-rw-r--r-- tools/testing/selftests/bpf/progs/verifier_unpriv.c 19
23 files changed, 297 insertions, 62 deletions
diff --git a/arch/arm64/net/bpf_jit_comp.c b/arch/arm64/net/bpf_jit_comp.c
index f6bcc0e1a950..b0075ece4a6e 100644
--- a/arch/arm64/net/bpf_jit_comp.c
+++ b/arch/arm64/net/bpf_jit_comp.c
@@ -2177,7 +2177,7 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_verifier_env *env, struct bpf_pr
image_size = extable_offset + extable_size;
ro_header = bpf_jit_binary_pack_alloc(image_size, &ro_image_ptr,
sizeof(u64), &header, &image_ptr,
- jit_fill_hole);
+ jit_fill_hole, was_classic);
if (!ro_header)
goto out_off;
@@ -2870,7 +2870,7 @@ int arch_bpf_trampoline_size(const struct btf_func_model *m, u32 flags,
void *arch_alloc_bpf_trampoline(unsigned int size)
{
- return bpf_prog_pack_alloc(size, jit_fill_hole);
+ return bpf_prog_pack_alloc(size, jit_fill_hole, false);
}
void arch_free_bpf_trampoline(void *image, unsigned int size)
diff --git a/arch/loongarch/net/bpf_jit.c b/arch/loongarch/net/bpf_jit.c
index ad7e28375aa9..2738b4db1165 100644
--- a/arch/loongarch/net/bpf_jit.c
+++ b/arch/loongarch/net/bpf_jit.c
@@ -1790,7 +1790,7 @@ static int invoke_bpf(struct jit_ctx *ctx, struct bpf_tramp_nodes *tn,
void *arch_alloc_bpf_trampoline(unsigned int size)
{
- return bpf_prog_pack_alloc(size, jit_fill_hole);
+ return bpf_prog_pack_alloc(size, jit_fill_hole, false);
}
void arch_free_bpf_trampoline(void *image, unsigned int size)
@@ -2256,7 +2256,8 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_verifier_env *env, struct bpf_pr
image_size = prog_size + extable_size;
/* Now we know the size of the structure to make */
ro_header = bpf_jit_binary_pack_alloc(image_size, &ro_image_ptr, sizeof(u32),
- &header, &image_ptr, jit_fill_hole);
+ &header, &image_ptr, jit_fill_hole,
+ bpf_prog_was_classic(prog));
if (!ro_header)
goto out_offset;
diff --git a/arch/powerpc/net/bpf_jit_comp.c b/arch/powerpc/net/bpf_jit_comp.c
index d4a17e18c9fb..7b07b43575f1 100644
--- a/arch/powerpc/net/bpf_jit_comp.c
+++ b/arch/powerpc/net/bpf_jit_comp.c
@@ -295,7 +295,7 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_verifier_env *env, struct bpf_pr
alloclen = proglen + FUNCTION_DESCR_SIZE + fixup_len + extable_len;
fhdr = bpf_jit_binary_pack_alloc(alloclen, &fimage, 4, &hdr, &image,
- bpf_jit_fill_ill_insns);
+ bpf_jit_fill_ill_insns, bpf_prog_was_classic(fp));
if (!fhdr)
goto out_err;
@@ -588,7 +588,7 @@ bool bpf_jit_inlines_helper_call(s32 imm)
void *arch_alloc_bpf_trampoline(unsigned int size)
{
- return bpf_prog_pack_alloc(size, bpf_jit_fill_ill_insns);
+ return bpf_prog_pack_alloc(size, bpf_jit_fill_ill_insns, false);
}
void arch_free_bpf_trampoline(void *image, unsigned int size)
diff --git a/arch/riscv/net/bpf_jit_comp64.c b/arch/riscv/net/bpf_jit_comp64.c
index c03c1de16b79..f9d5347ba966 100644
--- a/arch/riscv/net/bpf_jit_comp64.c
+++ b/arch/riscv/net/bpf_jit_comp64.c
@@ -1321,7 +1321,7 @@ int arch_bpf_trampoline_size(const struct btf_func_model *m, u32 flags,
void *arch_alloc_bpf_trampoline(unsigned int size)
{
- return bpf_prog_pack_alloc(size, bpf_fill_ill_insns);
+ return bpf_prog_pack_alloc(size, bpf_fill_ill_insns, false);
}
void arch_free_bpf_trampoline(void *image, unsigned int size)
diff --git a/arch/riscv/net/bpf_jit_core.c b/arch/riscv/net/bpf_jit_core.c
index 4365d07aaf54..ce3bd3762e08 100644
--- a/arch/riscv/net/bpf_jit_core.c
+++ b/arch/riscv/net/bpf_jit_core.c
@@ -109,7 +109,8 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_verifier_env *env, struct bpf_pr
bpf_jit_binary_pack_alloc(prog_size + extable_size,
&jit_data->ro_image, sizeof(u32),
&jit_data->header, &jit_data->image,
- bpf_fill_ill_insns);
+ bpf_fill_ill_insns,
+ bpf_prog_was_classic(prog));
if (!jit_data->ro_header)
goto out_offset;
diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h
index 4f4b5e8a1574..b68892e6d58c 100644
--- a/arch/x86/include/asm/nospec-branch.h
+++ b/arch/x86/include/asm/nospec-branch.h
@@ -388,6 +388,10 @@ extern void srso_alias_return_thunk(void);
extern void entry_untrain_ret(void);
extern void write_ibpb(void);
+#ifdef CONFIG_BPF_JIT
+extern void bpf_arch_ibpb(void);
+#endif
+
#ifdef CONFIG_X86_64
extern void clear_bhb_loop(void);
#endif
diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c
index 83f51cab0b1e..d9af230c0512 100644
--- a/arch/x86/kernel/cpu/bugs.c
+++ b/arch/x86/kernel/cpu/bugs.c
@@ -16,6 +16,7 @@
#include <linux/sched/smt.h>
#include <linux/pgtable.h>
#include <linux/bpf.h>
+#include <linux/filter.h>
#include <linux/kvm_types.h>
#include <asm/spec-ctrl.h>
@@ -1651,8 +1652,21 @@ static inline const char *spectre_v2_module_string(void)
{
return spectre_v2_bad_module ? " - vulnerable module loaded" : "";
}
+
+/*
+ * The "retpoline sequence" is the "call;mov;ret" sequence that
+ * replaces normal indirect branch instructions. Differentiate
+ * *the* retpoline sequence from the LFENCE-prefixed indirect
+ * branches that simply use the retpoline infrastructure.
+ */
+static inline bool retpoline_seq_enabled(void)
+{
+ return boot_cpu_has(X86_FEATURE_RETPOLINE) && !boot_cpu_has(X86_FEATURE_RETPOLINE_LFENCE);
+}
+
#else
static inline const char *spectre_v2_module_string(void) { return ""; }
+static inline bool retpoline_seq_enabled(void) { return false; }
#endif
#define SPECTRE_V2_LFENCE_MSG "WARNING: LFENCE mitigation is not recommended for this CPU, data leaks possible!\n"
@@ -2095,8 +2109,7 @@ static void __init bhi_apply_mitigation(void)
return;
/* Retpoline mitigates against BHI unless the CPU has RRSBA behavior */
- if (boot_cpu_has(X86_FEATURE_RETPOLINE) &&
- !boot_cpu_has(X86_FEATURE_RETPOLINE_LFENCE)) {
+ if (retpoline_seq_enabled()) {
spec_ctrl_disable_kernel_rrsba();
if (rrsba_disabled)
return;
@@ -2238,6 +2251,27 @@ static void __init spectre_v2_update_mitigation(void)
pr_info("%s\n", spectre_v2_strings[spectre_v2_enabled]);
}
+#ifdef CONFIG_BPF_JIT
+static void __bpf_arch_ibpb(void *unused)
+{
+ write_ibpb();
+}
+
+void bpf_arch_ibpb(void)
+{
+ on_each_cpu(__bpf_arch_ibpb, NULL, 1);
+}
+
+static bool __init cpu_wants_ibpb_bpf(void)
+{
+ /* A genuine retpoline already neutralizes ring0 indirect predictions */
+ if (retpoline_seq_enabled())
+ return false;
+
+ return boot_cpu_has(X86_FEATURE_IBPB);
+}
+#endif
+
static void __init spectre_v2_apply_mitigation(void)
{
if (spectre_v2_enabled == SPECTRE_V2_EIBRS && unprivileged_ebpf_enabled())
@@ -2314,6 +2348,14 @@ static void __init spectre_v2_apply_mitigation(void)
setup_force_cpu_cap(X86_FEATURE_USE_IBRS_FW);
pr_info("Enabling Restricted Speculation for firmware calls\n");
}
+
+#ifdef CONFIG_BPF_JIT
+ if (cpu_wants_ibpb_bpf()) {
+ static_call_update(bpf_arch_pred_flush, bpf_arch_ibpb);
+ static_branch_enable(&bpf_pred_flush_enabled);
+ pr_info("Enabling IBPB for BPF\n");
+ }
+#endif
}
static void update_stibp_msr(void * __unused)
@@ -3490,9 +3532,7 @@ static const char *spectre_bhi_state(void)
return "; BHI: BHI_DIS_S";
else if (boot_cpu_has(X86_FEATURE_CLEAR_BHB_LOOP))
return "; BHI: SW loop, KVM: SW loop";
- else if (boot_cpu_has(X86_FEATURE_RETPOLINE) &&
- !boot_cpu_has(X86_FEATURE_RETPOLINE_LFENCE) &&
- rrsba_disabled)
+ else if (retpoline_seq_enabled() && rrsba_disabled)
return "; BHI: Retpoline";
else if (boot_cpu_has(X86_FEATURE_CLEAR_BHB_VMEXIT))
return "; BHI: Vulnerable, KVM: SW loop";
diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c
index 054e043ffcd2..de7515ea1bea 100644
--- a/arch/x86/net/bpf_jit_comp.c
+++ b/arch/x86/net/bpf_jit_comp.c
@@ -3653,7 +3653,7 @@ cleanup:
void *arch_alloc_bpf_trampoline(unsigned int size)
{
- return bpf_prog_pack_alloc(size, jit_fill_hole);
+ return bpf_prog_pack_alloc(size, jit_fill_hole, false);
}
void arch_free_bpf_trampoline(void *image, unsigned int size)
@@ -3965,7 +3965,8 @@ out_image:
/* allocate module memory for x86 insns and extable */
header = bpf_jit_binary_pack_alloc(roundup(proglen, align) + extable_size,
&image, align, &rw_header, &rw_image,
- jit_fill_hole);
+ jit_fill_hole,
+ bpf_prog_was_classic(prog));
if (!header)
goto out_addrs;
prog->aux->extable = (void *) image + roundup(proglen, align);
diff --git a/include/linux/bpf_lsm.h b/include/linux/bpf_lsm.h
index 143775a27a2a..dda272d78f01 100644
--- a/include/linux/bpf_lsm.h
+++ b/include/linux/bpf_lsm.h
@@ -14,6 +14,8 @@
#ifdef CONFIG_BPF_LSM
+extern bool bpf_lsm_initialized __ro_after_init;
+
#define LSM_HOOK(RET, DEFAULT, NAME, ...) \
RET bpf_lsm_##NAME(__VA_ARGS__);
#include <linux/lsm_hook_defs.h>
@@ -56,6 +58,8 @@ bool bpf_lsm_hook_returns_errno(u32 btf_id);
#else /* !CONFIG_BPF_LSM */
+#define bpf_lsm_initialized false
+
static inline bool bpf_lsm_is_sleepable_hook(u32 btf_id)
{
return false;
diff --git a/include/linux/filter.h b/include/linux/filter.h
index 67d337ede91b..14acb2455746 100644
--- a/include/linux/filter.h
+++ b/include/linux/filter.h
@@ -21,6 +21,7 @@
#include <linux/if_vlan.h>
#include <linux/vmalloc.h>
#include <linux/sockptr.h>
+#include <linux/static_call.h>
#include <linux/u64_stats_sync.h>
#include <net/sch_generic.h>
@@ -1314,6 +1315,15 @@ extern long bpf_jit_limit_max;
typedef void (*bpf_jit_fill_hole_t)(void *area, unsigned int size);
+/*
+ * Flush the indirect branch predictors before reusing JIT memory, so that
+ * indirect jumps into a newly written program don't reuse predictions left
+ * behind by an old program that occupied the same space.
+ */
+void bpf_arch_pred_flush(void);
+DECLARE_STATIC_CALL(bpf_arch_pred_flush, bpf_arch_pred_flush);
+DECLARE_STATIC_KEY_FALSE(bpf_pred_flush_enabled);
+
void bpf_jit_fill_hole_with_zero(void *area, unsigned int size);
struct bpf_binary_header *
@@ -1328,7 +1338,7 @@ void bpf_jit_free(struct bpf_prog *fp);
struct bpf_binary_header *
bpf_jit_binary_pack_hdr(const struct bpf_prog *fp);
-void *bpf_prog_pack_alloc(u32 size, bpf_jit_fill_hole_t bpf_fill_ill_insns);
+void *bpf_prog_pack_alloc(u32 size, bpf_jit_fill_hole_t bpf_fill_ill_insns, bool was_classic);
void bpf_prog_pack_free(void *ptr, u32 size);
static inline bool bpf_prog_kallsyms_verify_off(const struct bpf_prog *fp)
@@ -1342,7 +1352,8 @@ bpf_jit_binary_pack_alloc(unsigned int proglen, u8 **ro_image,
unsigned int alignment,
struct bpf_binary_header **rw_hdr,
u8 **rw_image,
- bpf_jit_fill_hole_t bpf_fill_ill_insns);
+ bpf_jit_fill_hole_t bpf_fill_ill_insns,
+ bool was_classic);
int bpf_jit_binary_pack_finalize(struct bpf_binary_header *ro_header,
struct bpf_binary_header *rw_header);
void bpf_jit_binary_pack_free(struct bpf_binary_header *ro_header,
diff --git a/kernel/bpf/bpf_inode_storage.c b/kernel/bpf/bpf_inode_storage.c
index 0da8d923e39d..f9e81060c1f4 100644
--- a/kernel/bpf/bpf_inode_storage.c
+++ b/kernel/bpf/bpf_inode_storage.c
@@ -178,6 +178,15 @@ static int notsupp_get_next_key(struct bpf_map *map, void *key,
static struct bpf_map *inode_storage_map_alloc(union bpf_attr *attr)
{
+ /*
+ * Do not allow allocation of BPF_MAP_TYPE_INODE_STORAGE if the BPF LSM
+ * was not initialized by the LSM framework at boot. Without proper
+ * initialization, the BPF inode security blob offset remains unprepared,
+ * causing bpf_inode() to calculate an invalid memory offset and corrupt
+ * inode->i_security.
+ */
+ if (!bpf_lsm_initialized)
+ return ERR_PTR(-EOPNOTSUPP);
return bpf_local_storage_map_alloc(attr, &inode_cache);
}
diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c
index 649cce41e13f..6e19a030da6f 100644
--- a/kernel/bpf/core.c
+++ b/kernel/bpf/core.c
@@ -20,6 +20,7 @@
#include <uapi/linux/btf.h>
#include <linux/filter.h>
#include <linux/skbuff.h>
+#include <linux/static_call.h>
#include <linux/vmalloc.h>
#include <linux/prandom.h>
#include <linux/bpf.h>
@@ -875,6 +876,7 @@ int bpf_jit_add_poke_descriptor(struct bpf_prog *prog,
struct bpf_prog_pack {
struct list_head list;
void *ptr;
+ bool arch_flush_needed;
unsigned long bitmap[];
};
@@ -883,6 +885,15 @@ void bpf_jit_fill_hole_with_zero(void *area, unsigned int size)
memset(area, 0, size);
}
+DEFINE_STATIC_CALL_NULL(bpf_arch_pred_flush, bpf_arch_pred_flush);
+
+/*
+ * Enabled once bpf_arch_pred_flush points at a real flush routine. Lets the
+ * pack allocator test "is a predictor flush wired up at all" with a cheap
+ * static branch instead of repeatedly querying the static call target.
+ */
+DEFINE_STATIC_KEY_FALSE(bpf_pred_flush_enabled);
+
#define BPF_PROG_SIZE_TO_NBITS(size) (round_up(size, BPF_PROG_CHUNK_SIZE) / BPF_PROG_CHUNK_SIZE)
static DEFINE_MUTEX(pack_mutex);
@@ -918,6 +929,8 @@ static struct bpf_prog_pack *alloc_new_pack(bpf_jit_fill_hole_t bpf_fill_ill_ins
bpf_fill_ill_insns(pack->ptr, BPF_PROG_PACK_SIZE);
bitmap_zero(pack->bitmap, BPF_PROG_PACK_SIZE / BPF_PROG_CHUNK_SIZE);
+ if (static_branch_unlikely(&bpf_pred_flush_enabled))
+ pack->arch_flush_needed = true;
set_vm_flush_reset_perms(pack->ptr);
err = set_memory_rox((unsigned long)pack->ptr,
BPF_PROG_PACK_SIZE / PAGE_SIZE);
@@ -932,15 +945,23 @@ out:
return NULL;
}
-void *bpf_prog_pack_alloc(u32 size, bpf_jit_fill_hole_t bpf_fill_ill_insns)
+void *bpf_prog_pack_alloc(u32 size, bpf_jit_fill_hole_t bpf_fill_ill_insns, bool was_classic)
{
unsigned int nbits = BPF_PROG_SIZE_TO_NBITS(size);
- struct bpf_prog_pack *pack;
- unsigned long pos;
+ struct bpf_prog_pack *pack, *fallback_pack = NULL;
+ unsigned long pos, fallback_pos = 0;
void *ptr = NULL;
mutex_lock(&pack_mutex);
if (size > BPF_PROG_PACK_SIZE) {
+ /*
+ * Allocations larger than a pack get their own pages, and
+ * predictors are not flushed for such allocation. This is only
+ * safe because cBPF programs (the unprivileged attack surface)
+ * are bounded well below a pack size.
+ */
+ if (was_classic && static_branch_unlikely(&bpf_pred_flush_enabled))
+ pr_warn_once("BPF: Predictors not flushed for allocations greater than BPF_PROG_PACK_SIZE\n");
size = round_up(size, PAGE_SIZE);
ptr = bpf_jit_alloc_exec(size);
if (ptr) {
@@ -960,8 +981,29 @@ void *bpf_prog_pack_alloc(u32 size, bpf_jit_fill_hole_t bpf_fill_ill_insns)
list_for_each_entry(pack, &pack_list, list) {
pos = bitmap_find_next_zero_area(pack->bitmap, BPF_PROG_CHUNK_COUNT, 0,
nbits, 0);
- if (pos < BPF_PROG_CHUNK_COUNT)
+ if (pos >= BPF_PROG_CHUNK_COUNT)
+ continue;
+ /* Flush not enabled, use any pack */
+ if (!static_branch_unlikely(&bpf_pred_flush_enabled))
goto found_free_area;
+ /*
+ * cBPF reuse of a dirty pack triggers a flush, so prefer a
+ * clean pack for cBPF. eBPF never flushes, so steer it to a
+ * dirty pack and keep clean packs free for cBPF.
+ */
+ if (was_classic ^ pack->arch_flush_needed)
+ goto found_free_area;
+ if (!fallback_pack) {
+ fallback_pack = pack;
+ fallback_pos = pos;
+ }
+ }
+
+ /* No preferred pack found */
+ if (fallback_pack) {
+ pack = fallback_pack;
+ pos = fallback_pos;
+ goto found_free_area;
}
pack = alloc_new_pack(bpf_fill_ill_insns);
@@ -971,6 +1013,16 @@ void *bpf_prog_pack_alloc(u32 size, bpf_jit_fill_hole_t bpf_fill_ill_insns)
pos = 0;
found_free_area:
+ /* Flush only for cBPF as it may contain a crafted gadget */
+ if (static_branch_unlikely(&bpf_pred_flush_enabled) &&
+ pack->arch_flush_needed &&
+ was_classic) {
+ struct bpf_prog_pack *p;
+
+ static_call_cond(bpf_arch_pred_flush)();
+ list_for_each_entry(p, &pack_list, list)
+ p->arch_flush_needed = false;
+ }
bitmap_set(pack->bitmap, pos, nbits);
ptr = (void *)(pack->ptr) + (pos << BPF_PROG_CHUNK_SHIFT);
@@ -1008,6 +1060,9 @@ void bpf_prog_pack_free(void *ptr, u32 size)
"bpf_prog_pack bug: missing bpf_arch_text_invalidate?\n");
bitmap_clear(pack->bitmap, pos, nbits);
+
+ if (static_branch_unlikely(&bpf_pred_flush_enabled))
+ pack->arch_flush_needed = true;
if (bitmap_find_next_zero_area(pack->bitmap, BPF_PROG_CHUNK_COUNT, 0,
BPF_PROG_CHUNK_COUNT, 0) == 0) {
list_del(&pack->list);
@@ -1130,7 +1185,8 @@ bpf_jit_binary_pack_alloc(unsigned int proglen, u8 **image_ptr,
unsigned int alignment,
struct bpf_binary_header **rw_header,
u8 **rw_image,
- bpf_jit_fill_hole_t bpf_fill_ill_insns)
+ bpf_jit_fill_hole_t bpf_fill_ill_insns,
+ bool was_classic)
{
struct bpf_binary_header *ro_header;
u32 size, hole, start;
@@ -1143,7 +1199,7 @@ bpf_jit_binary_pack_alloc(unsigned int proglen, u8 **image_ptr,
if (bpf_jit_charge_modmem(size))
return NULL;
- ro_header = bpf_prog_pack_alloc(size, bpf_fill_ill_insns);
+ ro_header = bpf_prog_pack_alloc(size, bpf_fill_ill_insns, was_classic);
if (!ro_header) {
bpf_jit_uncharge_modmem(size);
return NULL;
diff --git a/kernel/bpf/disasm.c b/kernel/bpf/disasm.c
index f8a3c7eb451e..0391b3bc0073 100644
--- a/kernel/bpf/disasm.c
+++ b/kernel/bpf/disasm.c
@@ -323,7 +323,10 @@ void print_bpf_insn(const struct bpf_insn_cbs *cbs,
*/
u64 imm = ((u64)(insn + 1)->imm << 32) | (u32)insn->imm;
bool is_ptr = insn->src_reg == BPF_PSEUDO_MAP_FD ||
- insn->src_reg == BPF_PSEUDO_MAP_VALUE;
+ insn->src_reg == BPF_PSEUDO_MAP_VALUE ||
+ insn->src_reg == BPF_PSEUDO_MAP_IDX ||
+ insn->src_reg == BPF_PSEUDO_MAP_IDX_VALUE ||
+ insn->src_reg == BPF_PSEUDO_BTF_ID;
char tmp[64];
if (is_ptr && !allow_ptr_leaks)
diff --git a/kernel/bpf/dispatcher.c b/kernel/bpf/dispatcher.c
index b77db7413f8c..ea2d60dc1fee 100644
--- a/kernel/bpf/dispatcher.c
+++ b/kernel/bpf/dispatcher.c
@@ -145,7 +145,7 @@ void bpf_dispatcher_change_prog(struct bpf_dispatcher *d, struct bpf_prog *from,
mutex_lock(&d->mutex);
if (!d->image) {
- d->image = bpf_prog_pack_alloc(PAGE_SIZE, bpf_jit_fill_hole_with_zero);
+ d->image = bpf_prog_pack_alloc(PAGE_SIZE, bpf_jit_fill_hole_with_zero, false);
if (!d->image)
goto out;
d->rw_image = bpf_jit_alloc_exec(PAGE_SIZE);
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 21a365d436a5..6515d4d3c003 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -7996,9 +7996,10 @@ reg_find_field_offset(const struct bpf_reg_state *reg, s32 off, u32 fields)
return field;
}
-static int check_func_arg_reg_off(struct bpf_verifier_env *env,
- const struct bpf_reg_state *reg, argno_t argno,
- enum bpf_arg_type arg_type)
+static int __check_func_arg_reg_off(struct bpf_verifier_env *env,
+ const struct bpf_reg_state *reg, argno_t argno,
+ enum bpf_arg_type arg_type,
+ bool btf_id_fixed_off_ok)
{
u32 type = reg->type;
@@ -8055,12 +8056,11 @@ static int check_func_arg_reg_off(struct bpf_verifier_env *env,
case PTR_TO_BTF_ID | MEM_ALLOC | NON_OWN_REF | MEM_RCU:
/* When referenced PTR_TO_BTF_ID is passed to release function,
* its fixed offset must be 0. In the other cases, fixed offset
- * can be non-zero. This was already checked above. So pass
- * fixed_off_ok as true to allow fixed offset for all other
- * cases. var_off always must be 0 for PTR_TO_BTF_ID, hence we
- * still need to do checks instead of returning.
+ * can be non-zero unless the caller requires otherwise.
+ * var_off always must be 0 for PTR_TO_BTF_ID, hence we still
+ * need to do checks instead of returning.
*/
- return __check_ptr_off_reg(env, reg, argno, true);
+ return __check_ptr_off_reg(env, reg, argno, btf_id_fixed_off_ok);
case PTR_TO_CTX:
/*
* Allow fixed and variable offsets for syscall context, but
@@ -8076,6 +8076,13 @@ static int check_func_arg_reg_off(struct bpf_verifier_env *env,
}
}
+static int check_func_arg_reg_off(struct bpf_verifier_env *env,
+ const struct bpf_reg_state *reg, argno_t argno,
+ enum bpf_arg_type arg_type)
+{
+ return __check_func_arg_reg_off(env, reg, argno, arg_type, true);
+}
+
static int check_arg_const_str(struct bpf_verifier_env *env,
struct bpf_reg_state *reg, argno_t argno)
{
@@ -11947,6 +11954,7 @@ static int check_kfunc_args(struct bpf_verifier_env *env, struct bpf_kfunc_call_
enum bpf_arg_type arg_type = ARG_DONTCARE;
argno_t argno = argno_from_arg(i + 1);
int regno = reg_from_argno(argno);
+ bool btf_id_fixed_off_ok = true;
u32 ref_id, type_size;
bool is_ret_buf_sz = false;
int kf_arg_type;
@@ -12120,7 +12128,6 @@ static int check_kfunc_args(struct bpf_verifier_env *env, struct bpf_kfunc_call_
case KF_ARG_PTR_TO_MEM:
case KF_ARG_PTR_TO_MEM_SIZE:
case KF_ARG_PTR_TO_CALLBACK:
- case KF_ARG_PTR_TO_REFCOUNTED_KPTR:
case KF_ARG_PTR_TO_CONST_STR:
case KF_ARG_PTR_TO_WORKQUEUE:
case KF_ARG_PTR_TO_TIMER:
@@ -12134,6 +12141,10 @@ static int check_kfunc_args(struct bpf_verifier_env *env, struct bpf_kfunc_call_
case KF_ARG_PTR_TO_CTX:
arg_type = ARG_PTR_TO_CTX;
break;
+ case KF_ARG_PTR_TO_REFCOUNTED_KPTR:
+ arg_type = ARG_PTR_TO_BTF_ID;
+ btf_id_fixed_off_ok = false;
+ break;
default:
verifier_bug(env, "unknown kfunc arg type %d", kf_arg_type);
return -EFAULT;
@@ -12141,7 +12152,8 @@ static int check_kfunc_args(struct bpf_verifier_env *env, struct bpf_kfunc_call_
if (regno == meta->release_regno)
arg_type |= OBJ_RELEASE;
- ret = check_func_arg_reg_off(env, reg, argno, arg_type);
+ ret = __check_func_arg_reg_off(env, reg, argno, arg_type,
+ btf_id_fixed_off_ok);
if (ret < 0)
return ret;
@@ -19994,13 +20006,13 @@ err_unlock:
if (!is_priv)
mutex_unlock(&bpf_verifier_lock);
bpf_clear_insn_aux_data(env, 0, env->prog->len);
- vfree(env->insn_aux_data);
err_free_env:
bpf_stack_liveness_free(env);
kvfree(env->cfg.insn_postorder);
kvfree(env->scc_info);
kvfree(env->succ);
kvfree(env->gotox_tmp_buf);
+ vfree(env->insn_aux_data);
kvfree(env);
return ret;
}
diff --git a/kernel/fork.c b/kernel/fork.c
index 13e38e89a1f3..f0e2e131a9a5 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -1009,6 +1009,11 @@ static struct task_struct *dup_task_struct(struct task_struct *orig, int node)
tsk->mm_cid.active = 0;
INIT_HLIST_NODE(&tsk->mm_cid.node);
#endif
+
+#ifdef CONFIG_BPF_SYSCALL
+ RCU_INIT_POINTER(tsk->bpf_storage, NULL);
+ tsk->bpf_ctx = NULL;
+#endif
return tsk;
free_stack:
@@ -2247,10 +2252,6 @@ __latent_entropy struct task_struct *copy_process(
p->sequential_io = 0;
p->sequential_io_avg = 0;
#endif
-#ifdef CONFIG_BPF_SYSCALL
- RCU_INIT_POINTER(p->bpf_storage, NULL);
- p->bpf_ctx = NULL;
-#endif
unwind_task_init(p);
diff --git a/net/core/sock_map.c b/net/core/sock_map.c
index 99e3789492a0..c60ba6d292f9 100644
--- a/net/core/sock_map.c
+++ b/net/core/sock_map.c
@@ -1515,6 +1515,17 @@ static int sock_map_prog_link_lookup(struct bpf_map *map, struct bpf_prog ***ppr
return 0;
}
+static int sock_map_prog_attach_check(enum bpf_attach_type attach_type,
+ struct bpf_prog *prog)
+{
+ /* A stream parser must not modify the skb, only measure it. */
+ if (prog && attach_type == BPF_SK_SKB_STREAM_PARSER &&
+ prog->aux->changes_pkt_data)
+ return -EINVAL;
+
+ return 0;
+}
+
/* Handle the following four cases:
* prog_attach: prog != NULL, old == NULL, link == NULL
* prog_detach: prog == NULL, old != NULL, link == NULL
@@ -1533,6 +1544,10 @@ static int sock_map_prog_update(struct bpf_map *map, struct bpf_prog *prog,
if (ret)
return ret;
+ ret = sock_map_prog_attach_check(which, prog);
+ if (ret)
+ return ret;
+
/* for prog_attach/prog_detach/link_attach, return error if a bpf_link
* exists for that prog.
*/
@@ -1776,6 +1791,11 @@ static int sock_map_link_update_prog(struct bpf_link *link,
ret = -EINVAL;
goto out;
}
+
+ ret = sock_map_prog_attach_check(link->attach_type, prog);
+ if (ret)
+ goto out;
+
if (!sockmap_link->map) {
ret = -ENOLINK;
goto out;
diff --git a/security/bpf/hooks.c b/security/bpf/hooks.c
index 40efde233f3a..7b98f5d1e2be 100644
--- a/security/bpf/hooks.c
+++ b/security/bpf/hooks.c
@@ -7,6 +7,8 @@
#include <linux/bpf_lsm.h>
#include <uapi/linux/lsm.h>
+bool bpf_lsm_initialized __ro_after_init;
+
static struct security_hook_list bpf_lsm_hooks[] __ro_after_init = {
#define LSM_HOOK(RET, DEFAULT, NAME, ...) \
LSM_HOOK_INIT(NAME, bpf_lsm_##NAME),
@@ -24,6 +26,7 @@ static int __init bpf_lsm_init(void)
{
security_add_hooks(bpf_lsm_hooks, ARRAY_SIZE(bpf_lsm_hooks),
&bpf_lsmid);
+ bpf_lsm_initialized = true;
pr_info("LSM support for eBPF active\n");
return 0;
}
diff --git a/tools/testing/selftests/bpf/prog_tests/sockmap_strp.c b/tools/testing/selftests/bpf/prog_tests/sockmap_strp.c
index 621b3b71888e..1d7231728eaf 100644
--- a/tools/testing/selftests/bpf/prog_tests/sockmap_strp.c
+++ b/tools/testing/selftests/bpf/prog_tests/sockmap_strp.c
@@ -431,6 +431,35 @@ out:
test_sockmap_strp__destroy(strp);
}
+static void test_sockmap_strp_parser_reject(void)
+{
+ struct test_sockmap_strp *strp = NULL;
+ int parser_mod, parser_ro, link;
+ int err, map;
+
+ strp = test_sockmap_strp__open_and_load();
+ if (!ASSERT_OK_PTR(strp, "test_sockmap_strp__open_and_load"))
+ return;
+
+ map = bpf_map__fd(strp->maps.sock_map);
+ parser_mod = bpf_program__fd(strp->progs.prog_skb_parser_resize);
+ parser_ro = bpf_program__fd(strp->progs.prog_skb_parser);
+
+ err = bpf_prog_attach(parser_mod, map, BPF_SK_SKB_STREAM_PARSER, 0);
+ ASSERT_ERR(err, "bpf_prog_attach parser_mod");
+
+ link = bpf_link_create(parser_ro, map, BPF_SK_SKB_STREAM_PARSER, NULL);
+ if (!ASSERT_GE(link, 0, "bpf_link_create parser_ro"))
+ goto out;
+
+ err = bpf_link_update(link, parser_mod, NULL);
+ ASSERT_ERR(err, "bpf_link_update parser_mod");
+out:
+ if (link >= 0)
+ close(link);
+ test_sockmap_strp__destroy(strp);
+}
+
void test_sockmap_strp(void)
{
if (test__start_subtest("sockmap strp tcp pass"))
@@ -451,4 +480,6 @@ void test_sockmap_strp(void)
test_sockmap_strp_multiple_pkt(AF_INET, SOCK_STREAM);
if (test__start_subtest("sockmap strp tcp dispatch"))
test_sockmap_strp_dispatch_pkt(AF_INET, SOCK_STREAM);
+ if (test__start_subtest("sockmap strp parser reject pkt mod"))
+ test_sockmap_strp_parser_reject();
}
diff --git a/tools/testing/selftests/bpf/progs/refcounted_kptr_fail.c b/tools/testing/selftests/bpf/progs/refcounted_kptr_fail.c
index 7247a20c0a3b..024ef2aae200 100644
--- a/tools/testing/selftests/bpf/progs/refcounted_kptr_fail.c
+++ b/tools/testing/selftests/bpf/progs/refcounted_kptr_fail.c
@@ -13,12 +13,20 @@ struct node_acquire {
struct bpf_refcount refcount;
};
+struct node_refcounted {
+ long key;
+ struct bpf_list_node list;
+ struct bpf_refcount refcount;
+};
+
extern void bpf_rcu_read_lock(void) __ksym;
extern void bpf_rcu_read_unlock(void) __ksym;
#define private(name) SEC(".data." #name) __hidden __attribute__((aligned(8)))
private(A) struct bpf_spin_lock glock;
private(A) struct bpf_rb_root groot __contains(node_acquire, node);
+private(B) struct bpf_spin_lock lock;
+private(B) struct bpf_list_head head __contains(node_refcounted, list);
static bool less(struct bpf_rb_node *a, const struct bpf_rb_node *b)
{
@@ -93,6 +101,32 @@ long rbtree_refcounted_node_ref_escapes_owning_input(void *ctx)
return 0;
}
+SEC("?tc")
+__failure __msg("dereference of modified ptr_ ptr R1")
+long refcount_acquire_list_node_offset(void *ctx)
+{
+ struct node_refcounted *node, *base, *ref;
+ struct bpf_list_node *list_node;
+
+ node = bpf_obj_new(typeof(*node));
+ if (!node)
+ return 1;
+
+ bpf_spin_lock(&lock);
+ bpf_list_push_front(&head, &node->list);
+ list_node = bpf_list_pop_front(&head);
+ bpf_spin_unlock(&lock);
+ if (!list_node)
+ return 2;
+
+ base = container_of(list_node, struct node_refcounted, list);
+ ref = bpf_refcount_acquire(list_node);
+ if (ref)
+ bpf_obj_drop(ref);
+ bpf_obj_drop(base);
+ return 0;
+}
+
SEC("?fentry.s/" SYS_PREFIX "sys_getpgid")
__failure __msg("function calls are not allowed while holding a lock")
int BPF_PROG(rbtree_fail_sleepable_lock_across_rcu,
diff --git a/tools/testing/selftests/bpf/progs/sockmap_parse_prog.c b/tools/testing/selftests/bpf/progs/sockmap_parse_prog.c
index c9abfe3a11af..56e9aebf05f2 100644
--- a/tools/testing/selftests/bpf/progs/sockmap_parse_prog.c
+++ b/tools/testing/selftests/bpf/progs/sockmap_parse_prog.c
@@ -5,28 +5,6 @@
SEC("sk_skb1")
int bpf_prog1(struct __sk_buff *skb)
{
- void *data_end = (void *)(long) skb->data_end;
- void *data = (void *)(long) skb->data;
- __u8 *d = data;
- int err;
-
- if (data + 10 > data_end) {
- err = bpf_skb_pull_data(skb, 10);
- if (err)
- return SK_DROP;
-
- data_end = (void *)(long)skb->data_end;
- data = (void *)(long)skb->data;
- if (data + 10 > data_end)
- return SK_DROP;
- }
-
- /* This write/read is a bit pointless but tests the verifier and
- * strparser handler for read/write pkt data and access into sk
- * fields.
- */
- d = data;
- d[7] = 1;
return skb->len;
}
diff --git a/tools/testing/selftests/bpf/progs/test_sockmap_strp.c b/tools/testing/selftests/bpf/progs/test_sockmap_strp.c
index dde3d5bec515..fe88fa6d40bc 100644
--- a/tools/testing/selftests/bpf/progs/test_sockmap_strp.c
+++ b/tools/testing/selftests/bpf/progs/test_sockmap_strp.c
@@ -50,4 +50,11 @@ int prog_skb_parser_partial(struct __sk_buff *skb)
return 10;
}
+SEC("sk_skb/stream_parser")
+int prog_skb_parser_resize(struct __sk_buff *skb)
+{
+ bpf_skb_change_tail(skb, skb->len, 0);
+ return skb->len;
+}
+
char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/verifier_unpriv.c b/tools/testing/selftests/bpf/progs/verifier_unpriv.c
index 49f7bd05edad..42de5cff7e52 100644
--- a/tools/testing/selftests/bpf/progs/verifier_unpriv.c
+++ b/tools/testing/selftests/bpf/progs/verifier_unpriv.c
@@ -6,6 +6,8 @@
#include "../../../include/linux/filter.h"
#include "bpf_misc.h"
+extern const int bpf_prog_active __ksym;
+
#define BPF_SK_LOOKUP(func) \
/* struct bpf_sock_tuple tuple = {} */ \
"r2 = 0;" \
@@ -78,6 +80,23 @@ __naked void dummy_prog_loop1_socket(void)
}
SEC("socket")
+__description("unpriv: pseudo btf id log masks address")
+__success_unpriv
+__msg_unpriv("0: (18) r1 = 0x0")
+__not_msg_unpriv("0: (18) r1 = 0x{{[1-9a-f][0-9a-f]*}}")
+__retval_unpriv(0)
+__log_level(2)
+__naked void pseudo_btf_id_log_masks_address(void)
+{
+ asm volatile ("r1 = %[bpf_prog_active] ll;"
+ "r0 = 0;"
+ "exit;"
+ :
+ : __imm_addr(bpf_prog_active)
+ : __clobber_all);
+}
+
+SEC("socket")
__description("unpriv: return pointer")
__success __failure_unpriv __msg_unpriv("R0 leaks addr")
__retval(POINTER_VALUE)