diff options
| author | Linus Torvalds <torvalds@linux-foundation.org> | 2026-07-02 16:39:28 -1000 |
|---|---|---|
| committer | Linus Torvalds <torvalds@linux-foundation.org> | 2026-07-02 16:39:28 -1000 |
| commit | 51512e22efe813d8223de27f6fd02a8a48ea2323 (patch) | |
| tree | 71652a4126cce9f0529d265519df11fb118af40b | |
| parent | 826eec5b5efd785dc87638a54d5ecc9f88e5afce (diff) | |
| parent | b72e29e0f7ee329d89f86db8700c8ea99b4a370a (diff) | |
Merge tag 'bpf-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf
Pull BPF fixes from Daniel Borkmann:
- Initialize task local storage before fork bails out to free the task
(Jann Horn)
- Fix insn_aux_data leak on verifier error path (KaFai Wan)
- Reject BPF inode storage map creation when BPF LSM is uninitialized
(Matt Bobrowski)
- Mask pseudo pointer values in verifier logs when pointer leaks are
not allowed (Nuoqi Gui)
- Harden BPF JIT against spraying via IBPB flush (Pawan Gupta)
- Reject a skb-modifying SK_SKB stream parser since the latter is only
meant to measure the next message (Sechang Lim)
- Fix bpf_refcount_acquire to reject refcounted allocation arguments
with a non-zero fixed offset (Yiyang Chen)
* tag 'bpf-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf:
bpf: Prefer dirty packs for eBPF allocations
bpf: Prefer packs that won't trigger an IBPB flush on allocation
bpf: Skip redundant IBPB in pack allocator
bpf: Restrict JIT predictor flush to cBPF
x86/bugs: Enable IBPB flush on BPF JIT allocation
bpf: Support for hardening against JIT spraying
bpf: Reject BPF_MAP_TYPE_INODE_STORAGE creation if BPF LSM is uninitialized
bpf,fork: wipe ->bpf_storage before bailouts that access it
bpf: Fix insn_aux_data leak on verifier err_free_env path
selftests/bpf: Cover pseudo-BTF ksym log masking
bpf: Mask pseudo pointer values in verifier logs
selftests/bpf: Cover refcount acquire node offsets
bpf: Reject offset refcount acquire arguments
selftests/bpf: test rejection of a packet-modifying SK_SKB stream parser
bpf, sockmap: reject a packet-modifying SK_SKB stream parser
selftests/bpf: don't modify the skb in the strparser parser prog
| -rw-r--r-- | arch/arm64/net/bpf_jit_comp.c | 4 | ||||
| -rw-r--r-- | arch/loongarch/net/bpf_jit.c | 5 | ||||
| -rw-r--r-- | arch/powerpc/net/bpf_jit_comp.c | 4 | ||||
| -rw-r--r-- | arch/riscv/net/bpf_jit_comp64.c | 2 | ||||
| -rw-r--r-- | arch/riscv/net/bpf_jit_core.c | 3 | ||||
| -rw-r--r-- | arch/x86/include/asm/nospec-branch.h | 4 | ||||
| -rw-r--r-- | arch/x86/kernel/cpu/bugs.c | 50 | ||||
| -rw-r--r-- | arch/x86/net/bpf_jit_comp.c | 5 | ||||
| -rw-r--r-- | include/linux/bpf_lsm.h | 4 | ||||
| -rw-r--r-- | include/linux/filter.h | 15 | ||||
| -rw-r--r-- | kernel/bpf/bpf_inode_storage.c | 9 | ||||
| -rw-r--r-- | kernel/bpf/core.c | 68 | ||||
| -rw-r--r-- | kernel/bpf/disasm.c | 5 | ||||
| -rw-r--r-- | kernel/bpf/dispatcher.c | 2 | ||||
| -rw-r--r-- | kernel/bpf/verifier.c | 34 | ||||
| -rw-r--r-- | kernel/fork.c | 9 | ||||
| -rw-r--r-- | net/core/sock_map.c | 20 | ||||
| -rw-r--r-- | security/bpf/hooks.c | 3 | ||||
| -rw-r--r-- | tools/testing/selftests/bpf/prog_tests/sockmap_strp.c | 31 | ||||
| -rw-r--r-- | tools/testing/selftests/bpf/progs/refcounted_kptr_fail.c | 34 | ||||
| -rw-r--r-- | tools/testing/selftests/bpf/progs/sockmap_parse_prog.c | 22 | ||||
| -rw-r--r-- | tools/testing/selftests/bpf/progs/test_sockmap_strp.c | 7 | ||||
| -rw-r--r-- | tools/testing/selftests/bpf/progs/verifier_unpriv.c | 19 |
23 files changed, 297 insertions, 62 deletions
diff --git a/arch/arm64/net/bpf_jit_comp.c b/arch/arm64/net/bpf_jit_comp.c index f6bcc0e1a950..b0075ece4a6e 100644 --- a/arch/arm64/net/bpf_jit_comp.c +++ b/arch/arm64/net/bpf_jit_comp.c @@ -2177,7 +2177,7 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_verifier_env *env, struct bpf_pr image_size = extable_offset + extable_size; ro_header = bpf_jit_binary_pack_alloc(image_size, &ro_image_ptr, sizeof(u64), &header, &image_ptr, - jit_fill_hole); + jit_fill_hole, was_classic); if (!ro_header) goto out_off; @@ -2870,7 +2870,7 @@ int arch_bpf_trampoline_size(const struct btf_func_model *m, u32 flags, void *arch_alloc_bpf_trampoline(unsigned int size) { - return bpf_prog_pack_alloc(size, jit_fill_hole); + return bpf_prog_pack_alloc(size, jit_fill_hole, false); } void arch_free_bpf_trampoline(void *image, unsigned int size) diff --git a/arch/loongarch/net/bpf_jit.c b/arch/loongarch/net/bpf_jit.c index ad7e28375aa9..2738b4db1165 100644 --- a/arch/loongarch/net/bpf_jit.c +++ b/arch/loongarch/net/bpf_jit.c @@ -1790,7 +1790,7 @@ static int invoke_bpf(struct jit_ctx *ctx, struct bpf_tramp_nodes *tn, void *arch_alloc_bpf_trampoline(unsigned int size) { - return bpf_prog_pack_alloc(size, jit_fill_hole); + return bpf_prog_pack_alloc(size, jit_fill_hole, false); } void arch_free_bpf_trampoline(void *image, unsigned int size) @@ -2256,7 +2256,8 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_verifier_env *env, struct bpf_pr image_size = prog_size + extable_size; /* Now we know the size of the structure to make */ ro_header = bpf_jit_binary_pack_alloc(image_size, &ro_image_ptr, sizeof(u32), - &header, &image_ptr, jit_fill_hole); + &header, &image_ptr, jit_fill_hole, + bpf_prog_was_classic(prog)); if (!ro_header) goto out_offset; diff --git a/arch/powerpc/net/bpf_jit_comp.c b/arch/powerpc/net/bpf_jit_comp.c index d4a17e18c9fb..7b07b43575f1 100644 --- a/arch/powerpc/net/bpf_jit_comp.c +++ b/arch/powerpc/net/bpf_jit_comp.c @@ -295,7 +295,7 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_verifier_env *env, struct bpf_pr alloclen = proglen + FUNCTION_DESCR_SIZE + fixup_len + extable_len; fhdr = bpf_jit_binary_pack_alloc(alloclen, &fimage, 4, &hdr, &image, - bpf_jit_fill_ill_insns); + bpf_jit_fill_ill_insns, bpf_prog_was_classic(fp)); if (!fhdr) goto out_err; @@ -588,7 +588,7 @@ bool bpf_jit_inlines_helper_call(s32 imm) void *arch_alloc_bpf_trampoline(unsigned int size) { - return bpf_prog_pack_alloc(size, bpf_jit_fill_ill_insns); + return bpf_prog_pack_alloc(size, bpf_jit_fill_ill_insns, false); } void arch_free_bpf_trampoline(void *image, unsigned int size) diff --git a/arch/riscv/net/bpf_jit_comp64.c b/arch/riscv/net/bpf_jit_comp64.c index c03c1de16b79..f9d5347ba966 100644 --- a/arch/riscv/net/bpf_jit_comp64.c +++ b/arch/riscv/net/bpf_jit_comp64.c @@ -1321,7 +1321,7 @@ int arch_bpf_trampoline_size(const struct btf_func_model *m, u32 flags, void *arch_alloc_bpf_trampoline(unsigned int size) { - return bpf_prog_pack_alloc(size, bpf_fill_ill_insns); + return bpf_prog_pack_alloc(size, bpf_fill_ill_insns, false); } void arch_free_bpf_trampoline(void *image, unsigned int size) diff --git a/arch/riscv/net/bpf_jit_core.c b/arch/riscv/net/bpf_jit_core.c index 4365d07aaf54..ce3bd3762e08 100644 --- a/arch/riscv/net/bpf_jit_core.c +++ b/arch/riscv/net/bpf_jit_core.c @@ -109,7 +109,8 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_verifier_env *env, struct bpf_pr bpf_jit_binary_pack_alloc(prog_size + extable_size, &jit_data->ro_image, sizeof(u32), &jit_data->header, &jit_data->image, - bpf_fill_ill_insns); + bpf_fill_ill_insns, + bpf_prog_was_classic(prog)); if (!jit_data->ro_header) goto out_offset; diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h index 4f4b5e8a1574..b68892e6d58c 100644 --- a/arch/x86/include/asm/nospec-branch.h +++ b/arch/x86/include/asm/nospec-branch.h @@ -388,6 +388,10 @@ extern void srso_alias_return_thunk(void); extern void entry_untrain_ret(void); extern void write_ibpb(void); +#ifdef CONFIG_BPF_JIT +extern void bpf_arch_ibpb(void); +#endif + #ifdef CONFIG_X86_64 extern void clear_bhb_loop(void); #endif diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index 83f51cab0b1e..d9af230c0512 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -16,6 +16,7 @@ #include <linux/sched/smt.h> #include <linux/pgtable.h> #include <linux/bpf.h> +#include <linux/filter.h> #include <linux/kvm_types.h> #include <asm/spec-ctrl.h> @@ -1651,8 +1652,21 @@ static inline const char *spectre_v2_module_string(void) { return spectre_v2_bad_module ? " - vulnerable module loaded" : ""; } + +/* + * The "retpoline sequence" is the "call;mov;ret" sequence that + * replaces normal indirect branch instructions. Differentiate + * *the* retpoline sequence from the LFENCE-prefixed indirect + * branches that simply use the retpoline infrastructure. + */ +static inline bool retpoline_seq_enabled(void) +{ + return boot_cpu_has(X86_FEATURE_RETPOLINE) && !boot_cpu_has(X86_FEATURE_RETPOLINE_LFENCE); +} + #else static inline const char *spectre_v2_module_string(void) { return ""; } +static inline bool retpoline_seq_enabled(void) { return false; } #endif #define SPECTRE_V2_LFENCE_MSG "WARNING: LFENCE mitigation is not recommended for this CPU, data leaks possible!\n" @@ -2095,8 +2109,7 @@ static void __init bhi_apply_mitigation(void) return; /* Retpoline mitigates against BHI unless the CPU has RRSBA behavior */ - if (boot_cpu_has(X86_FEATURE_RETPOLINE) && - !boot_cpu_has(X86_FEATURE_RETPOLINE_LFENCE)) { + if (retpoline_seq_enabled()) { spec_ctrl_disable_kernel_rrsba(); if (rrsba_disabled) return; @@ -2238,6 +2251,27 @@ static void __init spectre_v2_update_mitigation(void) pr_info("%s\n", spectre_v2_strings[spectre_v2_enabled]); } +#ifdef CONFIG_BPF_JIT +static void __bpf_arch_ibpb(void *unused) +{ + write_ibpb(); +} + +void bpf_arch_ibpb(void) +{ + on_each_cpu(__bpf_arch_ibpb, NULL, 1); +} + +static bool __init cpu_wants_ibpb_bpf(void) +{ + /* A genuine retpoline already neutralizes ring0 indirect predictions */ + if (retpoline_seq_enabled()) + return false; + + return boot_cpu_has(X86_FEATURE_IBPB); +} +#endif + static void __init spectre_v2_apply_mitigation(void) { if (spectre_v2_enabled == SPECTRE_V2_EIBRS && unprivileged_ebpf_enabled()) @@ -2314,6 +2348,14 @@ static void __init spectre_v2_apply_mitigation(void) setup_force_cpu_cap(X86_FEATURE_USE_IBRS_FW); pr_info("Enabling Restricted Speculation for firmware calls\n"); } + +#ifdef CONFIG_BPF_JIT + if (cpu_wants_ibpb_bpf()) { + static_call_update(bpf_arch_pred_flush, bpf_arch_ibpb); + static_branch_enable(&bpf_pred_flush_enabled); + pr_info("Enabling IBPB for BPF\n"); + } +#endif } static void update_stibp_msr(void * __unused) @@ -3490,9 +3532,7 @@ static const char *spectre_bhi_state(void) return "; BHI: BHI_DIS_S"; else if (boot_cpu_has(X86_FEATURE_CLEAR_BHB_LOOP)) return "; BHI: SW loop, KVM: SW loop"; - else if (boot_cpu_has(X86_FEATURE_RETPOLINE) && - !boot_cpu_has(X86_FEATURE_RETPOLINE_LFENCE) && - rrsba_disabled) + else if (retpoline_seq_enabled() && rrsba_disabled) return "; BHI: Retpoline"; else if (boot_cpu_has(X86_FEATURE_CLEAR_BHB_VMEXIT)) return "; BHI: Vulnerable, KVM: SW loop"; diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c index 054e043ffcd2..de7515ea1bea 100644 --- a/arch/x86/net/bpf_jit_comp.c +++ b/arch/x86/net/bpf_jit_comp.c @@ -3653,7 +3653,7 @@ cleanup: void *arch_alloc_bpf_trampoline(unsigned int size) { - return bpf_prog_pack_alloc(size, jit_fill_hole); + return bpf_prog_pack_alloc(size, jit_fill_hole, false); } void arch_free_bpf_trampoline(void *image, unsigned int size) @@ -3965,7 +3965,8 @@ out_image: /* allocate module memory for x86 insns and extable */ header = bpf_jit_binary_pack_alloc(roundup(proglen, align) + extable_size, &image, align, &rw_header, &rw_image, - jit_fill_hole); + jit_fill_hole, + bpf_prog_was_classic(prog)); if (!header) goto out_addrs; prog->aux->extable = (void *) image + roundup(proglen, align); diff --git a/include/linux/bpf_lsm.h b/include/linux/bpf_lsm.h index 143775a27a2a..dda272d78f01 100644 --- a/include/linux/bpf_lsm.h +++ b/include/linux/bpf_lsm.h @@ -14,6 +14,8 @@ #ifdef CONFIG_BPF_LSM +extern bool bpf_lsm_initialized __ro_after_init; + #define LSM_HOOK(RET, DEFAULT, NAME, ...) \ RET bpf_lsm_##NAME(__VA_ARGS__); #include <linux/lsm_hook_defs.h> @@ -56,6 +58,8 @@ bool bpf_lsm_hook_returns_errno(u32 btf_id); #else /* !CONFIG_BPF_LSM */ +#define bpf_lsm_initialized false + static inline bool bpf_lsm_is_sleepable_hook(u32 btf_id) { return false; diff --git a/include/linux/filter.h b/include/linux/filter.h index 67d337ede91b..14acb2455746 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -21,6 +21,7 @@ #include <linux/if_vlan.h> #include <linux/vmalloc.h> #include <linux/sockptr.h> +#include <linux/static_call.h> #include <linux/u64_stats_sync.h> #include <net/sch_generic.h> @@ -1314,6 +1315,15 @@ extern long bpf_jit_limit_max; typedef void (*bpf_jit_fill_hole_t)(void *area, unsigned int size); +/* + * Flush the indirect branch predictors before reusing JIT memory, so that + * indirect jumps into a newly written program don't reuse predictions left + * behind by an old program that occupied the same space. + */ +void bpf_arch_pred_flush(void); +DECLARE_STATIC_CALL(bpf_arch_pred_flush, bpf_arch_pred_flush); +DECLARE_STATIC_KEY_FALSE(bpf_pred_flush_enabled); + void bpf_jit_fill_hole_with_zero(void *area, unsigned int size); struct bpf_binary_header * @@ -1328,7 +1338,7 @@ void bpf_jit_free(struct bpf_prog *fp); struct bpf_binary_header * bpf_jit_binary_pack_hdr(const struct bpf_prog *fp); -void *bpf_prog_pack_alloc(u32 size, bpf_jit_fill_hole_t bpf_fill_ill_insns); +void *bpf_prog_pack_alloc(u32 size, bpf_jit_fill_hole_t bpf_fill_ill_insns, bool was_classic); void bpf_prog_pack_free(void *ptr, u32 size); static inline bool bpf_prog_kallsyms_verify_off(const struct bpf_prog *fp) @@ -1342,7 +1352,8 @@ bpf_jit_binary_pack_alloc(unsigned int proglen, u8 **ro_image, unsigned int alignment, struct bpf_binary_header **rw_hdr, u8 **rw_image, - bpf_jit_fill_hole_t bpf_fill_ill_insns); + bpf_jit_fill_hole_t bpf_fill_ill_insns, + bool was_classic); int bpf_jit_binary_pack_finalize(struct bpf_binary_header *ro_header, struct bpf_binary_header *rw_header); void bpf_jit_binary_pack_free(struct bpf_binary_header *ro_header, diff --git a/kernel/bpf/bpf_inode_storage.c b/kernel/bpf/bpf_inode_storage.c index 0da8d923e39d..f9e81060c1f4 100644 --- a/kernel/bpf/bpf_inode_storage.c +++ b/kernel/bpf/bpf_inode_storage.c @@ -178,6 +178,15 @@ static int notsupp_get_next_key(struct bpf_map *map, void *key, static struct bpf_map *inode_storage_map_alloc(union bpf_attr *attr) { + /* + * Do not allow allocation of BPF_MAP_TYPE_INODE_STORAGE if the BPF LSM + * was not initialized by the LSM framework at boot. Without proper + * initialization, the BPF inode security blob offset remains unprepared, + * causing bpf_inode() to calculate an invalid memory offset and corrupt + * inode->i_security. + */ + if (!bpf_lsm_initialized) + return ERR_PTR(-EOPNOTSUPP); return bpf_local_storage_map_alloc(attr, &inode_cache); } diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c index 649cce41e13f..6e19a030da6f 100644 --- a/kernel/bpf/core.c +++ b/kernel/bpf/core.c @@ -20,6 +20,7 @@ #include <uapi/linux/btf.h> #include <linux/filter.h> #include <linux/skbuff.h> +#include <linux/static_call.h> #include <linux/vmalloc.h> #include <linux/prandom.h> #include <linux/bpf.h> @@ -875,6 +876,7 @@ int bpf_jit_add_poke_descriptor(struct bpf_prog *prog, struct bpf_prog_pack { struct list_head list; void *ptr; + bool arch_flush_needed; unsigned long bitmap[]; }; @@ -883,6 +885,15 @@ void bpf_jit_fill_hole_with_zero(void *area, unsigned int size) memset(area, 0, size); } +DEFINE_STATIC_CALL_NULL(bpf_arch_pred_flush, bpf_arch_pred_flush); + +/* + * Enabled once bpf_arch_pred_flush points at a real flush routine. Lets the + * pack allocator test "is a predictor flush wired up at all" with a cheap + * static branch instead of repeatedly querying the static call target. + */ +DEFINE_STATIC_KEY_FALSE(bpf_pred_flush_enabled); + #define BPF_PROG_SIZE_TO_NBITS(size) (round_up(size, BPF_PROG_CHUNK_SIZE) / BPF_PROG_CHUNK_SIZE) static DEFINE_MUTEX(pack_mutex); @@ -918,6 +929,8 @@ static struct bpf_prog_pack *alloc_new_pack(bpf_jit_fill_hole_t bpf_fill_ill_ins bpf_fill_ill_insns(pack->ptr, BPF_PROG_PACK_SIZE); bitmap_zero(pack->bitmap, BPF_PROG_PACK_SIZE / BPF_PROG_CHUNK_SIZE); + if (static_branch_unlikely(&bpf_pred_flush_enabled)) + pack->arch_flush_needed = true; set_vm_flush_reset_perms(pack->ptr); err = set_memory_rox((unsigned long)pack->ptr, BPF_PROG_PACK_SIZE / PAGE_SIZE); @@ -932,15 +945,23 @@ out: return NULL; } -void *bpf_prog_pack_alloc(u32 size, bpf_jit_fill_hole_t bpf_fill_ill_insns) +void *bpf_prog_pack_alloc(u32 size, bpf_jit_fill_hole_t bpf_fill_ill_insns, bool was_classic) { unsigned int nbits = BPF_PROG_SIZE_TO_NBITS(size); - struct bpf_prog_pack *pack; - unsigned long pos; + struct bpf_prog_pack *pack, *fallback_pack = NULL; + unsigned long pos, fallback_pos = 0; void *ptr = NULL; mutex_lock(&pack_mutex); if (size > BPF_PROG_PACK_SIZE) { + /* + * Allocations larger than a pack get their own pages, and + * predictors are not flushed for such allocation. This is only + * safe because cBPF programs (the unprivileged attack surface) + * are bounded well below a pack size. + */ + if (was_classic && static_branch_unlikely(&bpf_pred_flush_enabled)) + pr_warn_once("BPF: Predictors not flushed for allocations greater than BPF_PROG_PACK_SIZE\n"); size = round_up(size, PAGE_SIZE); ptr = bpf_jit_alloc_exec(size); if (ptr) { @@ -960,8 +981,29 @@ void *bpf_prog_pack_alloc(u32 size, bpf_jit_fill_hole_t bpf_fill_ill_insns) list_for_each_entry(pack, &pack_list, list) { pos = bitmap_find_next_zero_area(pack->bitmap, BPF_PROG_CHUNK_COUNT, 0, nbits, 0); - if (pos < BPF_PROG_CHUNK_COUNT) + if (pos >= BPF_PROG_CHUNK_COUNT) + continue; + /* Flush not enabled, use any pack */ + if (!static_branch_unlikely(&bpf_pred_flush_enabled)) goto found_free_area; + /* + * cBPF reuse of a dirty pack triggers a flush, so prefer a + * clean pack for cBPF. eBPF never flushes, so steer it to a + * dirty pack and keep clean packs free for cBPF. + */ + if (was_classic ^ pack->arch_flush_needed) + goto found_free_area; + if (!fallback_pack) { + fallback_pack = pack; + fallback_pos = pos; + } + } + + /* No preferred pack found */ + if (fallback_pack) { + pack = fallback_pack; + pos = fallback_pos; + goto found_free_area; } pack = alloc_new_pack(bpf_fill_ill_insns); @@ -971,6 +1013,16 @@ void *bpf_prog_pack_alloc(u32 size, bpf_jit_fill_hole_t bpf_fill_ill_insns) pos = 0; found_free_area: + /* Flush only for cBPF as it may contain a crafted gadget */ + if (static_branch_unlikely(&bpf_pred_flush_enabled) && + pack->arch_flush_needed && + was_classic) { + struct bpf_prog_pack *p; + + static_call_cond(bpf_arch_pred_flush)(); + list_for_each_entry(p, &pack_list, list) + p->arch_flush_needed = false; + } bitmap_set(pack->bitmap, pos, nbits); ptr = (void *)(pack->ptr) + (pos << BPF_PROG_CHUNK_SHIFT); @@ -1008,6 +1060,9 @@ void bpf_prog_pack_free(void *ptr, u32 size) "bpf_prog_pack bug: missing bpf_arch_text_invalidate?\n"); bitmap_clear(pack->bitmap, pos, nbits); + + if (static_branch_unlikely(&bpf_pred_flush_enabled)) + pack->arch_flush_needed = true; if (bitmap_find_next_zero_area(pack->bitmap, BPF_PROG_CHUNK_COUNT, 0, BPF_PROG_CHUNK_COUNT, 0) == 0) { list_del(&pack->list); @@ -1130,7 +1185,8 @@ bpf_jit_binary_pack_alloc(unsigned int proglen, u8 **image_ptr, unsigned int alignment, struct bpf_binary_header **rw_header, u8 **rw_image, - bpf_jit_fill_hole_t bpf_fill_ill_insns) + bpf_jit_fill_hole_t bpf_fill_ill_insns, + bool was_classic) { struct bpf_binary_header *ro_header; u32 size, hole, start; @@ -1143,7 +1199,7 @@ bpf_jit_binary_pack_alloc(unsigned int proglen, u8 **image_ptr, if (bpf_jit_charge_modmem(size)) return NULL; - ro_header = bpf_prog_pack_alloc(size, bpf_fill_ill_insns); + ro_header = bpf_prog_pack_alloc(size, bpf_fill_ill_insns, was_classic); if (!ro_header) { bpf_jit_uncharge_modmem(size); return NULL; diff --git a/kernel/bpf/disasm.c b/kernel/bpf/disasm.c index f8a3c7eb451e..0391b3bc0073 100644 --- a/kernel/bpf/disasm.c +++ b/kernel/bpf/disasm.c @@ -323,7 +323,10 @@ void print_bpf_insn(const struct bpf_insn_cbs *cbs, */ u64 imm = ((u64)(insn + 1)->imm << 32) | (u32)insn->imm; bool is_ptr = insn->src_reg == BPF_PSEUDO_MAP_FD || - insn->src_reg == BPF_PSEUDO_MAP_VALUE; + insn->src_reg == BPF_PSEUDO_MAP_VALUE || + insn->src_reg == BPF_PSEUDO_MAP_IDX || + insn->src_reg == BPF_PSEUDO_MAP_IDX_VALUE || + insn->src_reg == BPF_PSEUDO_BTF_ID; char tmp[64]; if (is_ptr && !allow_ptr_leaks) diff --git a/kernel/bpf/dispatcher.c b/kernel/bpf/dispatcher.c index b77db7413f8c..ea2d60dc1fee 100644 --- a/kernel/bpf/dispatcher.c +++ b/kernel/bpf/dispatcher.c @@ -145,7 +145,7 @@ void bpf_dispatcher_change_prog(struct bpf_dispatcher *d, struct bpf_prog *from, mutex_lock(&d->mutex); if (!d->image) { - d->image = bpf_prog_pack_alloc(PAGE_SIZE, bpf_jit_fill_hole_with_zero); + d->image = bpf_prog_pack_alloc(PAGE_SIZE, bpf_jit_fill_hole_with_zero, false); if (!d->image) goto out; d->rw_image = bpf_jit_alloc_exec(PAGE_SIZE); diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 21a365d436a5..6515d4d3c003 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -7996,9 +7996,10 @@ reg_find_field_offset(const struct bpf_reg_state *reg, s32 off, u32 fields) return field; } -static int check_func_arg_reg_off(struct bpf_verifier_env *env, - const struct bpf_reg_state *reg, argno_t argno, - enum bpf_arg_type arg_type) +static int __check_func_arg_reg_off(struct bpf_verifier_env *env, + const struct bpf_reg_state *reg, argno_t argno, + enum bpf_arg_type arg_type, + bool btf_id_fixed_off_ok) { u32 type = reg->type; @@ -8055,12 +8056,11 @@ static int check_func_arg_reg_off(struct bpf_verifier_env *env, case PTR_TO_BTF_ID | MEM_ALLOC | NON_OWN_REF | MEM_RCU: /* When referenced PTR_TO_BTF_ID is passed to release function, * its fixed offset must be 0. In the other cases, fixed offset - * can be non-zero. This was already checked above. So pass - * fixed_off_ok as true to allow fixed offset for all other - * cases. var_off always must be 0 for PTR_TO_BTF_ID, hence we - * still need to do checks instead of returning. + * can be non-zero unless the caller requires otherwise. + * var_off always must be 0 for PTR_TO_BTF_ID, hence we still + * need to do checks instead of returning. */ - return __check_ptr_off_reg(env, reg, argno, true); + return __check_ptr_off_reg(env, reg, argno, btf_id_fixed_off_ok); case PTR_TO_CTX: /* * Allow fixed and variable offsets for syscall context, but @@ -8076,6 +8076,13 @@ static int check_func_arg_reg_off(struct bpf_verifier_env *env, } } +static int check_func_arg_reg_off(struct bpf_verifier_env *env, + const struct bpf_reg_state *reg, argno_t argno, + enum bpf_arg_type arg_type) +{ + return __check_func_arg_reg_off(env, reg, argno, arg_type, true); +} + static int check_arg_const_str(struct bpf_verifier_env *env, struct bpf_reg_state *reg, argno_t argno) { @@ -11947,6 +11954,7 @@ static int check_kfunc_args(struct bpf_verifier_env *env, struct bpf_kfunc_call_ enum bpf_arg_type arg_type = ARG_DONTCARE; argno_t argno = argno_from_arg(i + 1); int regno = reg_from_argno(argno); + bool btf_id_fixed_off_ok = true; u32 ref_id, type_size; bool is_ret_buf_sz = false; int kf_arg_type; @@ -12120,7 +12128,6 @@ static int check_kfunc_args(struct bpf_verifier_env *env, struct bpf_kfunc_call_ case KF_ARG_PTR_TO_MEM: case KF_ARG_PTR_TO_MEM_SIZE: case KF_ARG_PTR_TO_CALLBACK: - case KF_ARG_PTR_TO_REFCOUNTED_KPTR: case KF_ARG_PTR_TO_CONST_STR: case KF_ARG_PTR_TO_WORKQUEUE: case KF_ARG_PTR_TO_TIMER: @@ -12134,6 +12141,10 @@ static int check_kfunc_args(struct bpf_verifier_env *env, struct bpf_kfunc_call_ case KF_ARG_PTR_TO_CTX: arg_type = ARG_PTR_TO_CTX; break; + case KF_ARG_PTR_TO_REFCOUNTED_KPTR: + arg_type = ARG_PTR_TO_BTF_ID; + btf_id_fixed_off_ok = false; + break; default: verifier_bug(env, "unknown kfunc arg type %d", kf_arg_type); return -EFAULT; @@ -12141,7 +12152,8 @@ static int check_kfunc_args(struct bpf_verifier_env *env, struct bpf_kfunc_call_ if (regno == meta->release_regno) arg_type |= OBJ_RELEASE; - ret = check_func_arg_reg_off(env, reg, argno, arg_type); + ret = __check_func_arg_reg_off(env, reg, argno, arg_type, + btf_id_fixed_off_ok); if (ret < 0) return ret; @@ -19994,13 +20006,13 @@ err_unlock: if (!is_priv) mutex_unlock(&bpf_verifier_lock); bpf_clear_insn_aux_data(env, 0, env->prog->len); - vfree(env->insn_aux_data); err_free_env: bpf_stack_liveness_free(env); kvfree(env->cfg.insn_postorder); kvfree(env->scc_info); kvfree(env->succ); kvfree(env->gotox_tmp_buf); + vfree(env->insn_aux_data); kvfree(env); return ret; } diff --git a/kernel/fork.c b/kernel/fork.c index 13e38e89a1f3..f0e2e131a9a5 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -1009,6 +1009,11 @@ static struct task_struct *dup_task_struct(struct task_struct *orig, int node) tsk->mm_cid.active = 0; INIT_HLIST_NODE(&tsk->mm_cid.node); #endif + +#ifdef CONFIG_BPF_SYSCALL + RCU_INIT_POINTER(tsk->bpf_storage, NULL); + tsk->bpf_ctx = NULL; +#endif return tsk; free_stack: @@ -2247,10 +2252,6 @@ __latent_entropy struct task_struct *copy_process( p->sequential_io = 0; p->sequential_io_avg = 0; #endif -#ifdef CONFIG_BPF_SYSCALL - RCU_INIT_POINTER(p->bpf_storage, NULL); - p->bpf_ctx = NULL; -#endif unwind_task_init(p); diff --git a/net/core/sock_map.c b/net/core/sock_map.c index 99e3789492a0..c60ba6d292f9 100644 --- a/net/core/sock_map.c +++ b/net/core/sock_map.c @@ -1515,6 +1515,17 @@ static int sock_map_prog_link_lookup(struct bpf_map *map, struct bpf_prog ***ppr return 0; } +static int sock_map_prog_attach_check(enum bpf_attach_type attach_type, + struct bpf_prog *prog) +{ + /* A stream parser must not modify the skb, only measure it. */ + if (prog && attach_type == BPF_SK_SKB_STREAM_PARSER && + prog->aux->changes_pkt_data) + return -EINVAL; + + return 0; +} + /* Handle the following four cases: * prog_attach: prog != NULL, old == NULL, link == NULL * prog_detach: prog == NULL, old != NULL, link == NULL @@ -1533,6 +1544,10 @@ static int sock_map_prog_update(struct bpf_map *map, struct bpf_prog *prog, if (ret) return ret; + ret = sock_map_prog_attach_check(which, prog); + if (ret) + return ret; + /* for prog_attach/prog_detach/link_attach, return error if a bpf_link * exists for that prog. */ @@ -1776,6 +1791,11 @@ static int sock_map_link_update_prog(struct bpf_link *link, ret = -EINVAL; goto out; } + + ret = sock_map_prog_attach_check(link->attach_type, prog); + if (ret) + goto out; + if (!sockmap_link->map) { ret = -ENOLINK; goto out; diff --git a/security/bpf/hooks.c b/security/bpf/hooks.c index 40efde233f3a..7b98f5d1e2be 100644 --- a/security/bpf/hooks.c +++ b/security/bpf/hooks.c @@ -7,6 +7,8 @@ #include <linux/bpf_lsm.h> #include <uapi/linux/lsm.h> +bool bpf_lsm_initialized __ro_after_init; + static struct security_hook_list bpf_lsm_hooks[] __ro_after_init = { #define LSM_HOOK(RET, DEFAULT, NAME, ...) \ LSM_HOOK_INIT(NAME, bpf_lsm_##NAME), @@ -24,6 +26,7 @@ static int __init bpf_lsm_init(void) { security_add_hooks(bpf_lsm_hooks, ARRAY_SIZE(bpf_lsm_hooks), &bpf_lsmid); + bpf_lsm_initialized = true; pr_info("LSM support for eBPF active\n"); return 0; } diff --git a/tools/testing/selftests/bpf/prog_tests/sockmap_strp.c b/tools/testing/selftests/bpf/prog_tests/sockmap_strp.c index 621b3b71888e..1d7231728eaf 100644 --- a/tools/testing/selftests/bpf/prog_tests/sockmap_strp.c +++ b/tools/testing/selftests/bpf/prog_tests/sockmap_strp.c @@ -431,6 +431,35 @@ out: test_sockmap_strp__destroy(strp); } +static void test_sockmap_strp_parser_reject(void) +{ + struct test_sockmap_strp *strp = NULL; + int parser_mod, parser_ro, link; + int err, map; + + strp = test_sockmap_strp__open_and_load(); + if (!ASSERT_OK_PTR(strp, "test_sockmap_strp__open_and_load")) + return; + + map = bpf_map__fd(strp->maps.sock_map); + parser_mod = bpf_program__fd(strp->progs.prog_skb_parser_resize); + parser_ro = bpf_program__fd(strp->progs.prog_skb_parser); + + err = bpf_prog_attach(parser_mod, map, BPF_SK_SKB_STREAM_PARSER, 0); + ASSERT_ERR(err, "bpf_prog_attach parser_mod"); + + link = bpf_link_create(parser_ro, map, BPF_SK_SKB_STREAM_PARSER, NULL); + if (!ASSERT_GE(link, 0, "bpf_link_create parser_ro")) + goto out; + + err = bpf_link_update(link, parser_mod, NULL); + ASSERT_ERR(err, "bpf_link_update parser_mod"); +out: + if (link >= 0) + close(link); + test_sockmap_strp__destroy(strp); +} + void test_sockmap_strp(void) { if (test__start_subtest("sockmap strp tcp pass")) @@ -451,4 +480,6 @@ void test_sockmap_strp(void) test_sockmap_strp_multiple_pkt(AF_INET, SOCK_STREAM); if (test__start_subtest("sockmap strp tcp dispatch")) test_sockmap_strp_dispatch_pkt(AF_INET, SOCK_STREAM); + if (test__start_subtest("sockmap strp parser reject pkt mod")) + test_sockmap_strp_parser_reject(); } diff --git a/tools/testing/selftests/bpf/progs/refcounted_kptr_fail.c b/tools/testing/selftests/bpf/progs/refcounted_kptr_fail.c index 7247a20c0a3b..024ef2aae200 100644 --- a/tools/testing/selftests/bpf/progs/refcounted_kptr_fail.c +++ b/tools/testing/selftests/bpf/progs/refcounted_kptr_fail.c @@ -13,12 +13,20 @@ struct node_acquire { struct bpf_refcount refcount; }; +struct node_refcounted { + long key; + struct bpf_list_node list; + struct bpf_refcount refcount; +}; + extern void bpf_rcu_read_lock(void) __ksym; extern void bpf_rcu_read_unlock(void) __ksym; #define private(name) SEC(".data." #name) __hidden __attribute__((aligned(8))) private(A) struct bpf_spin_lock glock; private(A) struct bpf_rb_root groot __contains(node_acquire, node); +private(B) struct bpf_spin_lock lock; +private(B) struct bpf_list_head head __contains(node_refcounted, list); static bool less(struct bpf_rb_node *a, const struct bpf_rb_node *b) { @@ -93,6 +101,32 @@ long rbtree_refcounted_node_ref_escapes_owning_input(void *ctx) return 0; } +SEC("?tc") +__failure __msg("dereference of modified ptr_ ptr R1") +long refcount_acquire_list_node_offset(void *ctx) +{ + struct node_refcounted *node, *base, *ref; + struct bpf_list_node *list_node; + + node = bpf_obj_new(typeof(*node)); + if (!node) + return 1; + + bpf_spin_lock(&lock); + bpf_list_push_front(&head, &node->list); + list_node = bpf_list_pop_front(&head); + bpf_spin_unlock(&lock); + if (!list_node) + return 2; + + base = container_of(list_node, struct node_refcounted, list); + ref = bpf_refcount_acquire(list_node); + if (ref) + bpf_obj_drop(ref); + bpf_obj_drop(base); + return 0; +} + SEC("?fentry.s/" SYS_PREFIX "sys_getpgid") __failure __msg("function calls are not allowed while holding a lock") int BPF_PROG(rbtree_fail_sleepable_lock_across_rcu, diff --git a/tools/testing/selftests/bpf/progs/sockmap_parse_prog.c b/tools/testing/selftests/bpf/progs/sockmap_parse_prog.c index c9abfe3a11af..56e9aebf05f2 100644 --- a/tools/testing/selftests/bpf/progs/sockmap_parse_prog.c +++ b/tools/testing/selftests/bpf/progs/sockmap_parse_prog.c @@ -5,28 +5,6 @@ SEC("sk_skb1") int bpf_prog1(struct __sk_buff *skb) { - void *data_end = (void *)(long) skb->data_end; - void *data = (void *)(long) skb->data; - __u8 *d = data; - int err; - - if (data + 10 > data_end) { - err = bpf_skb_pull_data(skb, 10); - if (err) - return SK_DROP; - - data_end = (void *)(long)skb->data_end; - data = (void *)(long)skb->data; - if (data + 10 > data_end) - return SK_DROP; - } - - /* This write/read is a bit pointless but tests the verifier and - * strparser handler for read/write pkt data and access into sk - * fields. - */ - d = data; - d[7] = 1; return skb->len; } diff --git a/tools/testing/selftests/bpf/progs/test_sockmap_strp.c b/tools/testing/selftests/bpf/progs/test_sockmap_strp.c index dde3d5bec515..fe88fa6d40bc 100644 --- a/tools/testing/selftests/bpf/progs/test_sockmap_strp.c +++ b/tools/testing/selftests/bpf/progs/test_sockmap_strp.c @@ -50,4 +50,11 @@ int prog_skb_parser_partial(struct __sk_buff *skb) return 10; } +SEC("sk_skb/stream_parser") +int prog_skb_parser_resize(struct __sk_buff *skb) +{ + bpf_skb_change_tail(skb, skb->len, 0); + return skb->len; +} + char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/verifier_unpriv.c b/tools/testing/selftests/bpf/progs/verifier_unpriv.c index 49f7bd05edad..42de5cff7e52 100644 --- a/tools/testing/selftests/bpf/progs/verifier_unpriv.c +++ b/tools/testing/selftests/bpf/progs/verifier_unpriv.c @@ -6,6 +6,8 @@ #include "../../../include/linux/filter.h" #include "bpf_misc.h" +extern const int bpf_prog_active __ksym; + #define BPF_SK_LOOKUP(func) \ /* struct bpf_sock_tuple tuple = {} */ \ "r2 = 0;" \ @@ -78,6 +80,23 @@ __naked void dummy_prog_loop1_socket(void) } SEC("socket") +__description("unpriv: pseudo btf id log masks address") +__success_unpriv +__msg_unpriv("0: (18) r1 = 0x0") +__not_msg_unpriv("0: (18) r1 = 0x{{[1-9a-f][0-9a-f]*}}") +__retval_unpriv(0) +__log_level(2) +__naked void pseudo_btf_id_log_masks_address(void) +{ + asm volatile ("r1 = %[bpf_prog_active] ll;" + "r0 = 0;" + "exit;" + : + : __imm_addr(bpf_prog_active) + : __clobber_all); +} + +SEC("socket") __description("unpriv: return pointer") __success __failure_unpriv __msg_unpriv("R0 leaks addr") __retval(POINTER_VALUE) |
