diff options
| author | Linus Torvalds <torvalds@linux-foundation.org> | 2026-06-25 14:09:26 -0700 |
|---|---|---|
| committer | Linus Torvalds <torvalds@linux-foundation.org> | 2026-06-25 14:09:26 -0700 |
| commit | 4edcdefd4083ae04b1a5656f4be6cd83ae919ef4 (patch) | |
| tree | 52f12981c8044a73f2b0963555bd1505c42217f1 /kernel | |
| parent | 8c04c1292dca29a57ea82c6a44348be49749fc22 (diff) | |
| parent | 12091470c6b4c1c14b2de12dcbae2ada6cb6d20b (diff) | |
Merge tag 'bpf-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf
Pull bpf fixes from Alexei Starovoitov:
- Fix effective prog array index with BPF_F_PREORDER (Amery Hung)
- Zero-initialize the fib lookup flow struct (Avinash Duduskar)
- Disable xfrm_decode_session hook attachment (Bradley Morgan)
- Allow type tag BTF records to succeed other modifier records (Emil
Tsalapatis)
- Fix build_id caching in stack_map_get_build_id_offset() (Ihor
Solodrai)
- Add missing access_ok call to copy_user_syms (Jiri Olsa)
- Fix stack slot index in nospec checks (Nuoqi Gui)
- Preserve pointer spill metadata during half-slot cleanup (Nuoqi Gui)
- Fix partial copy of non-linear test_run output (Sun Jian)
- Fix BPF_PROG_ASSOC_STRUCT_OPS last field check (Thiébaud Weksteen)
- Reset register bounds before narrowing retval range (Tristan Madani)
- Fix vmlinux BTF leak in bpftool cgroup commands (Yichong Chen)
- Guard error writes in conntrack kfuncs (Yiyang Chen)
* tag 'bpf-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf:
bpf: Disable xfrm_decode_session hook attachment
selftests/bpf: Add test for stale bounds on LSM retval context load
bpf: Reset register bounds before narrowing retval range in check_mem_access()
selftests/bpf: Cover small conntrack opts error writes
bpf: Guard conntrack opts error writes
selftests/bpf: Cover half-slot cleanup of pointer spills
bpf: Preserve pointer spill metadata during half-slot cleanup
selftests/bpf: Test cgroup link replace with BPF_F_PREORDER
bpf: Fix effective prog array index with BPF_F_PREORDER
bpf: Fix BPF_PROG_ASSOC_STRUCT_OPS last field check
bpf: zero-initialize the fib lookup flow struct
bpftool: Fix vmlinux BTF leak in cgroup commands
bpf: Add missing access_ok call to copy_user_syms
bpf: Allow type tag BTF records to succeed other modifier records
bpf: Emit verbose message when prog-specific btf_struct_access rejects a write
bpf: Fix build_id caching in stack_map_get_build_id_offset()
bpf: Fix partial copy of non-linear test_run output
selftests/bpf: Cover stack nospec slot indexing
bpf: Fix stack slot index in nospec checks
Diffstat (limited to 'kernel')
| -rw-r--r-- | kernel/bpf/bpf_lsm.c | 3 | ||||
| -rw-r--r-- | kernel/bpf/btf.c | 209 | ||||
| -rw-r--r-- | kernel/bpf/cgroup.c | 108 | ||||
| -rw-r--r-- | kernel/bpf/stackmap.c | 183 | ||||
| -rw-r--r-- | kernel/bpf/states.c | 13 | ||||
| -rw-r--r-- | kernel/bpf/syscall.c | 2 | ||||
| -rw-r--r-- | kernel/bpf/verifier.c | 8 | ||||
| -rw-r--r-- | kernel/trace/bpf_trace.c | 11 |
8 files changed, 339 insertions, 198 deletions
diff --git a/kernel/bpf/bpf_lsm.c b/kernel/bpf/bpf_lsm.c index 564071a92d7d..1433809bb166 100644 --- a/kernel/bpf/bpf_lsm.c +++ b/kernel/bpf/bpf_lsm.c @@ -51,6 +51,9 @@ BTF_ID(func, bpf_lsm_key_getsecurity) #ifdef CONFIG_AUDIT BTF_ID(func, bpf_lsm_audit_rule_match) #endif +#ifdef CONFIG_SECURITY_NETWORK_XFRM +BTF_ID(func, bpf_lsm_xfrm_decode_session) +#endif BTF_ID(func, bpf_lsm_ismaclabel) BTF_ID(func, bpf_lsm_file_alloc_security) BTF_SET_END(bpf_lsm_disabled_hooks) diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c index 15ae7c43f594..64572f85edc8 100644 --- a/kernel/bpf/btf.c +++ b/kernel/bpf/btf.c @@ -28,6 +28,7 @@ #include <linux/string.h> #include <linux/sysfs.h> #include <linux/overflow.h> +#include <linux/bitops.h> #include <net/netfilter/nf_bpf_link.h> @@ -3472,12 +3473,69 @@ static int btf_find_struct(const struct btf *btf, const struct btf_type *t, return BTF_FIELD_FOUND; } +struct btf_type_tag_match { + const char *name; + u32 flag; +}; + +struct btf_type_tag_walk_ctx { + const struct btf_type *t; /* Input/Output */ + u32 id; /* Output */ + u32 res; /* Output */ +}; + +static int btf_type_tag_walk(const struct btf *btf, + struct btf_type_tag_walk_ctx *ctx, + const struct btf_type_tag_match *matches, + u32 match_cnt) +{ + const struct btf_type *t = ctx->t; + u32 res = 0; + const char *tag; + u32 id, i; + + do { + id = t->type; + t = btf_type_by_id(btf, id); + + if (!btf_type_is_modifier(t)) + break; + + if (!btf_type_is_type_tag(t) || btf_type_kflag(t)) + continue; + + tag = __btf_name_by_offset(btf, t->name_off); + for (i = 0; i < match_cnt; i++) { + if (strcmp(tag, matches[i].name)) + continue; + res |= matches[i].flag; + break; + } + } while (true); + + /* We only support a single tag. 
*/ + if (hweight32(res) > 1) + return -EINVAL; + + ctx->t = t; + ctx->id = id; + ctx->res = res; + + return 0; +} + static int btf_find_kptr(const struct btf *btf, const struct btf_type *t, u32 off, int sz, struct btf_field_info *info, u32 field_mask) { - enum btf_field_type type; - const char *tag_value; - bool is_type_tag; + static const struct btf_type_tag_match kptr_type_tags[] = { + { "kptr_untrusted", BPF_KPTR_UNREF }, + { "kptr", BPF_KPTR_REF }, + { "percpu_kptr", BPF_KPTR_PERCPU }, + { "uptr", BPF_UPTR }, + }; + struct btf_type_tag_walk_ctx ctx; + enum btf_field_type type = 0; + int err; u32 res_id; /* Permit modifiers on the pointer itself */ @@ -3486,30 +3544,20 @@ static int btf_find_kptr(const struct btf *btf, const struct btf_type *t, /* For PTR, sz is always == 8 */ if (!btf_type_is_ptr(t)) return BTF_FIELD_IGNORE; - t = btf_type_by_id(btf, t->type); - is_type_tag = btf_type_is_type_tag(t) && !btf_type_kflag(t); - if (!is_type_tag) - return BTF_FIELD_IGNORE; - /* Reject extra tags */ - if (btf_type_is_type_tag(btf_type_by_id(btf, t->type))) - return -EINVAL; - tag_value = __btf_name_by_offset(btf, t->name_off); - if (!strcmp("kptr_untrusted", tag_value)) - type = BPF_KPTR_UNREF; - else if (!strcmp("kptr", tag_value)) - type = BPF_KPTR_REF; - else if (!strcmp("percpu_kptr", tag_value)) - type = BPF_KPTR_PERCPU; - else if (!strcmp("uptr", tag_value)) - type = BPF_UPTR; - else - return -EINVAL; + + ctx.t = t; + err = btf_type_tag_walk(btf, &ctx, kptr_type_tags, + ARRAY_SIZE(kptr_type_tags)); + if (err) + return err; + + t = ctx.t; + res_id = ctx.id; + type = ctx.res; if (!(type & field_mask)) return BTF_FIELD_IGNORE; - /* Get the base type */ - t = btf_type_skip_modifiers(btf, t->type, &res_id); /* Only pointer to struct is allowed */ if (!__btf_type_is_struct(t)) return -EINVAL; @@ -5859,11 +5907,10 @@ struct btf_struct_meta *btf_find_struct_meta(const struct btf *btf, u32 btf_id) return bsearch(&btf_id, tab->types, tab->cnt, sizeof(tab->types[0]), 
btf_id_cmp_func); } -static int btf_check_type_tags(struct btf_verifier_env *env, - struct btf *btf, int start_id) +static int btf_check_modifier_chain_length(struct btf_verifier_env *env, + struct btf *btf, int start_id) { int i, n, good_id = start_id - 1; - bool in_tags; n = btf_nr_types(btf); for (i = start_id; i < n; i++) { @@ -5879,20 +5926,12 @@ static int btf_check_type_tags(struct btf_verifier_env *env, cond_resched(); - in_tags = btf_type_is_type_tag(t); while (btf_type_is_modifier(t)) { if (!chain_limit--) { btf_verifier_log(env, "Max chain length or cycle detected"); return -ELOOP; } - if (btf_type_is_type_tag(t)) { - if (!in_tags) { - btf_verifier_log(env, "Type tags don't precede modifiers"); - return -EINVAL; - } - } else if (in_tags) { - in_tags = false; - } + if (cur_id <= good_id) break; /* Move to next type */ @@ -5970,7 +6009,7 @@ static struct btf *btf_parse(const union bpf_attr *attr, bpfptr_t uattr, if (err) goto errout; - err = btf_check_type_tags(env, btf, 1); + err = btf_check_modifier_chain_length(env, btf, 1); if (err) goto errout; @@ -6378,7 +6417,7 @@ static struct btf *btf_parse_base(struct btf_verifier_env *env, const char *name if (err) goto errout; - err = btf_check_type_tags(env, btf, 1); + err = btf_check_modifier_chain_length(env, btf, 1); if (err) goto errout; @@ -6504,7 +6543,7 @@ static struct btf *btf_parse_module(const char *module_name, const void *data, if (err) goto errout; - err = btf_check_type_tags(env, btf, btf_nr_types(base_btf)); + err = btf_check_modifier_chain_length(env, btf, btf_nr_types(base_btf)); if (err) goto errout; @@ -6810,14 +6849,18 @@ bool btf_ctx_access(int off, int size, enum bpf_access_type type, const struct bpf_prog *prog, struct bpf_insn_access_aux *info) { + static const struct btf_type_tag_match ctx_type_tags[] = { + { "user", MEM_USER }, + { "percpu", MEM_PERCPU }, + }; const struct btf_type *t = prog->aux->attach_func_proto; struct bpf_prog *tgt_prog = prog->aux->dst_prog; struct btf *btf = 
bpf_prog_get_target_btf(prog); const char *tname = prog->aux->attach_func_name; struct bpf_verifier_log *log = info->log; + struct btf_type_tag_walk_ctx ctx; const struct btf_param *args; bool ptr_err_raw_tp = false; - const char *tag_value; u32 nr_args, arg; int i, ret; @@ -7020,22 +7063,18 @@ bool btf_ctx_access(int off, int size, enum bpf_access_type type, } info->btf = btf; - info->btf_id = t->type; - t = btf_type_by_id(btf, t->type); - - if (btf_type_is_type_tag(t) && !btf_type_kflag(t)) { - tag_value = __btf_name_by_offset(btf, t->name_off); - if (strcmp(tag_value, "user") == 0) - info->reg_type |= MEM_USER; - if (strcmp(tag_value, "percpu") == 0) - info->reg_type |= MEM_PERCPU; + ctx.t = t; + ret = btf_type_tag_walk(btf, &ctx, ctx_type_tags, + ARRAY_SIZE(ctx_type_tags)); + if (ret) { + bpf_log(log, "func '%s' arg%d type %s has multiple type tags\n", + tname, arg, btf_type_str(t)); + return false; } + info->reg_type |= ctx.res; + info->btf_id = ctx.id; + t = ctx.t; - /* skip modifiers */ - while (btf_type_is_modifier(t)) { - info->btf_id = t->type; - t = btf_type_by_id(btf, t->type); - } if (!btf_type_is_struct(t)) { bpf_log(log, "func '%s' arg%d type %s is not a struct\n", @@ -7074,7 +7113,7 @@ static int btf_struct_walk(struct bpf_verifier_log *log, const struct btf *btf, u32 i, moff, mtrue_end, msize = 0, total_nelems = 0; const struct btf_type *mtype, *elem_type = NULL; const struct btf_member *member; - const char *tname, *mname, *tag_value; + const char *tname, *mname; u32 vlen, elem_id, mid; again: @@ -7270,8 +7309,15 @@ error: } if (btf_type_is_ptr(mtype)) { - const struct btf_type *stype, *t; + static const struct btf_type_tag_match walk_type_tags[] = { + { "user", MEM_USER }, + { "percpu", MEM_PERCPU }, + { "rcu", MEM_RCU }, + }; enum bpf_type_flag tmp_flag = 0; + struct btf_type_tag_walk_ctx ctx = { .t = mtype }; + const struct btf_type *stype; + int err; u32 id; if (msize != size || off != moff) { @@ -7281,22 +7327,17 @@ error: return -EACCES; } - 
/* check type tag */ - t = btf_type_by_id(btf, mtype->type); - if (btf_type_is_type_tag(t) && !btf_type_kflag(t)) { - tag_value = __btf_name_by_offset(btf, t->name_off); - /* check __user tag */ - if (strcmp(tag_value, "user") == 0) - tmp_flag = MEM_USER; - /* check __percpu tag */ - if (strcmp(tag_value, "percpu") == 0) - tmp_flag = MEM_PERCPU; - /* check __rcu tag */ - if (strcmp(tag_value, "rcu") == 0) - tmp_flag = MEM_RCU; + err = btf_type_tag_walk(btf, &ctx, walk_type_tags, + ARRAY_SIZE(walk_type_tags)); + if (err) { + bpf_log(log, "type '%s' has multiple type tags\n", + btf_type_str(mtype)); + return err; } + tmp_flag = ctx.res; + id = ctx.id; + stype = ctx.t; - stype = btf_type_skip_modifiers(btf, mtype->type, &id); if (btf_type_is_struct(stype)) { *next_btf_id = id; *flag |= tmp_flag; @@ -7867,7 +7908,12 @@ static int btf_scan_type_tags(struct bpf_verifier_env *env, const struct btf *btf, u32 type_id, u32 *tags) { + static const struct btf_type_tag_match func_type_tags[] = { + { "arena", ARG_TAG_ARENA }, + }; + struct btf_type_tag_walk_ctx ctx; const struct btf_type *t; + int err; /* Find the first pointer type in the chain. */ t = btf_type_skip_modifiers(btf, type_id, NULL); @@ -7879,24 +7925,15 @@ static int btf_scan_type_tags(struct bpf_verifier_env *env, if (!t || !btf_type_is_ptr(t)) return 0; - /* We got a pointer, get all associated type tags. */ - for (t = btf_type_by_id(btf, t->type); t && btf_type_is_modifier(t); - t = btf_type_by_id(btf, t->type)) { - - /* Skip non-type tag modifiers. 
*/ - if (!btf_type_is_type_tag(t)) - continue; - - const char *tag = __btf_name_by_offset(btf, t->name_off); - - if (strcmp(tag, "arena") == 0) { - *tags |= ARG_TAG_ARENA; - } else { - bpf_log(&env->log, "function signature member has unsupported type tag '%s'\n", - tag); - return -EOPNOTSUPP; - } + ctx.t = t; + err = btf_type_tag_walk(btf, &ctx, func_type_tags, + ARRAY_SIZE(func_type_tags)); + if (err) { + bpf_log(&env->log, + "function signature member has multiple type tags\n"); + return err; } + *tags |= ctx.res; return 0; } diff --git a/kernel/bpf/cgroup.c b/kernel/bpf/cgroup.c index 83ce66296ac1..4355ccb78a9c 100644 --- a/kernel/bpf/cgroup.c +++ b/kernel/bpf/cgroup.c @@ -939,19 +939,65 @@ static int cgroup_bpf_attach(struct cgroup *cgrp, return ret; } +static int effective_prog_pos(struct cgroup *cgrp, + enum cgroup_bpf_attach_type atype, + struct bpf_prog_list *target_pl) +{ + int cnt = 0, preorder_cnt = 0, fstart, bstart, init_bstart, pos = -1; + struct bpf_prog_list *pl; + struct cgroup *p = cgrp; + + /* count effective programs to find where the preorder region ends */ + do { + if (cnt == 0 || (p->bpf.flags[atype] & BPF_F_ALLOW_MULTI)) + cnt += prog_list_length(&p->bpf.progs[atype], &preorder_cnt); + p = cgroup_parent(p); + } while (p); + + /* replay compute_effective_progs() placement and record target's slot */ + cnt = 0; + p = cgrp; + fstart = preorder_cnt; + bstart = preorder_cnt - 1; + do { + if (cnt > 0 && !(p->bpf.flags[atype] & BPF_F_ALLOW_MULTI)) + continue; + + init_bstart = bstart; + hlist_for_each_entry(pl, &p->bpf.progs[atype], node) { + if (!prog_list_prog(pl)) + continue; + + if (pl->flags & BPF_F_PREORDER) { + if (pl == target_pl) + pos = bstart; + bstart--; + } else { + if (pl == target_pl) + pos = fstart; + fstart++; + } + cnt++; + } + + /* reverse pre-ordering progs at this cgroup level */ + if (pos >= bstart + 1 && pos <= init_bstart) + pos = bstart + 1 + init_bstart - pos; + } while ((p = cgroup_parent(p))); + + return pos; +} + /* 
Swap updated BPF program for given link in effective program arrays across * all descendant cgroups. This function is guaranteed to succeed. */ static void replace_effective_prog(struct cgroup *cgrp, enum cgroup_bpf_attach_type atype, - struct bpf_cgroup_link *link) + struct bpf_prog_list *pl) { struct bpf_prog_array_item *item; struct cgroup_subsys_state *css; struct bpf_prog_array *progs; - struct bpf_prog_list *pl; - struct hlist_head *head; - struct cgroup *cg; int pos; css_for_each_descendant_pre(css, &cgrp->self) { @@ -960,27 +1006,15 @@ static void replace_effective_prog(struct cgroup *cgrp, if (percpu_ref_is_zero(&desc->bpf.refcnt)) continue; - /* find position of link in effective progs array */ - for (pos = 0, cg = desc; cg; cg = cgroup_parent(cg)) { - if (pos && !(cg->bpf.flags[atype] & BPF_F_ALLOW_MULTI)) - continue; + pos = effective_prog_pos(desc, atype, pl); + if (WARN_ON_ONCE(pos < 0)) + continue; - head = &cg->bpf.progs[atype]; - hlist_for_each_entry(pl, head, node) { - if (!prog_list_prog(pl)) - continue; - if (pl->link == link) - goto found; - pos++; - } - } -found: - BUG_ON(!cg); progs = rcu_dereference_protected( desc->bpf.effective[atype], lockdep_is_held(&cgroup_mutex)); item = &progs->items[pos]; - WRITE_ONCE(item->prog, link->link.prog); + WRITE_ONCE(item->prog, pl->link->link.prog); } } @@ -1024,7 +1058,7 @@ static int __cgroup_bpf_replace(struct cgroup *cgrp, cgrp->bpf.revisions[atype] += 1; old_prog = xchg(&link->link.prog, new_prog); - replace_effective_prog(cgrp, atype, link); + replace_effective_prog(cgrp, atype, pl); bpf_prog_put(old_prog); return 0; } @@ -1091,19 +1125,14 @@ static struct bpf_prog_list *find_detach_entry(struct hlist_head *progs, * recomputing the array in place. 
* * @cgrp: The cgroup which descendants to travers - * @prog: A program to detach or NULL - * @link: A link to detach or NULL + * @pl: The prog_list entry being detached * @atype: Type of detach operation */ -static void purge_effective_progs(struct cgroup *cgrp, struct bpf_prog *prog, - struct bpf_cgroup_link *link, +static void purge_effective_progs(struct cgroup *cgrp, struct bpf_prog_list *pl, enum cgroup_bpf_attach_type atype) { struct cgroup_subsys_state *css; struct bpf_prog_array *progs; - struct bpf_prog_list *pl; - struct hlist_head *head; - struct cgroup *cg; int pos; /* recompute effective prog array in place */ @@ -1113,24 +1142,11 @@ static void purge_effective_progs(struct cgroup *cgrp, struct bpf_prog *prog, if (percpu_ref_is_zero(&desc->bpf.refcnt)) continue; - /* find position of link or prog in effective progs array */ - for (pos = 0, cg = desc; cg; cg = cgroup_parent(cg)) { - if (pos && !(cg->bpf.flags[atype] & BPF_F_ALLOW_MULTI)) - continue; - - head = &cg->bpf.progs[atype]; - hlist_for_each_entry(pl, head, node) { - if (!prog_list_prog(pl)) - continue; - if (pl->prog == prog && pl->link == link) - goto found; - pos++; - } - } - + pos = effective_prog_pos(desc, atype, pl); /* no link or prog match, skip the cgroup of this layer */ - continue; -found: + if (pos < 0) + continue; + progs = rcu_dereference_protected( desc->bpf.effective[atype], lockdep_is_held(&cgroup_mutex)); @@ -1196,7 +1212,7 @@ static int __cgroup_bpf_detach(struct cgroup *cgrp, struct bpf_prog *prog, /* if update effective array failed replace the prog with a dummy prog*/ pl->prog = old_prog; pl->link = link; - purge_effective_progs(cgrp, old_prog, link, atype); + purge_effective_progs(cgrp, pl, atype); } /* now can actually delete it from this cgroup list */ diff --git a/kernel/bpf/stackmap.c b/kernel/bpf/stackmap.c index 77ba03216c09..41fe87d7302f 100644 --- a/kernel/bpf/stackmap.c +++ b/kernel/bpf/stackmap.c @@ -175,6 +175,95 @@ static inline void 
stack_map_build_id_set_valid(struct bpf_stack_build_id *id, memcpy(id->build_id, build_id, BUILD_ID_SIZE_MAX); } +/* + * A cached VMA lookup result. The range [vm_start, vm_end) is always set. + * vm_pgoff, file, build_id are set only when the build ID was resolved. + * Zero vm_end marks the slot empty. build_id aliases the id_offs[] entry. + */ +struct stack_map_cached_vma { + unsigned long vm_start; + unsigned long vm_end; + unsigned long vm_pgoff; + struct file *file; /* pinned in the sleepable path; NULL otherwise */ + const unsigned char *build_id; +}; + +/* + * Per stack_map_get_build_id_offset() call cache of the last VMA with a build ID + * resolved and the last VMA with no usable build ID. Adjacent stack frames tend + * to land in the same VMA or the same backing file, so caching the last result + * of each kind lets us skip unnecessary VMA lookups and build ID parse calls. + * Keeping the two slots independent means a build-ID-less VMA doesn't evict the + * last resolved build ID. + */ +struct stack_map_build_id_cache { + struct stack_map_cached_vma resolved; + struct stack_map_cached_vma unresolved; +}; + +/* + * Fill @id from a cached range covering @ip. On a hit this writes @id (resolved + * range -> build ID + offset, unresolved range -> raw ip) and returns 0; on a + * miss it leaves @id untouched and returns -ENOENT. 
+ */ +static int stack_map_build_id_set_from_cache(struct stack_map_build_id_cache *cache, + struct bpf_stack_build_id *id, u64 ip) +{ + unsigned long vm_start, vm_end, vm_pgoff; + u64 offset; + + vm_start = cache->resolved.vm_start; + vm_end = cache->resolved.vm_end; + if (vm_end && ip >= vm_start && ip < vm_end) { + vm_pgoff = cache->resolved.vm_pgoff; + offset = stack_map_build_id_offset(vm_pgoff, vm_start, ip); + stack_map_build_id_set_valid(id, offset, cache->resolved.build_id); + return 0; + } + + vm_start = cache->unresolved.vm_start; + vm_end = cache->unresolved.vm_end; + if (vm_end && ip >= vm_start && ip < vm_end) { + stack_map_build_id_set_ip(id); + return 0; + } + + return -ENOENT; +} + +/* + * Record @vma's build ID as the last resolved one. @file is the pinned backing + * file in the sleepable path (released when evicted), or NULL otherwise. + */ +static void stack_map_build_id_cache_set_resolved(struct stack_map_build_id_cache *cache, + struct file *file, + const unsigned char *build_id, + unsigned long vm_start, + unsigned long vm_end, + unsigned long vm_pgoff) +{ + if (cache->resolved.file) + fput(cache->resolved.file); + cache->resolved = (struct stack_map_cached_vma){ + .vm_start = vm_start, + .vm_end = vm_end, + .vm_pgoff = vm_pgoff, + .file = file, + .build_id = build_id, + }; +} + +/* Record [vm_start, vm_end) as a range with no usable build ID. 
*/ +static void stack_map_build_id_cache_set_unresolved(struct stack_map_build_id_cache *cache, + unsigned long vm_start, + unsigned long vm_end) +{ + cache->unresolved = (struct stack_map_cached_vma){ + .vm_start = vm_start, + .vm_end = vm_end, + }; +} + struct stack_map_vma_lock { struct vm_area_struct *vma; struct mm_struct *mm; @@ -244,15 +333,9 @@ static void stack_map_unlock_vma(struct stack_map_vma_lock *lock) static void stack_map_get_build_id_offset_sleepable(struct bpf_stack_build_id *id_offs, u32 trace_nr) { - struct mm_struct *mm = current->mm; - struct stack_map_vma_lock lock = { .mm = mm }; - struct { - struct file *file; - const unsigned char *build_id; - unsigned long vm_start; - unsigned long vm_end; - unsigned long vm_pgoff; - } cache = {}; + struct stack_map_vma_lock lock = { .mm = current->mm }; + struct stack_map_build_id_cache cache = {}; + struct stack_map_cached_vma *res = &cache.resolved; unsigned long vm_pgoff, vm_start, vm_end; struct vm_area_struct *vma; struct file *file; @@ -262,44 +345,39 @@ static void stack_map_get_build_id_offset_sleepable(struct bpf_stack_build_id *i for (u32 i = 0; i < trace_nr; i++) { ip = READ_ONCE(id_offs[i].ip); - /* - * Range cache fast path: if ip falls within the previously - * resolved VMA range, reuse the cache build_id without - * re-acquiring the VMA lock. 
- */ - if (cache.build_id && ip >= cache.vm_start && ip < cache.vm_end) { - offset = stack_map_build_id_offset(cache.vm_pgoff, cache.vm_start, ip); - stack_map_build_id_set_valid(&id_offs[i], offset, cache.build_id); + if (!stack_map_build_id_set_from_cache(&cache, &id_offs[i], ip)) continue; - } vma = stack_map_lock_vma(&lock, ip); if (!vma) { stack_map_build_id_set_ip(&id_offs[i]); continue; } + + vm_pgoff = vma->vm_pgoff; + vm_start = vma->vm_start; + vm_end = vma->vm_end; + if (vma_is_anonymous(vma) || !vma->vm_file) { - stack_map_build_id_set_ip(&id_offs[i]); stack_map_unlock_vma(&lock); + stack_map_build_id_set_ip(&id_offs[i]); + stack_map_build_id_cache_set_unresolved(&cache, vm_start, vm_end); continue; } file = vma->vm_file; - vm_pgoff = vma->vm_pgoff; - vm_start = vma->vm_start; - vm_end = vma->vm_end; offset = stack_map_build_id_offset(vm_pgoff, vm_start, ip); /* - * Same backing file as previous (e.g. different VMAs - * of the same ELF binary). Reuse the cache build_id. + * Same backing file as the last resolved VMA (another mapping + * of the same ELF binary): reuse its build_id without re-parsing. 
*/ - if (file == cache.file) { + if (file == res->file) { stack_map_unlock_vma(&lock); - stack_map_build_id_set_valid(&id_offs[i], offset, cache.build_id); - cache.vm_start = vm_start; - cache.vm_end = vm_end; - cache.vm_pgoff = vm_pgoff; + stack_map_build_id_set_valid(&id_offs[i], offset, res->build_id); + res->vm_start = vm_start; + res->vm_end = vm_end; + res->vm_pgoff = vm_pgoff; continue; } @@ -310,21 +388,17 @@ static void stack_map_get_build_id_offset_sleepable(struct bpf_stack_build_id *i if (build_id_parse_file(file, id_offs[i].build_id, NULL)) { stack_map_build_id_set_ip(&id_offs[i]); fput(file); + stack_map_build_id_cache_set_unresolved(&cache, vm_start, vm_end); continue; } stack_map_build_id_set_valid(&id_offs[i], offset, id_offs[i].build_id); - if (cache.file) - fput(cache.file); - cache.file = file; - cache.build_id = id_offs[i].build_id; - cache.vm_start = vm_start; - cache.vm_end = vm_end; - cache.vm_pgoff = vm_pgoff; + stack_map_build_id_cache_set_resolved(&cache, file, id_offs[i].build_id, + vm_start, vm_end, vm_pgoff); } - if (cache.file) - fput(cache.file); + if (res->file) + fput(res->file); } /* @@ -343,8 +417,8 @@ static void stack_map_get_build_id_offset(struct bpf_stack_build_id *id_offs, struct mmap_unlock_irq_work *work = NULL; bool irq_work_busy = bpf_mmap_unlock_get_irq_work(&work); bool has_user_ctx = user && current && current->mm; - struct vm_area_struct *vma, *prev_vma = NULL; - const unsigned char *prev_build_id = NULL; + struct stack_map_build_id_cache cache = {}; + struct vm_area_struct *vma; int i; if (may_fault && has_user_ctx) { @@ -365,27 +439,30 @@ static void stack_map_get_build_id_offset(struct bpf_stack_build_id *id_offs, for (i = 0; i < trace_nr; i++) { u64 ip = READ_ONCE(id_offs[i].ip); - u64 offset; - if (prev_build_id && range_in_vma(prev_vma, ip, ip)) { - vma = prev_vma; - offset = stack_map_build_id_offset(vma->vm_pgoff, vma->vm_start, ip); - stack_map_build_id_set_valid(&id_offs[i], offset, prev_build_id); + if 
(!stack_map_build_id_set_from_cache(&cache, &id_offs[i], ip)) continue; - } + vma = find_vma(current->mm, ip); if (!vma || vma_is_anonymous(vma) || fetch_build_id(vma, id_offs[i].build_id, may_fault)) { - /* per entry fall back to ips */ + /* per entry fall back to ips; cache build-ID-less range */ stack_map_build_id_set_ip(&id_offs[i]); - prev_vma = vma; - prev_build_id = NULL; + if (vma) + stack_map_build_id_cache_set_unresolved(&cache, + vma->vm_start, vma->vm_end); continue; } - offset = stack_map_build_id_offset(vma->vm_pgoff, vma->vm_start, ip); - stack_map_build_id_set_valid(&id_offs[i], offset, id_offs[i].build_id); - prev_vma = vma; - prev_build_id = id_offs[i].build_id; + /* + * mmap_lock is held for the whole loop, so the cached VMA + * fields stay valid; no file pinning is needed here. + */ + stack_map_build_id_set_valid(&id_offs[i], + stack_map_build_id_offset(vma->vm_pgoff, vma->vm_start, ip), + id_offs[i].build_id); + stack_map_build_id_cache_set_resolved(&cache, NULL, id_offs[i].build_id, + vma->vm_start, vma->vm_end, + vma->vm_pgoff); } bpf_mmap_unlock_mm(work, current->mm); } diff --git a/kernel/bpf/states.c b/kernel/bpf/states.c index 32f346ce3ffc..ea2153cf28d0 100644 --- a/kernel/bpf/states.c +++ b/kernel/bpf/states.c @@ -436,12 +436,10 @@ static void __clean_func_state(struct bpf_verifier_env *env, continue; /* - * Only destroy spilled_ptr when hi half is dead. - * If hi half is still live with STACK_SPILL, the - * spilled_ptr metadata is needed for correct state - * comparison in stacksafe(). - * is_spilled_reg() is using slot_type[7], but - * is_spilled_scalar_after() check either slot_type[0] or [4] + * Only scalar spills can be degraded to raw stack bytes + * when their high half is dead. Pointer spills need the + * saved spilled_ptr metadata so partial fills keep + * rejecting as non-scalar register fills. 
*/ if (!hi_live) { struct bpf_reg_state *spill = &st->stack[i].spilled_ptr; @@ -449,6 +447,9 @@ static void __clean_func_state(struct bpf_verifier_env *env, if (lo_live && stype == STACK_SPILL) { u8 val = STACK_MISC; + if (spill->type != SCALAR_VALUE) + continue; + /* * 8 byte spill of scalar 0 where half slot is dead * should become STACK_ZERO in lo 4 bytes. diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index b44106c8ea75..6db306d23b47 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -6308,7 +6308,7 @@ static int prog_stream_read(union bpf_attr *attr) return ret; } -#define BPF_PROG_ASSOC_STRUCT_OPS_LAST_FIELD prog_assoc_struct_ops.prog_fd +#define BPF_PROG_ASSOC_STRUCT_OPS_LAST_FIELD prog_assoc_struct_ops.flags static int prog_assoc_struct_ops(union bpf_attr *attr) { diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 2abc79dbf281..21a365d436a5 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -3479,7 +3479,8 @@ static int check_stack_write_fixed_off(struct bpf_verifier_env *env, bool sanitize = reg && is_spillable_regtype(reg->type); for (i = 0; i < size; i++) { - u8 type = state->stack[spi].slot_type[i]; + u8 type = state->stack[spi].slot_type[(slot - i) % + BPF_REG_SIZE]; if (type != STACK_MISC && type != STACK_ZERO) { sanitize = true; @@ -5786,6 +5787,10 @@ static int check_ptr_to_btf_access(struct bpf_verifier_env *env, return -EFAULT; } ret = env->ops->btf_struct_access(&env->log, reg, off, size); + if (ret < 0) + verbose(env, + "%s cannot write into ptr_%s at off=%d size=%d\n", + reg_arg_name(env, argno), tname, off, size); } else { /* Writes are permitted with default btf_struct_access for * program allocated objects (which always have id > 0), @@ -6196,6 +6201,7 @@ static int check_mem_access(struct bpf_verifier_env *env, int insn_idx, struct b */ if (info.reg_type == SCALAR_VALUE) { if (info.is_retval && get_func_retval_range(env->prog, &range)) { + mark_reg_unknown(env, regs, value_regno); err = 
__mark_reg_s32_range(env, regs, value_regno, range.minval, range.maxval); if (err) diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c index 82f8feea6931..75495a5c3507 100644 --- a/kernel/trace/bpf_trace.c +++ b/kernel/trace/bpf_trace.c @@ -2376,9 +2376,12 @@ static int copy_user_syms(struct user_syms *us, unsigned long __user *usyms, u32 int err = -ENOMEM; unsigned int i; + if (!access_ok(usyms, cnt * sizeof(*usyms))) + return -EFAULT; + syms = kvmalloc_array(cnt, sizeof(*syms), GFP_KERNEL); if (!syms) - goto error; + return -ENOMEM; buf = kvmalloc_array(cnt, KSYM_NAME_LEN, GFP_KERNEL); if (!buf) @@ -2403,10 +2406,8 @@ static int copy_user_syms(struct user_syms *us, unsigned long __user *usyms, u32 return 0; error: - if (err) { - kvfree(syms); - kvfree(buf); - } + kvfree(syms); + kvfree(buf); return err; } |
