summaryrefslogtreecommitdiff
path: root/kernel
diff options
context:
space:
mode:
author Linus Torvalds <torvalds@linux-foundation.org> 2026-06-25 14:09:26 -0700
committer Linus Torvalds <torvalds@linux-foundation.org> 2026-06-25 14:09:26 -0700
commit 4edcdefd4083ae04b1a5656f4be6cd83ae919ef4 (patch)
tree 52f12981c8044a73f2b0963555bd1505c42217f1 /kernel
parent 8c04c1292dca29a57ea82c6a44348be49749fc22 (diff)
parent 12091470c6b4c1c14b2de12dcbae2ada6cb6d20b (diff)
Merge tag 'bpf-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf
Pull bpf fixes from Alexei Starovoitov:
 - Fix effective prog array index with BPF_F_PREORDER (Amery Hung)
 - Zero-initialize the fib lookup flow struct (Avinash Duduskar)
 - Disable xfrm_decode_session hook attachment (Bradley Morgan)
 - Allow type tag BTF records to succeed other modifier records (Emil Tsalapatis)
 - Fix build_id caching in stack_map_get_build_id_offset() (Ihor Solodrai)
 - Add missing access_ok call to copy_user_syms (Jiri Olsa)
 - Fix stack slot index in nospec checks (Nuoqi Gui)
 - Preserve pointer spill metadata during half-slot cleanup (Nuoqi Gui)
 - Fix partial copy of non-linear test_run output (Sun Jian)
 - Fix BPF_PROG_ASSOC_STRUCT_OPS last field check (Thiébaud Weksteen)
 - Reset register bounds before narrowing retval range (Tristan Madani)
 - Fix vmlinux BTF leak in bpftool cgroup commands (Yichong Chen)
 - Guard error writes in conntrack kfuncs (Yiyang Chen)
* tag 'bpf-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf:
  bpf: Disable xfrm_decode_session hook attachment
  selftests/bpf: Add test for stale bounds on LSM retval context load
  bpf: Reset register bounds before narrowing retval range in check_mem_access()
  selftests/bpf: Cover small conntrack opts error writes
  bpf: Guard conntrack opts error writes
  selftests/bpf: Cover half-slot cleanup of pointer spills
  bpf: Preserve pointer spill metadata during half-slot cleanup
  selftests/bpf: Test cgroup link replace with BPF_F_PREORDER
  bpf: Fix effective prog array index with BPF_F_PREORDER
  bpf: Fix BPF_PROG_ASSOC_STRUCT_OPS last field check
  bpf: zero-initialize the fib lookup flow struct
  bpftool: Fix vmlinux BTF leak in cgroup commands
  bpf: Add missing access_ok call to copy_user_syms
  bpf: Allow type tag BTF records to succeed other modifier records
  bpf: Emit verbose message when prog-specific btf_struct_access rejects a write
  bpf: Fix build_id caching in stack_map_get_build_id_offset()
  bpf: Fix partial copy of non-linear test_run output
  selftests/bpf: Cover stack nospec slot indexing
  bpf: Fix stack slot index in nospec checks
Diffstat (limited to 'kernel')
-rw-r--r-- kernel/bpf/bpf_lsm.c | 3
-rw-r--r-- kernel/bpf/btf.c | 209
-rw-r--r-- kernel/bpf/cgroup.c | 108
-rw-r--r-- kernel/bpf/stackmap.c | 183
-rw-r--r-- kernel/bpf/states.c | 13
-rw-r--r-- kernel/bpf/syscall.c | 2
-rw-r--r-- kernel/bpf/verifier.c | 8
-rw-r--r-- kernel/trace/bpf_trace.c | 11
8 files changed, 339 insertions, 198 deletions
diff --git a/kernel/bpf/bpf_lsm.c b/kernel/bpf/bpf_lsm.c
index 564071a92d7d..1433809bb166 100644
--- a/kernel/bpf/bpf_lsm.c
+++ b/kernel/bpf/bpf_lsm.c
@@ -51,6 +51,9 @@ BTF_ID(func, bpf_lsm_key_getsecurity)
#ifdef CONFIG_AUDIT
BTF_ID(func, bpf_lsm_audit_rule_match)
#endif
+#ifdef CONFIG_SECURITY_NETWORK_XFRM
+BTF_ID(func, bpf_lsm_xfrm_decode_session)
+#endif
BTF_ID(func, bpf_lsm_ismaclabel)
BTF_ID(func, bpf_lsm_file_alloc_security)
BTF_SET_END(bpf_lsm_disabled_hooks)
diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c
index 15ae7c43f594..64572f85edc8 100644
--- a/kernel/bpf/btf.c
+++ b/kernel/bpf/btf.c
@@ -28,6 +28,7 @@
#include <linux/string.h>
#include <linux/sysfs.h>
#include <linux/overflow.h>
+#include <linux/bitops.h>
#include <net/netfilter/nf_bpf_link.h>
@@ -3472,12 +3473,69 @@ static int btf_find_struct(const struct btf *btf, const struct btf_type *t,
return BTF_FIELD_FOUND;
}
+struct btf_type_tag_match {
+ const char *name;
+ u32 flag;
+};
+
+struct btf_type_tag_walk_ctx {
+ const struct btf_type *t; /* Input/Output */
+ u32 id; /* Output */
+ u32 res; /* Output */
+};
+
+static int btf_type_tag_walk(const struct btf *btf,
+ struct btf_type_tag_walk_ctx *ctx,
+ const struct btf_type_tag_match *matches,
+ u32 match_cnt)
+{
+ const struct btf_type *t = ctx->t;
+ u32 res = 0;
+ const char *tag;
+ u32 id, i;
+
+ do {
+ id = t->type;
+ t = btf_type_by_id(btf, id);
+
+ if (!btf_type_is_modifier(t))
+ break;
+
+ if (!btf_type_is_type_tag(t) || btf_type_kflag(t))
+ continue;
+
+ tag = __btf_name_by_offset(btf, t->name_off);
+ for (i = 0; i < match_cnt; i++) {
+ if (strcmp(tag, matches[i].name))
+ continue;
+ res |= matches[i].flag;
+ break;
+ }
+ } while (true);
+
+ /* We only support a single tag. */
+ if (hweight32(res) > 1)
+ return -EINVAL;
+
+ ctx->t = t;
+ ctx->id = id;
+ ctx->res = res;
+
+ return 0;
+}
+
static int btf_find_kptr(const struct btf *btf, const struct btf_type *t,
u32 off, int sz, struct btf_field_info *info, u32 field_mask)
{
- enum btf_field_type type;
- const char *tag_value;
- bool is_type_tag;
+ static const struct btf_type_tag_match kptr_type_tags[] = {
+ { "kptr_untrusted", BPF_KPTR_UNREF },
+ { "kptr", BPF_KPTR_REF },
+ { "percpu_kptr", BPF_KPTR_PERCPU },
+ { "uptr", BPF_UPTR },
+ };
+ struct btf_type_tag_walk_ctx ctx;
+ enum btf_field_type type = 0;
+ int err;
u32 res_id;
/* Permit modifiers on the pointer itself */
@@ -3486,30 +3544,20 @@ static int btf_find_kptr(const struct btf *btf, const struct btf_type *t,
/* For PTR, sz is always == 8 */
if (!btf_type_is_ptr(t))
return BTF_FIELD_IGNORE;
- t = btf_type_by_id(btf, t->type);
- is_type_tag = btf_type_is_type_tag(t) && !btf_type_kflag(t);
- if (!is_type_tag)
- return BTF_FIELD_IGNORE;
- /* Reject extra tags */
- if (btf_type_is_type_tag(btf_type_by_id(btf, t->type)))
- return -EINVAL;
- tag_value = __btf_name_by_offset(btf, t->name_off);
- if (!strcmp("kptr_untrusted", tag_value))
- type = BPF_KPTR_UNREF;
- else if (!strcmp("kptr", tag_value))
- type = BPF_KPTR_REF;
- else if (!strcmp("percpu_kptr", tag_value))
- type = BPF_KPTR_PERCPU;
- else if (!strcmp("uptr", tag_value))
- type = BPF_UPTR;
- else
- return -EINVAL;
+
+ ctx.t = t;
+ err = btf_type_tag_walk(btf, &ctx, kptr_type_tags,
+ ARRAY_SIZE(kptr_type_tags));
+ if (err)
+ return err;
+
+ t = ctx.t;
+ res_id = ctx.id;
+ type = ctx.res;
if (!(type & field_mask))
return BTF_FIELD_IGNORE;
- /* Get the base type */
- t = btf_type_skip_modifiers(btf, t->type, &res_id);
/* Only pointer to struct is allowed */
if (!__btf_type_is_struct(t))
return -EINVAL;
@@ -5859,11 +5907,10 @@ struct btf_struct_meta *btf_find_struct_meta(const struct btf *btf, u32 btf_id)
return bsearch(&btf_id, tab->types, tab->cnt, sizeof(tab->types[0]), btf_id_cmp_func);
}
-static int btf_check_type_tags(struct btf_verifier_env *env,
- struct btf *btf, int start_id)
+static int btf_check_modifier_chain_length(struct btf_verifier_env *env,
+ struct btf *btf, int start_id)
{
int i, n, good_id = start_id - 1;
- bool in_tags;
n = btf_nr_types(btf);
for (i = start_id; i < n; i++) {
@@ -5879,20 +5926,12 @@ static int btf_check_type_tags(struct btf_verifier_env *env,
cond_resched();
- in_tags = btf_type_is_type_tag(t);
while (btf_type_is_modifier(t)) {
if (!chain_limit--) {
btf_verifier_log(env, "Max chain length or cycle detected");
return -ELOOP;
}
- if (btf_type_is_type_tag(t)) {
- if (!in_tags) {
- btf_verifier_log(env, "Type tags don't precede modifiers");
- return -EINVAL;
- }
- } else if (in_tags) {
- in_tags = false;
- }
+
if (cur_id <= good_id)
break;
/* Move to next type */
@@ -5970,7 +6009,7 @@ static struct btf *btf_parse(const union bpf_attr *attr, bpfptr_t uattr,
if (err)
goto errout;
- err = btf_check_type_tags(env, btf, 1);
+ err = btf_check_modifier_chain_length(env, btf, 1);
if (err)
goto errout;
@@ -6378,7 +6417,7 @@ static struct btf *btf_parse_base(struct btf_verifier_env *env, const char *name
if (err)
goto errout;
- err = btf_check_type_tags(env, btf, 1);
+ err = btf_check_modifier_chain_length(env, btf, 1);
if (err)
goto errout;
@@ -6504,7 +6543,7 @@ static struct btf *btf_parse_module(const char *module_name, const void *data,
if (err)
goto errout;
- err = btf_check_type_tags(env, btf, btf_nr_types(base_btf));
+ err = btf_check_modifier_chain_length(env, btf, btf_nr_types(base_btf));
if (err)
goto errout;
@@ -6810,14 +6849,18 @@ bool btf_ctx_access(int off, int size, enum bpf_access_type type,
const struct bpf_prog *prog,
struct bpf_insn_access_aux *info)
{
+ static const struct btf_type_tag_match ctx_type_tags[] = {
+ { "user", MEM_USER },
+ { "percpu", MEM_PERCPU },
+ };
const struct btf_type *t = prog->aux->attach_func_proto;
struct bpf_prog *tgt_prog = prog->aux->dst_prog;
struct btf *btf = bpf_prog_get_target_btf(prog);
const char *tname = prog->aux->attach_func_name;
struct bpf_verifier_log *log = info->log;
+ struct btf_type_tag_walk_ctx ctx;
const struct btf_param *args;
bool ptr_err_raw_tp = false;
- const char *tag_value;
u32 nr_args, arg;
int i, ret;
@@ -7020,22 +7063,18 @@ bool btf_ctx_access(int off, int size, enum bpf_access_type type,
}
info->btf = btf;
- info->btf_id = t->type;
- t = btf_type_by_id(btf, t->type);
-
- if (btf_type_is_type_tag(t) && !btf_type_kflag(t)) {
- tag_value = __btf_name_by_offset(btf, t->name_off);
- if (strcmp(tag_value, "user") == 0)
- info->reg_type |= MEM_USER;
- if (strcmp(tag_value, "percpu") == 0)
- info->reg_type |= MEM_PERCPU;
+ ctx.t = t;
+ ret = btf_type_tag_walk(btf, &ctx, ctx_type_tags,
+ ARRAY_SIZE(ctx_type_tags));
+ if (ret) {
+ bpf_log(log, "func '%s' arg%d type %s has multiple type tags\n",
+ tname, arg, btf_type_str(t));
+ return false;
}
+ info->reg_type |= ctx.res;
+ info->btf_id = ctx.id;
+ t = ctx.t;
- /* skip modifiers */
- while (btf_type_is_modifier(t)) {
- info->btf_id = t->type;
- t = btf_type_by_id(btf, t->type);
- }
if (!btf_type_is_struct(t)) {
bpf_log(log,
"func '%s' arg%d type %s is not a struct\n",
@@ -7074,7 +7113,7 @@ static int btf_struct_walk(struct bpf_verifier_log *log, const struct btf *btf,
u32 i, moff, mtrue_end, msize = 0, total_nelems = 0;
const struct btf_type *mtype, *elem_type = NULL;
const struct btf_member *member;
- const char *tname, *mname, *tag_value;
+ const char *tname, *mname;
u32 vlen, elem_id, mid;
again:
@@ -7270,8 +7309,15 @@ error:
}
if (btf_type_is_ptr(mtype)) {
- const struct btf_type *stype, *t;
+ static const struct btf_type_tag_match walk_type_tags[] = {
+ { "user", MEM_USER },
+ { "percpu", MEM_PERCPU },
+ { "rcu", MEM_RCU },
+ };
enum bpf_type_flag tmp_flag = 0;
+ struct btf_type_tag_walk_ctx ctx = { .t = mtype };
+ const struct btf_type *stype;
+ int err;
u32 id;
if (msize != size || off != moff) {
@@ -7281,22 +7327,17 @@ error:
return -EACCES;
}
- /* check type tag */
- t = btf_type_by_id(btf, mtype->type);
- if (btf_type_is_type_tag(t) && !btf_type_kflag(t)) {
- tag_value = __btf_name_by_offset(btf, t->name_off);
- /* check __user tag */
- if (strcmp(tag_value, "user") == 0)
- tmp_flag = MEM_USER;
- /* check __percpu tag */
- if (strcmp(tag_value, "percpu") == 0)
- tmp_flag = MEM_PERCPU;
- /* check __rcu tag */
- if (strcmp(tag_value, "rcu") == 0)
- tmp_flag = MEM_RCU;
+ err = btf_type_tag_walk(btf, &ctx, walk_type_tags,
+ ARRAY_SIZE(walk_type_tags));
+ if (err) {
+ bpf_log(log, "type '%s' has multiple type tags\n",
+ btf_type_str(mtype));
+ return err;
}
+ tmp_flag = ctx.res;
+ id = ctx.id;
+ stype = ctx.t;
- stype = btf_type_skip_modifiers(btf, mtype->type, &id);
if (btf_type_is_struct(stype)) {
*next_btf_id = id;
*flag |= tmp_flag;
@@ -7867,7 +7908,12 @@ static int btf_scan_type_tags(struct bpf_verifier_env *env,
const struct btf *btf, u32 type_id,
u32 *tags)
{
+ static const struct btf_type_tag_match func_type_tags[] = {
+ { "arena", ARG_TAG_ARENA },
+ };
+ struct btf_type_tag_walk_ctx ctx;
const struct btf_type *t;
+ int err;
/* Find the first pointer type in the chain. */
t = btf_type_skip_modifiers(btf, type_id, NULL);
@@ -7879,24 +7925,15 @@ static int btf_scan_type_tags(struct bpf_verifier_env *env,
if (!t || !btf_type_is_ptr(t))
return 0;
- /* We got a pointer, get all associated type tags. */
- for (t = btf_type_by_id(btf, t->type); t && btf_type_is_modifier(t);
- t = btf_type_by_id(btf, t->type)) {
-
- /* Skip non-type tag modifiers. */
- if (!btf_type_is_type_tag(t))
- continue;
-
- const char *tag = __btf_name_by_offset(btf, t->name_off);
-
- if (strcmp(tag, "arena") == 0) {
- *tags |= ARG_TAG_ARENA;
- } else {
- bpf_log(&env->log, "function signature member has unsupported type tag '%s'\n",
- tag);
- return -EOPNOTSUPP;
- }
+ ctx.t = t;
+ err = btf_type_tag_walk(btf, &ctx, func_type_tags,
+ ARRAY_SIZE(func_type_tags));
+ if (err) {
+ bpf_log(&env->log,
+ "function signature member has multiple type tags\n");
+ return err;
}
+ *tags |= ctx.res;
return 0;
}
diff --git a/kernel/bpf/cgroup.c b/kernel/bpf/cgroup.c
index 83ce66296ac1..4355ccb78a9c 100644
--- a/kernel/bpf/cgroup.c
+++ b/kernel/bpf/cgroup.c
@@ -939,19 +939,65 @@ static int cgroup_bpf_attach(struct cgroup *cgrp,
return ret;
}
+static int effective_prog_pos(struct cgroup *cgrp,
+ enum cgroup_bpf_attach_type atype,
+ struct bpf_prog_list *target_pl)
+{
+ int cnt = 0, preorder_cnt = 0, fstart, bstart, init_bstart, pos = -1;
+ struct bpf_prog_list *pl;
+ struct cgroup *p = cgrp;
+
+ /* count effective programs to find where the preorder region ends */
+ do {
+ if (cnt == 0 || (p->bpf.flags[atype] & BPF_F_ALLOW_MULTI))
+ cnt += prog_list_length(&p->bpf.progs[atype], &preorder_cnt);
+ p = cgroup_parent(p);
+ } while (p);
+
+ /* replay compute_effective_progs() placement and record target's slot */
+ cnt = 0;
+ p = cgrp;
+ fstart = preorder_cnt;
+ bstart = preorder_cnt - 1;
+ do {
+ if (cnt > 0 && !(p->bpf.flags[atype] & BPF_F_ALLOW_MULTI))
+ continue;
+
+ init_bstart = bstart;
+ hlist_for_each_entry(pl, &p->bpf.progs[atype], node) {
+ if (!prog_list_prog(pl))
+ continue;
+
+ if (pl->flags & BPF_F_PREORDER) {
+ if (pl == target_pl)
+ pos = bstart;
+ bstart--;
+ } else {
+ if (pl == target_pl)
+ pos = fstart;
+ fstart++;
+ }
+ cnt++;
+ }
+
+ /* reverse pre-ordering progs at this cgroup level */
+ if (pos >= bstart + 1 && pos <= init_bstart)
+ pos = bstart + 1 + init_bstart - pos;
+ } while ((p = cgroup_parent(p)));
+
+ return pos;
+}
+
/* Swap updated BPF program for given link in effective program arrays across
* all descendant cgroups. This function is guaranteed to succeed.
*/
static void replace_effective_prog(struct cgroup *cgrp,
enum cgroup_bpf_attach_type atype,
- struct bpf_cgroup_link *link)
+ struct bpf_prog_list *pl)
{
struct bpf_prog_array_item *item;
struct cgroup_subsys_state *css;
struct bpf_prog_array *progs;
- struct bpf_prog_list *pl;
- struct hlist_head *head;
- struct cgroup *cg;
int pos;
css_for_each_descendant_pre(css, &cgrp->self) {
@@ -960,27 +1006,15 @@ static void replace_effective_prog(struct cgroup *cgrp,
if (percpu_ref_is_zero(&desc->bpf.refcnt))
continue;
- /* find position of link in effective progs array */
- for (pos = 0, cg = desc; cg; cg = cgroup_parent(cg)) {
- if (pos && !(cg->bpf.flags[atype] & BPF_F_ALLOW_MULTI))
- continue;
+ pos = effective_prog_pos(desc, atype, pl);
+ if (WARN_ON_ONCE(pos < 0))
+ continue;
- head = &cg->bpf.progs[atype];
- hlist_for_each_entry(pl, head, node) {
- if (!prog_list_prog(pl))
- continue;
- if (pl->link == link)
- goto found;
- pos++;
- }
- }
-found:
- BUG_ON(!cg);
progs = rcu_dereference_protected(
desc->bpf.effective[atype],
lockdep_is_held(&cgroup_mutex));
item = &progs->items[pos];
- WRITE_ONCE(item->prog, link->link.prog);
+ WRITE_ONCE(item->prog, pl->link->link.prog);
}
}
@@ -1024,7 +1058,7 @@ static int __cgroup_bpf_replace(struct cgroup *cgrp,
cgrp->bpf.revisions[atype] += 1;
old_prog = xchg(&link->link.prog, new_prog);
- replace_effective_prog(cgrp, atype, link);
+ replace_effective_prog(cgrp, atype, pl);
bpf_prog_put(old_prog);
return 0;
}
@@ -1091,19 +1125,14 @@ static struct bpf_prog_list *find_detach_entry(struct hlist_head *progs,
* recomputing the array in place.
*
* @cgrp: The cgroup which descendants to travers
- * @prog: A program to detach or NULL
- * @link: A link to detach or NULL
+ * @pl: The prog_list entry being detached
* @atype: Type of detach operation
*/
-static void purge_effective_progs(struct cgroup *cgrp, struct bpf_prog *prog,
- struct bpf_cgroup_link *link,
+static void purge_effective_progs(struct cgroup *cgrp, struct bpf_prog_list *pl,
enum cgroup_bpf_attach_type atype)
{
struct cgroup_subsys_state *css;
struct bpf_prog_array *progs;
- struct bpf_prog_list *pl;
- struct hlist_head *head;
- struct cgroup *cg;
int pos;
/* recompute effective prog array in place */
@@ -1113,24 +1142,11 @@ static void purge_effective_progs(struct cgroup *cgrp, struct bpf_prog *prog,
if (percpu_ref_is_zero(&desc->bpf.refcnt))
continue;
- /* find position of link or prog in effective progs array */
- for (pos = 0, cg = desc; cg; cg = cgroup_parent(cg)) {
- if (pos && !(cg->bpf.flags[atype] & BPF_F_ALLOW_MULTI))
- continue;
-
- head = &cg->bpf.progs[atype];
- hlist_for_each_entry(pl, head, node) {
- if (!prog_list_prog(pl))
- continue;
- if (pl->prog == prog && pl->link == link)
- goto found;
- pos++;
- }
- }
-
+ pos = effective_prog_pos(desc, atype, pl);
/* no link or prog match, skip the cgroup of this layer */
- continue;
-found:
+ if (pos < 0)
+ continue;
+
progs = rcu_dereference_protected(
desc->bpf.effective[atype],
lockdep_is_held(&cgroup_mutex));
@@ -1196,7 +1212,7 @@ static int __cgroup_bpf_detach(struct cgroup *cgrp, struct bpf_prog *prog,
/* if update effective array failed replace the prog with a dummy prog*/
pl->prog = old_prog;
pl->link = link;
- purge_effective_progs(cgrp, old_prog, link, atype);
+ purge_effective_progs(cgrp, pl, atype);
}
/* now can actually delete it from this cgroup list */
diff --git a/kernel/bpf/stackmap.c b/kernel/bpf/stackmap.c
index 77ba03216c09..41fe87d7302f 100644
--- a/kernel/bpf/stackmap.c
+++ b/kernel/bpf/stackmap.c
@@ -175,6 +175,95 @@ static inline void stack_map_build_id_set_valid(struct bpf_stack_build_id *id,
memcpy(id->build_id, build_id, BUILD_ID_SIZE_MAX);
}
+/*
+ * A cached VMA lookup result. The range [vm_start, vm_end) is always set.
+ * vm_pgoff, file, build_id are set only when the build ID was resolved.
+ * Zero vm_end marks the slot empty. build_id aliases the id_offs[] entry.
+ */
+struct stack_map_cached_vma {
+ unsigned long vm_start;
+ unsigned long vm_end;
+ unsigned long vm_pgoff;
+ struct file *file; /* pinned in the sleepable path; NULL otherwise */
+ const unsigned char *build_id;
+};
+
+/*
+ * Per stack_map_get_build_id_offset() call cache of the last VMA with a build ID
+ * resolved and the last VMA with no usable build ID. Adjacent stack frames tend
+ * to land in the same VMA or the same backing file, so caching the last result
+ * of each kind lets us skip unnecessary VMA lookups and build ID parse calls.
+ * Keeping the two slots independent means a build-ID-less VMA doesn't evict the
+ * last resolved build ID.
+ */
+struct stack_map_build_id_cache {
+ struct stack_map_cached_vma resolved;
+ struct stack_map_cached_vma unresolved;
+};
+
+/*
+ * Fill @id from a cached range covering @ip. On a hit this writes @id (resolved
+ * range -> build ID + offset, unresolved range -> raw ip) and returns 0; on a
+ * miss it leaves @id untouched and returns -ENOENT.
+ */
+static int stack_map_build_id_set_from_cache(struct stack_map_build_id_cache *cache,
+ struct bpf_stack_build_id *id, u64 ip)
+{
+ unsigned long vm_start, vm_end, vm_pgoff;
+ u64 offset;
+
+ vm_start = cache->resolved.vm_start;
+ vm_end = cache->resolved.vm_end;
+ if (vm_end && ip >= vm_start && ip < vm_end) {
+ vm_pgoff = cache->resolved.vm_pgoff;
+ offset = stack_map_build_id_offset(vm_pgoff, vm_start, ip);
+ stack_map_build_id_set_valid(id, offset, cache->resolved.build_id);
+ return 0;
+ }
+
+ vm_start = cache->unresolved.vm_start;
+ vm_end = cache->unresolved.vm_end;
+ if (vm_end && ip >= vm_start && ip < vm_end) {
+ stack_map_build_id_set_ip(id);
+ return 0;
+ }
+
+ return -ENOENT;
+}
+
+/*
+ * Record @vma's build ID as the last resolved one. @file is the pinned backing
+ * file in the sleepable path (released when evicted), or NULL otherwise.
+ */
+static void stack_map_build_id_cache_set_resolved(struct stack_map_build_id_cache *cache,
+ struct file *file,
+ const unsigned char *build_id,
+ unsigned long vm_start,
+ unsigned long vm_end,
+ unsigned long vm_pgoff)
+{
+ if (cache->resolved.file)
+ fput(cache->resolved.file);
+ cache->resolved = (struct stack_map_cached_vma){
+ .vm_start = vm_start,
+ .vm_end = vm_end,
+ .vm_pgoff = vm_pgoff,
+ .file = file,
+ .build_id = build_id,
+ };
+}
+
+/* Record [vm_start, vm_end) as a range with no usable build ID. */
+static void stack_map_build_id_cache_set_unresolved(struct stack_map_build_id_cache *cache,
+ unsigned long vm_start,
+ unsigned long vm_end)
+{
+ cache->unresolved = (struct stack_map_cached_vma){
+ .vm_start = vm_start,
+ .vm_end = vm_end,
+ };
+}
+
struct stack_map_vma_lock {
struct vm_area_struct *vma;
struct mm_struct *mm;
@@ -244,15 +333,9 @@ static void stack_map_unlock_vma(struct stack_map_vma_lock *lock)
static void stack_map_get_build_id_offset_sleepable(struct bpf_stack_build_id *id_offs,
u32 trace_nr)
{
- struct mm_struct *mm = current->mm;
- struct stack_map_vma_lock lock = { .mm = mm };
- struct {
- struct file *file;
- const unsigned char *build_id;
- unsigned long vm_start;
- unsigned long vm_end;
- unsigned long vm_pgoff;
- } cache = {};
+ struct stack_map_vma_lock lock = { .mm = current->mm };
+ struct stack_map_build_id_cache cache = {};
+ struct stack_map_cached_vma *res = &cache.resolved;
unsigned long vm_pgoff, vm_start, vm_end;
struct vm_area_struct *vma;
struct file *file;
@@ -262,44 +345,39 @@ static void stack_map_get_build_id_offset_sleepable(struct bpf_stack_build_id *i
for (u32 i = 0; i < trace_nr; i++) {
ip = READ_ONCE(id_offs[i].ip);
- /*
- * Range cache fast path: if ip falls within the previously
- * resolved VMA range, reuse the cache build_id without
- * re-acquiring the VMA lock.
- */
- if (cache.build_id && ip >= cache.vm_start && ip < cache.vm_end) {
- offset = stack_map_build_id_offset(cache.vm_pgoff, cache.vm_start, ip);
- stack_map_build_id_set_valid(&id_offs[i], offset, cache.build_id);
+ if (!stack_map_build_id_set_from_cache(&cache, &id_offs[i], ip))
continue;
- }
vma = stack_map_lock_vma(&lock, ip);
if (!vma) {
stack_map_build_id_set_ip(&id_offs[i]);
continue;
}
+
+ vm_pgoff = vma->vm_pgoff;
+ vm_start = vma->vm_start;
+ vm_end = vma->vm_end;
+
if (vma_is_anonymous(vma) || !vma->vm_file) {
- stack_map_build_id_set_ip(&id_offs[i]);
stack_map_unlock_vma(&lock);
+ stack_map_build_id_set_ip(&id_offs[i]);
+ stack_map_build_id_cache_set_unresolved(&cache, vm_start, vm_end);
continue;
}
file = vma->vm_file;
- vm_pgoff = vma->vm_pgoff;
- vm_start = vma->vm_start;
- vm_end = vma->vm_end;
offset = stack_map_build_id_offset(vm_pgoff, vm_start, ip);
/*
- * Same backing file as previous (e.g. different VMAs
- * of the same ELF binary). Reuse the cache build_id.
+ * Same backing file as the last resolved VMA (another mapping
+ * of the same ELF binary): reuse its build_id without re-parsing.
*/
- if (file == cache.file) {
+ if (file == res->file) {
stack_map_unlock_vma(&lock);
- stack_map_build_id_set_valid(&id_offs[i], offset, cache.build_id);
- cache.vm_start = vm_start;
- cache.vm_end = vm_end;
- cache.vm_pgoff = vm_pgoff;
+ stack_map_build_id_set_valid(&id_offs[i], offset, res->build_id);
+ res->vm_start = vm_start;
+ res->vm_end = vm_end;
+ res->vm_pgoff = vm_pgoff;
continue;
}
@@ -310,21 +388,17 @@ static void stack_map_get_build_id_offset_sleepable(struct bpf_stack_build_id *i
if (build_id_parse_file(file, id_offs[i].build_id, NULL)) {
stack_map_build_id_set_ip(&id_offs[i]);
fput(file);
+ stack_map_build_id_cache_set_unresolved(&cache, vm_start, vm_end);
continue;
}
stack_map_build_id_set_valid(&id_offs[i], offset, id_offs[i].build_id);
- if (cache.file)
- fput(cache.file);
- cache.file = file;
- cache.build_id = id_offs[i].build_id;
- cache.vm_start = vm_start;
- cache.vm_end = vm_end;
- cache.vm_pgoff = vm_pgoff;
+ stack_map_build_id_cache_set_resolved(&cache, file, id_offs[i].build_id,
+ vm_start, vm_end, vm_pgoff);
}
- if (cache.file)
- fput(cache.file);
+ if (res->file)
+ fput(res->file);
}
/*
@@ -343,8 +417,8 @@ static void stack_map_get_build_id_offset(struct bpf_stack_build_id *id_offs,
struct mmap_unlock_irq_work *work = NULL;
bool irq_work_busy = bpf_mmap_unlock_get_irq_work(&work);
bool has_user_ctx = user && current && current->mm;
- struct vm_area_struct *vma, *prev_vma = NULL;
- const unsigned char *prev_build_id = NULL;
+ struct stack_map_build_id_cache cache = {};
+ struct vm_area_struct *vma;
int i;
if (may_fault && has_user_ctx) {
@@ -365,27 +439,30 @@ static void stack_map_get_build_id_offset(struct bpf_stack_build_id *id_offs,
for (i = 0; i < trace_nr; i++) {
u64 ip = READ_ONCE(id_offs[i].ip);
- u64 offset;
- if (prev_build_id && range_in_vma(prev_vma, ip, ip)) {
- vma = prev_vma;
- offset = stack_map_build_id_offset(vma->vm_pgoff, vma->vm_start, ip);
- stack_map_build_id_set_valid(&id_offs[i], offset, prev_build_id);
+ if (!stack_map_build_id_set_from_cache(&cache, &id_offs[i], ip))
continue;
- }
+
vma = find_vma(current->mm, ip);
if (!vma || vma_is_anonymous(vma) ||
fetch_build_id(vma, id_offs[i].build_id, may_fault)) {
- /* per entry fall back to ips */
+ /* per entry fall back to ips; cache build-ID-less range */
stack_map_build_id_set_ip(&id_offs[i]);
- prev_vma = vma;
- prev_build_id = NULL;
+ if (vma)
+ stack_map_build_id_cache_set_unresolved(&cache,
+ vma->vm_start, vma->vm_end);
continue;
}
- offset = stack_map_build_id_offset(vma->vm_pgoff, vma->vm_start, ip);
- stack_map_build_id_set_valid(&id_offs[i], offset, id_offs[i].build_id);
- prev_vma = vma;
- prev_build_id = id_offs[i].build_id;
+ /*
+ * mmap_lock is held for the whole loop, so the cached VMA
+ * fields stay valid; no file pinning is needed here.
+ */
+ stack_map_build_id_set_valid(&id_offs[i],
+ stack_map_build_id_offset(vma->vm_pgoff, vma->vm_start, ip),
+ id_offs[i].build_id);
+ stack_map_build_id_cache_set_resolved(&cache, NULL, id_offs[i].build_id,
+ vma->vm_start, vma->vm_end,
+ vma->vm_pgoff);
}
bpf_mmap_unlock_mm(work, current->mm);
}
diff --git a/kernel/bpf/states.c b/kernel/bpf/states.c
index 32f346ce3ffc..ea2153cf28d0 100644
--- a/kernel/bpf/states.c
+++ b/kernel/bpf/states.c
@@ -436,12 +436,10 @@ static void __clean_func_state(struct bpf_verifier_env *env,
continue;
/*
- * Only destroy spilled_ptr when hi half is dead.
- * If hi half is still live with STACK_SPILL, the
- * spilled_ptr metadata is needed for correct state
- * comparison in stacksafe().
- * is_spilled_reg() is using slot_type[7], but
- * is_spilled_scalar_after() check either slot_type[0] or [4]
+ * Only scalar spills can be degraded to raw stack bytes
+ * when their high half is dead. Pointer spills need the
+ * saved spilled_ptr metadata so partial fills keep
+ * rejecting as non-scalar register fills.
*/
if (!hi_live) {
struct bpf_reg_state *spill = &st->stack[i].spilled_ptr;
@@ -449,6 +447,9 @@ static void __clean_func_state(struct bpf_verifier_env *env,
if (lo_live && stype == STACK_SPILL) {
u8 val = STACK_MISC;
+ if (spill->type != SCALAR_VALUE)
+ continue;
+
/*
* 8 byte spill of scalar 0 where half slot is dead
* should become STACK_ZERO in lo 4 bytes.
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index b44106c8ea75..6db306d23b47 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -6308,7 +6308,7 @@ static int prog_stream_read(union bpf_attr *attr)
return ret;
}
-#define BPF_PROG_ASSOC_STRUCT_OPS_LAST_FIELD prog_assoc_struct_ops.prog_fd
+#define BPF_PROG_ASSOC_STRUCT_OPS_LAST_FIELD prog_assoc_struct_ops.flags
static int prog_assoc_struct_ops(union bpf_attr *attr)
{
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 2abc79dbf281..21a365d436a5 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -3479,7 +3479,8 @@ static int check_stack_write_fixed_off(struct bpf_verifier_env *env,
bool sanitize = reg && is_spillable_regtype(reg->type);
for (i = 0; i < size; i++) {
- u8 type = state->stack[spi].slot_type[i];
+ u8 type = state->stack[spi].slot_type[(slot - i) %
+ BPF_REG_SIZE];
if (type != STACK_MISC && type != STACK_ZERO) {
sanitize = true;
@@ -5786,6 +5787,10 @@ static int check_ptr_to_btf_access(struct bpf_verifier_env *env,
return -EFAULT;
}
ret = env->ops->btf_struct_access(&env->log, reg, off, size);
+ if (ret < 0)
+ verbose(env,
+ "%s cannot write into ptr_%s at off=%d size=%d\n",
+ reg_arg_name(env, argno), tname, off, size);
} else {
/* Writes are permitted with default btf_struct_access for
* program allocated objects (which always have id > 0),
@@ -6196,6 +6201,7 @@ static int check_mem_access(struct bpf_verifier_env *env, int insn_idx, struct b
*/
if (info.reg_type == SCALAR_VALUE) {
if (info.is_retval && get_func_retval_range(env->prog, &range)) {
+ mark_reg_unknown(env, regs, value_regno);
err = __mark_reg_s32_range(env, regs, value_regno,
range.minval, range.maxval);
if (err)
diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c
index 82f8feea6931..75495a5c3507 100644
--- a/kernel/trace/bpf_trace.c
+++ b/kernel/trace/bpf_trace.c
@@ -2376,9 +2376,12 @@ static int copy_user_syms(struct user_syms *us, unsigned long __user *usyms, u32
int err = -ENOMEM;
unsigned int i;
+ if (!access_ok(usyms, cnt * sizeof(*usyms)))
+ return -EFAULT;
+
syms = kvmalloc_array(cnt, sizeof(*syms), GFP_KERNEL);
if (!syms)
- goto error;
+ return -ENOMEM;
buf = kvmalloc_array(cnt, KSYM_NAME_LEN, GFP_KERNEL);
if (!buf)
@@ -2403,10 +2406,8 @@ static int copy_user_syms(struct user_syms *us, unsigned long __user *usyms, u32
return 0;
error:
- if (err) {
- kvfree(syms);
- kvfree(buf);
- }
+ kvfree(syms);
+ kvfree(buf);
return err;
}