Merge tag 'bpf-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf

Pull bpf fixes from Alexei Starovoitov:

 - Fix effective prog array index with BPF_F_PREORDER (Amery Hung)
 - Zero-initialize the fib lookup flow struct (Avinash Duduskar)
 - Disable xfrm_decode_session hook attachment (Bradley Morgan)
 - Allow type tag BTF records to succeed other modifier records (Emil Tsalapatis)
 - Fix build_id caching in stack_map_get_build_id_offset() (Ihor Solodrai)
 - Add missing access_ok call to copy_user_syms (Jiri Olsa)
 - Fix stack slot index in nospec checks (Nuoqi Gui)
 - Preserve pointer spill metadata during half-slot cleanup (Nuoqi Gui)
 - Fix partial copy of non-linear test_run output (Sun Jian)
 - Fix BPF_PROG_ASSOC_STRUCT_OPS last field check (Thiébaud Weksteen)
 - Reset register bounds before narrowing retval range (Tristan Madani)
 - Fix vmlinux BTF leak in bpftool cgroup commands (Yichong Chen)
 - Guard error writes in conntrack kfuncs (Yiyang Chen)

* tag 'bpf-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf:
  bpf: Disable xfrm_decode_session hook attachment
  selftests/bpf: Add test for stale bounds on LSM retval context load
  bpf: Reset register bounds before narrowing retval range in check_mem_access()
  selftests/bpf: Cover small conntrack opts error writes
  bpf: Guard conntrack opts error writes
  selftests/bpf: Cover half-slot cleanup of pointer spills
  bpf: Preserve pointer spill metadata during half-slot cleanup
  selftests/bpf: Test cgroup link replace with BPF_F_PREORDER
  bpf: Fix effective prog array index with BPF_F_PREORDER
  bpf: Fix BPF_PROG_ASSOC_STRUCT_OPS last field check
  bpf: zero-initialize the fib lookup flow struct
  bpftool: Fix vmlinux BTF leak in cgroup commands
  bpf: Add missing access_ok call to copy_user_syms
  bpf: Allow type tag BTF records to succeed other modifier records
  bpf: Emit verbose message when prog-specific btf_struct_access rejects a write
  bpf: Fix build_id caching in stack_map_get_build_id_offset()
  bpf: Fix partial copy of non-linear test_run output
  selftests/bpf: Cover stack nospec slot indexing
  bpf: Fix stack slot index in nospec checks
author: Linus Torvalds <torvalds@linux-foundation.org> 2026-06-25 14:09:26 -0700
committer: Linus Torvalds <torvalds@linux-foundation.org> 2026-06-25 14:09:26 -0700
commit: 4edcdefd4083ae04b1a5656f4be6cd83ae919ef4 (patch)
tree: 52f12981c8044a73f2b0963555bd1505c42217f1 /kernel
parent: 8c04c1292dca29a57ea82c6a44348be49749fc22 (diff)
parent: 12091470c6b4c1c14b2de12dcbae2ada6cb6d20b (diff)
8 files changed, 339 insertions, 198 deletions
diff --git a/kernel/bpf/bpf_lsm.c b/kernel/bpf/bpf_lsm.c
index 564071a92d7d..1433809bb166 100644
--- a/kernel/bpf/bpf_lsm.c
+++ b/kernel/bpf/bpf_lsm.c
@@ -51,6 +51,9 @@ BTF_ID(func, bpf_lsm_key_getsecurity)
 #ifdef CONFIG_AUDIT
 BTF_ID(func, bpf_lsm_audit_rule_match)
 #endif
+#ifdef CONFIG_SECURITY_NETWORK_XFRM
+BTF_ID(func, bpf_lsm_xfrm_decode_session)
+#endif
 BTF_ID(func, bpf_lsm_ismaclabel)
 BTF_ID(func, bpf_lsm_file_alloc_security)
 BTF_SET_END(bpf_lsm_disabled_hooks)
diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c
index 15ae7c43f594..64572f85edc8 100644
--- a/kernel/bpf/btf.c
+++ b/kernel/bpf/btf.c
@@ -28,6 +28,7 @@
 #include <linux/string.h>
 #include <linux/sysfs.h>
 #include <linux/overflow.h>
+#include <linux/bitops.h>
 
 #include <net/netfilter/nf_bpf_link.h>
 
@@ -3472,12 +3473,69 @@ static int btf_find_struct(const struct btf *btf, const struct btf_type *t,
 	return BTF_FIELD_FOUND;
 }
 
+struct btf_type_tag_match {
+	const char *name;
+	u32 flag;
+};
+
+struct btf_type_tag_walk_ctx {
+	const struct btf_type *t;	/* Input/Output */
+	u32 id;				/* Output */
+	u32 res;			/* Output */
+};
+
+static int btf_type_tag_walk(const struct btf *btf,
+			     struct btf_type_tag_walk_ctx *ctx,
+			     const struct btf_type_tag_match *matches,
+			     u32 match_cnt)
+{
+	const struct btf_type *t = ctx->t;
+	u32 res = 0;
+	const char *tag;
+	u32 id, i;
+
+	do {
+		id = t->type;
+		t = btf_type_by_id(btf, id);
+
+		if (!btf_type_is_modifier(t))
+			break;
+
+		if (!btf_type_is_type_tag(t) || btf_type_kflag(t))
+			continue;
+
+		tag = __btf_name_by_offset(btf, t->name_off);
+		for (i = 0; i < match_cnt; i++) {
+			if (strcmp(tag, matches[i].name))
+				continue;
+			res |= matches[i].flag;
+			break;
+		}
+	} while (true);
+
+	/* We only support a single tag. */
+	if (hweight32(res) > 1)
+		return -EINVAL;
+
+	ctx->t = t;
+	ctx->id = id;
+	ctx->res = res;
+
+	return 0;
+}
+
 static int btf_find_kptr(const struct btf *btf, const struct btf_type *t,
 			 u32 off, int sz, struct btf_field_info *info, u32 field_mask)
 {
-	enum btf_field_type type;
-	const char *tag_value;
-	bool is_type_tag;
+	static const struct btf_type_tag_match kptr_type_tags[] = {
+		{ "kptr_untrusted", BPF_KPTR_UNREF },
+		{ "kptr", BPF_KPTR_REF },
+		{ "percpu_kptr", BPF_KPTR_PERCPU },
+		{ "uptr", BPF_UPTR },
+	};
+	struct btf_type_tag_walk_ctx ctx;
+	enum btf_field_type type = 0;
+	int err;
 	u32 res_id;
 
 	/* Permit modifiers on the pointer itself */
@@ -3486,30 +3544,20 @@ static int btf_find_kptr(const struct btf *btf, const struct btf_type *t,
 	/* For PTR, sz is always == 8 */
 	if (!btf_type_is_ptr(t))
 		return BTF_FIELD_IGNORE;
-	t = btf_type_by_id(btf, t->type);
-	is_type_tag = btf_type_is_type_tag(t) && !btf_type_kflag(t);
-	if (!is_type_tag)
-		return BTF_FIELD_IGNORE;
-	/* Reject extra tags */
-	if (btf_type_is_type_tag(btf_type_by_id(btf, t->type)))
-		return -EINVAL;
-	tag_value = __btf_name_by_offset(btf, t->name_off);
-	if (!strcmp("kptr_untrusted", tag_value))
-		type = BPF_KPTR_UNREF;
-	else if (!strcmp("kptr", tag_value))
-		type = BPF_KPTR_REF;
-	else if (!strcmp("percpu_kptr", tag_value))
-		type = BPF_KPTR_PERCPU;
-	else if (!strcmp("uptr", tag_value))
-		type = BPF_UPTR;
-	else
-		return -EINVAL;
+
+	ctx.t = t;
+	err = btf_type_tag_walk(btf, &ctx, kptr_type_tags,
+				ARRAY_SIZE(kptr_type_tags));
+	if (err)
+		return err;
+
+	t = ctx.t;
+	res_id = ctx.id;
+	type = ctx.res;
 
 	if (!(type & field_mask))
 		return BTF_FIELD_IGNORE;
 
-	/* Get the base type */
-	t = btf_type_skip_modifiers(btf, t->type, &res_id);
 	/* Only pointer to struct is allowed */
 	if (!__btf_type_is_struct(t))
 		return -EINVAL;
@@ -5859,11 +5907,10 @@ struct btf_struct_meta *btf_find_struct_meta(const struct btf *btf, u32 btf_id)
 	return bsearch(&btf_id, tab->types, tab->cnt, sizeof(tab->types[0]), btf_id_cmp_func);
 }
 
-static int btf_check_type_tags(struct btf_verifier_env *env,
-			       struct btf *btf, int start_id)
+static int btf_check_modifier_chain_length(struct btf_verifier_env *env,
+					   struct btf *btf, int start_id)
 {
 	int i, n, good_id = start_id - 1;
-	bool in_tags;
 
 	n = btf_nr_types(btf);
 	for (i = start_id; i < n; i++) {
@@ -5879,20 +5926,12 @@ static int btf_check_type_tags(struct btf_verifier_env *env,
 
 		cond_resched();
 
-		in_tags = btf_type_is_type_tag(t);
 		while (btf_type_is_modifier(t)) {
 			if (!chain_limit--) {
 				btf_verifier_log(env, "Max chain length or cycle detected");
 				return -ELOOP;
 			}
-			if (btf_type_is_type_tag(t)) {
-				if (!in_tags) {
-					btf_verifier_log(env, "Type tags don't precede modifiers");
-					return -EINVAL;
-				}
-			} else if (in_tags) {
-				in_tags = false;
-			}
+
 			if (cur_id <= good_id)
 				break;
 			/* Move to next type */
@@ -5970,7 +6009,7 @@ static struct btf *btf_parse(const union bpf_attr *attr, bpfptr_t uattr,
 	if (err)
 		goto errout;
 
-	err = btf_check_type_tags(env, btf, 1);
+	err = btf_check_modifier_chain_length(env, btf, 1);
 	if (err)
 		goto errout;
 
@@ -6378,7 +6417,7 @@ static struct btf *btf_parse_base(struct btf_verifier_env *env, const char *name
 	if (err)
 		goto errout;
 
-	err = btf_check_type_tags(env, btf, 1);
+	err = btf_check_modifier_chain_length(env, btf, 1);
 	if (err)
 		goto errout;
 
@@ -6504,7 +6543,7 @@ static struct btf *btf_parse_module(const char *module_name, const void *data,
 	if (err)
 		goto errout;
 
-	err = btf_check_type_tags(env, btf, btf_nr_types(base_btf));
+	err = btf_check_modifier_chain_length(env, btf, btf_nr_types(base_btf));
 	if (err)
 		goto errout;
 
@@ -6810,14 +6849,18 @@ bool btf_ctx_access(int off, int size, enum bpf_access_type type,
 		    const struct bpf_prog *prog,
 		    struct bpf_insn_access_aux *info)
 {
+	static const struct btf_type_tag_match ctx_type_tags[] = {
+		{ "user", MEM_USER },
+		{ "percpu", MEM_PERCPU },
+	};
 	const struct btf_type *t = prog->aux->attach_func_proto;
 	struct bpf_prog *tgt_prog = prog->aux->dst_prog;
 	struct btf *btf = bpf_prog_get_target_btf(prog);
 	const char *tname = prog->aux->attach_func_name;
 	struct bpf_verifier_log *log = info->log;
+	struct btf_type_tag_walk_ctx ctx;
 	const struct btf_param *args;
 	bool ptr_err_raw_tp = false;
-	const char *tag_value;
 	u32 nr_args, arg;
 	int i, ret;
 
@@ -7020,22 +7063,18 @@ bool btf_ctx_access(int off, int size, enum bpf_access_type type,
 	}
 
 	info->btf = btf;
-	info->btf_id = t->type;
-	t = btf_type_by_id(btf, t->type);
-
-	if (btf_type_is_type_tag(t) && !btf_type_kflag(t)) {
-		tag_value = __btf_name_by_offset(btf, t->name_off);
-		if (strcmp(tag_value, "user") == 0)
-			info->reg_type |= MEM_USER;
-		if (strcmp(tag_value, "percpu") == 0)
-			info->reg_type |= MEM_PERCPU;
+	ctx.t = t;
+	ret = btf_type_tag_walk(btf, &ctx, ctx_type_tags,
+				ARRAY_SIZE(ctx_type_tags));
+	if (ret) {
+		bpf_log(log, "func '%s' arg%d type %s has multiple type tags\n",
+			tname, arg, btf_type_str(t));
+		return false;
 	}
+	info->reg_type |= ctx.res;
+	info->btf_id = ctx.id;
+	t = ctx.t;
 
-	/* skip modifiers */
-	while (btf_type_is_modifier(t)) {
-		info->btf_id = t->type;
-		t = btf_type_by_id(btf, t->type);
-	}
 	if (!btf_type_is_struct(t)) {
 		bpf_log(log,
 			"func '%s' arg%d type %s is not a struct\n",
@@ -7074,7 +7113,7 @@ static int btf_struct_walk(struct bpf_verifier_log *log, const struct btf *btf,
 	u32 i, moff, mtrue_end, msize = 0, total_nelems = 0;
 	const struct btf_type *mtype, *elem_type = NULL;
 	const struct btf_member *member;
-	const char *tname, *mname, *tag_value;
+	const char *tname, *mname;
 	u32 vlen, elem_id, mid;
 
 again:
@@ -7270,8 +7309,15 @@ error:
 		}
 
 		if (btf_type_is_ptr(mtype)) {
-			const struct btf_type *stype, *t;
+			static const struct btf_type_tag_match walk_type_tags[] = {
+				{ "user", MEM_USER },
+				{ "percpu", MEM_PERCPU },
+				{ "rcu", MEM_RCU },
+			};
 			enum bpf_type_flag tmp_flag = 0;
+			struct btf_type_tag_walk_ctx ctx = { .t = mtype };
+			const struct btf_type *stype;
+			int err;
 			u32 id;
 
 			if (msize != size || off != moff) {
@@ -7281,22 +7327,17 @@ error:
 				return -EACCES;
 			}
 
-			/* check type tag */
-			t = btf_type_by_id(btf, mtype->type);
-			if (btf_type_is_type_tag(t) && !btf_type_kflag(t)) {
-				tag_value = __btf_name_by_offset(btf, t->name_off);
-				/* check __user tag */
-				if (strcmp(tag_value, "user") == 0)
-					tmp_flag = MEM_USER;
-				/* check __percpu tag */
-				if (strcmp(tag_value, "percpu") == 0)
-					tmp_flag = MEM_PERCPU;
-				/* check __rcu tag */
-				if (strcmp(tag_value, "rcu") == 0)
-					tmp_flag = MEM_RCU;
+			err = btf_type_tag_walk(btf, &ctx, walk_type_tags,
+						ARRAY_SIZE(walk_type_tags));
+			if (err) {
+				bpf_log(log, "type '%s' has multiple type tags\n",
+					btf_type_str(mtype));
+				return err;
 			}
+			tmp_flag = ctx.res;
+			id = ctx.id;
+			stype = ctx.t;
 
-			stype = btf_type_skip_modifiers(btf, mtype->type, &id);
 			if (btf_type_is_struct(stype)) {
 				*next_btf_id = id;
 				*flag |= tmp_flag;
@@ -7867,7 +7908,12 @@ static int btf_scan_type_tags(struct bpf_verifier_env *env,
 			      const struct btf *btf, u32 type_id,
 			      u32 *tags)
 {
+	static const struct btf_type_tag_match func_type_tags[] = {
+		{ "arena", ARG_TAG_ARENA },
+	};
+	struct btf_type_tag_walk_ctx ctx;
 	const struct btf_type *t;
+	int err;
 
 	/* Find the first pointer type in the chain. */
 	t = btf_type_skip_modifiers(btf, type_id, NULL);
@@ -7879,24 +7925,15 @@ static int btf_scan_type_tags(struct bpf_verifier_env *env,
 	if (!t || !btf_type_is_ptr(t))
 		return 0;
 
-	/* We got a pointer, get all associated type tags. */
-	for (t = btf_type_by_id(btf, t->type); t && btf_type_is_modifier(t);
-		t = btf_type_by_id(btf, t->type)) {
-
-		/* Skip non-type tag modifiers. */
-		if (!btf_type_is_type_tag(t))
-			continue;
-
-		const char *tag = __btf_name_by_offset(btf, t->name_off);
-
-		if (strcmp(tag, "arena") == 0) {
-			*tags |= ARG_TAG_ARENA;
-		} else {
-			bpf_log(&env->log, "function signature member has unsupported type tag '%s'\n",
-				tag);
-			return -EOPNOTSUPP;
-		}
+	ctx.t = t;
+	err = btf_type_tag_walk(btf, &ctx, func_type_tags,
+				ARRAY_SIZE(func_type_tags));
+	if (err) {
+		bpf_log(&env->log,
+			"function signature member has multiple type tags\n");
+		return err;
 	}
+	*tags |= ctx.res;
 
 	return 0;
 }
diff --git a/kernel/bpf/cgroup.c b/kernel/bpf/cgroup.c
index 83ce66296ac1..4355ccb78a9c 100644
--- a/kernel/bpf/cgroup.c
+++ b/kernel/bpf/cgroup.c
@@ -939,19 +939,65 @@ static int cgroup_bpf_attach(struct cgroup *cgrp,
 	return ret;
 }
 
+static int effective_prog_pos(struct cgroup *cgrp,
+			      enum cgroup_bpf_attach_type atype,
+			      struct bpf_prog_list *target_pl)
+{
+	int cnt = 0, preorder_cnt = 0, fstart, bstart, init_bstart, pos = -1;
+	struct bpf_prog_list *pl;
+	struct cgroup *p = cgrp;
+
+	/* count effective programs to find where the preorder region ends */
+	do {
+		if (cnt == 0 || (p->bpf.flags[atype] & BPF_F_ALLOW_MULTI))
+			cnt += prog_list_length(&p->bpf.progs[atype], &preorder_cnt);
+		p = cgroup_parent(p);
+	} while (p);
+
+	/* replay compute_effective_progs() placement and record target's slot */
+	cnt = 0;
+	p = cgrp;
+	fstart = preorder_cnt;
+	bstart = preorder_cnt - 1;
+	do {
+		if (cnt > 0 && !(p->bpf.flags[atype] & BPF_F_ALLOW_MULTI))
+			continue;
+
+		init_bstart = bstart;
+		hlist_for_each_entry(pl, &p->bpf.progs[atype], node) {
+			if (!prog_list_prog(pl))
+				continue;
+
+			if (pl->flags & BPF_F_PREORDER) {
+				if (pl == target_pl)
+					pos = bstart;
+				bstart--;
+			} else {
+				if (pl == target_pl)
+					pos = fstart;
+				fstart++;
+			}
+			cnt++;
+		}
+
+		/* reverse pre-ordering progs at this cgroup level */
+		if (pos >= bstart + 1 && pos <= init_bstart)
+			pos = bstart + 1 + init_bstart - pos;
+	} while ((p = cgroup_parent(p)));
+
+	return pos;
+}
+
 /* Swap updated BPF program for given link in effective program arrays across
  * all descendant cgroups. This function is guaranteed to succeed.
  */
 static void replace_effective_prog(struct cgroup *cgrp,
 				   enum cgroup_bpf_attach_type atype,
-				   struct bpf_cgroup_link *link)
+				   struct bpf_prog_list *pl)
 {
 	struct bpf_prog_array_item *item;
 	struct cgroup_subsys_state *css;
 	struct bpf_prog_array *progs;
-	struct bpf_prog_list *pl;
-	struct hlist_head *head;
-	struct cgroup *cg;
 	int pos;
 
 	css_for_each_descendant_pre(css, &cgrp->self) {
@@ -960,27 +1006,15 @@ static void replace_effective_prog(struct cgroup *cgrp,
 		if (percpu_ref_is_zero(&desc->bpf.refcnt))
 			continue;
 
-		/* find position of link in effective progs array */
-		for (pos = 0, cg = desc; cg; cg = cgroup_parent(cg)) {
-			if (pos && !(cg->bpf.flags[atype] & BPF_F_ALLOW_MULTI))
-				continue;
+		pos = effective_prog_pos(desc, atype, pl);
+		if (WARN_ON_ONCE(pos < 0))
+			continue;
 
-			head = &cg->bpf.progs[atype];
-			hlist_for_each_entry(pl, head, node) {
-				if (!prog_list_prog(pl))
-					continue;
-				if (pl->link == link)
-					goto found;
-				pos++;
-			}
-		}
-found:
-		BUG_ON(!cg);
 		progs = rcu_dereference_protected(
 				desc->bpf.effective[atype],
 				lockdep_is_held(&cgroup_mutex));
 		item = &progs->items[pos];
-		WRITE_ONCE(item->prog, link->link.prog);
+		WRITE_ONCE(item->prog, pl->link->link.prog);
 	}
 }
 
@@ -1024,7 +1058,7 @@ static int __cgroup_bpf_replace(struct cgroup *cgrp,
 
 	cgrp->bpf.revisions[atype] += 1;
 	old_prog = xchg(&link->link.prog, new_prog);
-	replace_effective_prog(cgrp, atype, link);
+	replace_effective_prog(cgrp, atype, pl);
 	bpf_prog_put(old_prog);
 	return 0;
 }
@@ -1091,19 +1125,14 @@ static struct bpf_prog_list *find_detach_entry(struct hlist_head *progs,
  *                           recomputing the array in place.
  *
  * @cgrp: The cgroup which descendants to travers
- * @prog: A program to detach or NULL
- * @link: A link to detach or NULL
+ * @pl: The prog_list entry being detached
  * @atype: Type of detach operation
  */
-static void purge_effective_progs(struct cgroup *cgrp, struct bpf_prog *prog,
-				  struct bpf_cgroup_link *link,
+static void purge_effective_progs(struct cgroup *cgrp, struct bpf_prog_list *pl,
 				  enum cgroup_bpf_attach_type atype)
 {
 	struct cgroup_subsys_state *css;
 	struct bpf_prog_array *progs;
-	struct bpf_prog_list *pl;
-	struct hlist_head *head;
-	struct cgroup *cg;
 	int pos;
 
 	/* recompute effective prog array in place */
@@ -1113,24 +1142,11 @@ static void purge_effective_progs(struct cgroup *cgrp, struct bpf_prog *prog,
 		if (percpu_ref_is_zero(&desc->bpf.refcnt))
 			continue;
 
-		/* find position of link or prog in effective progs array */
-		for (pos = 0, cg = desc; cg; cg = cgroup_parent(cg)) {
-			if (pos && !(cg->bpf.flags[atype] & BPF_F_ALLOW_MULTI))
-				continue;
-
-			head = &cg->bpf.progs[atype];
-			hlist_for_each_entry(pl, head, node) {
-				if (!prog_list_prog(pl))
-					continue;
-				if (pl->prog == prog && pl->link == link)
-					goto found;
-				pos++;
-			}
-		}
-
+		pos = effective_prog_pos(desc, atype, pl);
 		/* no link or prog match, skip the cgroup of this layer */
-		continue;
-found:
+		if (pos < 0)
+			continue;
+
 		progs = rcu_dereference_protected(
 				desc->bpf.effective[atype],
 				lockdep_is_held(&cgroup_mutex));
@@ -1196,7 +1212,7 @@ static int __cgroup_bpf_detach(struct cgroup *cgrp, struct bpf_prog *prog,
 		/* if update effective array failed replace the prog with a dummy prog*/
 		pl->prog = old_prog;
 		pl->link = link;
-		purge_effective_progs(cgrp, old_prog, link, atype);
+		purge_effective_progs(cgrp, pl, atype);
 	}
 
 	/* now can actually delete it from this cgroup list */
diff --git a/kernel/bpf/stackmap.c b/kernel/bpf/stackmap.c
index 77ba03216c09..41fe87d7302f 100644
--- a/kernel/bpf/stackmap.c
+++ b/kernel/bpf/stackmap.c
@@ -175,6 +175,95 @@ static inline void stack_map_build_id_set_valid(struct bpf_stack_build_id *id,
 		memcpy(id->build_id, build_id, BUILD_ID_SIZE_MAX);
 }
 
+/*
+ * A cached VMA lookup result. The range [vm_start, vm_end) is always set.
+ * vm_pgoff, file, build_id are set only when the build ID was resolved.
+ * Zero vm_end marks the slot empty. build_id aliases the id_offs[] entry.
+ */
+struct stack_map_cached_vma {
+	unsigned long vm_start;
+	unsigned long vm_end;
+	unsigned long vm_pgoff;
+	struct file *file; /* pinned in the sleepable path; NULL otherwise */
+	const unsigned char *build_id;
+};
+
+/*
+ * Per stack_map_get_build_id_offset() call cache of the last VMA with a build ID
+ * resolved and the last VMA with no usable build ID. Adjacent stack frames tend
+ * to land in the same VMA or the same backing file, so caching the last result
+ * of each kind lets us skip unnecessary VMA lookups and build ID parse calls.
+ * Keeping the two slots independent means a build-ID-less VMA doesn't evict the
+ * last resolved build ID.
+ */
+struct stack_map_build_id_cache {
+	struct stack_map_cached_vma resolved;
+	struct stack_map_cached_vma unresolved;
+};
+
+/*
+ * Fill @id from a cached range covering @ip. On a hit this writes @id (resolved
+ * range -> build ID + offset, unresolved range -> raw ip) and returns 0; on a
+ * miss it leaves @id untouched and returns -ENOENT.
+ */
+static int stack_map_build_id_set_from_cache(struct stack_map_build_id_cache *cache,
+					     struct bpf_stack_build_id *id, u64 ip)
+{
+	unsigned long vm_start, vm_end, vm_pgoff;
+	u64 offset;
+
+	vm_start = cache->resolved.vm_start;
+	vm_end = cache->resolved.vm_end;
+	if (vm_end && ip >= vm_start && ip < vm_end) {
+		vm_pgoff = cache->resolved.vm_pgoff;
+		offset = stack_map_build_id_offset(vm_pgoff, vm_start, ip);
+		stack_map_build_id_set_valid(id, offset, cache->resolved.build_id);
+		return 0;
+	}
+
+	vm_start = cache->unresolved.vm_start;
+	vm_end = cache->unresolved.vm_end;
+	if (vm_end && ip >= vm_start && ip < vm_end) {
+		stack_map_build_id_set_ip(id);
+		return 0;
+	}
+
+	return -ENOENT;
+}
+
+/*
+ * Record @vma's build ID as the last resolved one. @file is the pinned backing
+ * file in the sleepable path (released when evicted), or NULL otherwise.
+ */
+static void stack_map_build_id_cache_set_resolved(struct stack_map_build_id_cache *cache,
+						  struct file *file,
+						  const unsigned char *build_id,
+						  unsigned long vm_start,
+						  unsigned long vm_end,
+						  unsigned long vm_pgoff)
+{
+	if (cache->resolved.file)
+		fput(cache->resolved.file);
+	cache->resolved = (struct stack_map_cached_vma){
+		.vm_start = vm_start,
+		.vm_end = vm_end,
+		.vm_pgoff = vm_pgoff,
+		.file = file,
+		.build_id = build_id,
+	};
+}
+
+/* Record [vm_start, vm_end) as a range with no usable build ID. */
+static void stack_map_build_id_cache_set_unresolved(struct stack_map_build_id_cache *cache,
+						    unsigned long vm_start,
+						    unsigned long vm_end)
+{
+	cache->unresolved = (struct stack_map_cached_vma){
+		.vm_start = vm_start,
+		.vm_end = vm_end,
+	};
+}
+
 struct stack_map_vma_lock {
 	struct vm_area_struct *vma;
 	struct mm_struct *mm;
@@ -244,15 +333,9 @@ static void stack_map_unlock_vma(struct stack_map_vma_lock *lock)
 static void stack_map_get_build_id_offset_sleepable(struct bpf_stack_build_id *id_offs,
 						    u32 trace_nr)
 {
-	struct mm_struct *mm = current->mm;
-	struct stack_map_vma_lock lock = { .mm = mm };
-	struct {
-		struct file *file;
-		const unsigned char *build_id;
-		unsigned long vm_start;
-		unsigned long vm_end;
-		unsigned long vm_pgoff;
-	} cache = {};
+	struct stack_map_vma_lock lock = { .mm = current->mm };
+	struct stack_map_build_id_cache cache = {};
+	struct stack_map_cached_vma *res = &cache.resolved;
 	unsigned long vm_pgoff, vm_start, vm_end;
 	struct vm_area_struct *vma;
 	struct file *file;
@@ -262,44 +345,39 @@ static void stack_map_get_build_id_offset_sleepable(struct bpf_stack_build_id *i
 	for (u32 i = 0; i < trace_nr; i++) {
 		ip = READ_ONCE(id_offs[i].ip);
 
-		/*
-		 * Range cache fast path: if ip falls within the previously
-		 * resolved VMA range, reuse the cache build_id without
-		 * re-acquiring the VMA lock.
-		 */
-		if (cache.build_id && ip >= cache.vm_start && ip < cache.vm_end) {
-			offset = stack_map_build_id_offset(cache.vm_pgoff, cache.vm_start, ip);
-			stack_map_build_id_set_valid(&id_offs[i], offset, cache.build_id);
+		if (!stack_map_build_id_set_from_cache(&cache, &id_offs[i], ip))
 			continue;
-		}
 
 		vma = stack_map_lock_vma(&lock, ip);
 		if (!vma) {
 			stack_map_build_id_set_ip(&id_offs[i]);
 			continue;
 		}
+
+		vm_pgoff = vma->vm_pgoff;
+		vm_start = vma->vm_start;
+		vm_end = vma->vm_end;
+
 		if (vma_is_anonymous(vma) || !vma->vm_file) {
-			stack_map_build_id_set_ip(&id_offs[i]);
 			stack_map_unlock_vma(&lock);
+			stack_map_build_id_set_ip(&id_offs[i]);
+			stack_map_build_id_cache_set_unresolved(&cache, vm_start, vm_end);
 			continue;
 		}
 
 		file = vma->vm_file;
-		vm_pgoff = vma->vm_pgoff;
-		vm_start = vma->vm_start;
-		vm_end = vma->vm_end;
 		offset = stack_map_build_id_offset(vm_pgoff, vm_start, ip);
 
 		/*
-		 * Same backing file as previous (e.g. different VMAs
-		 * of the same ELF binary). Reuse the cache build_id.
+		 * Same backing file as the last resolved VMA (another mapping
+		 * of the same ELF binary): reuse its build_id without re-parsing.
 		 */
-		if (file == cache.file) {
+		if (file == res->file) {
 			stack_map_unlock_vma(&lock);
-			stack_map_build_id_set_valid(&id_offs[i], offset, cache.build_id);
-			cache.vm_start = vm_start;
-			cache.vm_end = vm_end;
-			cache.vm_pgoff = vm_pgoff;
+			stack_map_build_id_set_valid(&id_offs[i], offset, res->build_id);
+			res->vm_start = vm_start;
+			res->vm_end = vm_end;
+			res->vm_pgoff = vm_pgoff;
 			continue;
 		}
 
@@ -310,21 +388,17 @@ static void stack_map_get_build_id_offset_sleepable(struct bpf_stack_build_id *i
 		if (build_id_parse_file(file, id_offs[i].build_id, NULL)) {
 			stack_map_build_id_set_ip(&id_offs[i]);
 			fput(file);
+			stack_map_build_id_cache_set_unresolved(&cache, vm_start, vm_end);
 			continue;
 		}
 
 		stack_map_build_id_set_valid(&id_offs[i], offset, id_offs[i].build_id);
-		if (cache.file)
-			fput(cache.file);
-		cache.file = file;
-		cache.build_id = id_offs[i].build_id;
-		cache.vm_start = vm_start;
-		cache.vm_end = vm_end;
-		cache.vm_pgoff = vm_pgoff;
+		stack_map_build_id_cache_set_resolved(&cache, file, id_offs[i].build_id,
+						      vm_start, vm_end, vm_pgoff);
 	}
 
-	if (cache.file)
-		fput(cache.file);
+	if (res->file)
+		fput(res->file);
 }
 
 /*
@@ -343,8 +417,8 @@ static void stack_map_get_build_id_offset(struct bpf_stack_build_id *id_offs,
 	struct mmap_unlock_irq_work *work = NULL;
 	bool irq_work_busy = bpf_mmap_unlock_get_irq_work(&work);
 	bool has_user_ctx = user && current && current->mm;
-	struct vm_area_struct *vma, *prev_vma = NULL;
-	const unsigned char *prev_build_id = NULL;
+	struct stack_map_build_id_cache cache = {};
+	struct vm_area_struct *vma;
 	int i;
 
 	if (may_fault && has_user_ctx) {
@@ -365,27 +439,30 @@ static void stack_map_get_build_id_offset(struct bpf_stack_build_id *id_offs,
 
 	for (i = 0; i < trace_nr; i++) {
 		u64 ip = READ_ONCE(id_offs[i].ip);
-		u64 offset;
 
-		if (prev_build_id && range_in_vma(prev_vma, ip, ip)) {
-			vma = prev_vma;
-			offset = stack_map_build_id_offset(vma->vm_pgoff, vma->vm_start, ip);
-			stack_map_build_id_set_valid(&id_offs[i], offset, prev_build_id);
+		if (!stack_map_build_id_set_from_cache(&cache, &id_offs[i], ip))
 			continue;
-		}
+
 		vma = find_vma(current->mm, ip);
 		if (!vma || vma_is_anonymous(vma) ||
 		    fetch_build_id(vma, id_offs[i].build_id, may_fault)) {
-			/* per entry fall back to ips */
+			/* per entry fall back to ips; cache build-ID-less range */
 			stack_map_build_id_set_ip(&id_offs[i]);
-			prev_vma = vma;
-			prev_build_id = NULL;
+			if (vma)
+				stack_map_build_id_cache_set_unresolved(&cache,
+						vma->vm_start, vma->vm_end);
 			continue;
 		}
-		offset = stack_map_build_id_offset(vma->vm_pgoff, vma->vm_start, ip);
-		stack_map_build_id_set_valid(&id_offs[i], offset, id_offs[i].build_id);
-		prev_vma = vma;
-		prev_build_id = id_offs[i].build_id;
+		/*
+		 * mmap_lock is held for the whole loop, so the cached VMA
+		 * fields stay valid; no file pinning is needed here.
+		 */
+		stack_map_build_id_set_valid(&id_offs[i],
+			stack_map_build_id_offset(vma->vm_pgoff, vma->vm_start, ip),
+			id_offs[i].build_id);
+		stack_map_build_id_cache_set_resolved(&cache, NULL, id_offs[i].build_id,
+						      vma->vm_start, vma->vm_end,
+						      vma->vm_pgoff);
 	}
 	bpf_mmap_unlock_mm(work, current->mm);
 }
diff --git a/kernel/bpf/states.c b/kernel/bpf/states.c
index 32f346ce3ffc..ea2153cf28d0 100644
--- a/kernel/bpf/states.c
+++ b/kernel/bpf/states.c
@@ -436,12 +436,10 @@ static void __clean_func_state(struct bpf_verifier_env *env,
 				continue;
 
 			/*
-			 * Only destroy spilled_ptr when hi half is dead.
-			 * If hi half is still live with STACK_SPILL, the
-			 * spilled_ptr metadata is needed for correct state
-			 * comparison in stacksafe().
-			 * is_spilled_reg() is using slot_type[7], but
-			 * is_spilled_scalar_after() check either slot_type[0] or [4]
+			 * Only scalar spills can be degraded to raw stack bytes
+			 * when their high half is dead. Pointer spills need the
+			 * saved spilled_ptr metadata so partial fills keep
+			 * rejecting as non-scalar register fills.
 			 */
 			if (!hi_live) {
 				struct bpf_reg_state *spill = &st->stack[i].spilled_ptr;
@@ -449,6 +447,9 @@ static void __clean_func_state(struct bpf_verifier_env *env,
 				if (lo_live && stype == STACK_SPILL) {
 					u8 val = STACK_MISC;
 
+					if (spill->type != SCALAR_VALUE)
+						continue;
+
 					/*
 					 * 8 byte spill of scalar 0 where half slot is dead
 					 * should become STACK_ZERO in lo 4 bytes.
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index b44106c8ea75..6db306d23b47 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -6308,7 +6308,7 @@ static int prog_stream_read(union bpf_attr *attr)
 	return ret;
 }
 
-#define BPF_PROG_ASSOC_STRUCT_OPS_LAST_FIELD prog_assoc_struct_ops.prog_fd
+#define BPF_PROG_ASSOC_STRUCT_OPS_LAST_FIELD prog_assoc_struct_ops.flags
 
 static int prog_assoc_struct_ops(union bpf_attr *attr)
 {
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 2abc79dbf281..21a365d436a5 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -3479,7 +3479,8 @@ static int check_stack_write_fixed_off(struct bpf_verifier_env *env,
 		bool sanitize = reg && is_spillable_regtype(reg->type);
 
 		for (i = 0; i < size; i++) {
-			u8 type = state->stack[spi].slot_type[i];
+			u8 type = state->stack[spi].slot_type[(slot - i) %
+							      BPF_REG_SIZE];
 
 			if (type != STACK_MISC && type != STACK_ZERO) {
 				sanitize = true;
@@ -5786,6 +5787,10 @@ static int check_ptr_to_btf_access(struct bpf_verifier_env *env,
 			return -EFAULT;
 		}
 		ret = env->ops->btf_struct_access(&env->log, reg, off, size);
+		if (ret < 0)
+			verbose(env,
+				"%s cannot write into ptr_%s at off=%d size=%d\n",
+				reg_arg_name(env, argno), tname, off, size);
 	} else {
 		/* Writes are permitted with default btf_struct_access for
 		 * program allocated objects (which always have id > 0),
@@ -6196,6 +6201,7 @@ static int check_mem_access(struct bpf_verifier_env *env, int insn_idx, struct b
 			 */
 			if (info.reg_type == SCALAR_VALUE) {
 				if (info.is_retval && get_func_retval_range(env->prog, &range)) {
+					mark_reg_unknown(env, regs, value_regno);
 					err = __mark_reg_s32_range(env, regs, value_regno,
 								   range.minval, range.maxval);
 					if (err)
diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c
index 82f8feea6931..75495a5c3507 100644
--- a/kernel/trace/bpf_trace.c
+++ b/kernel/trace/bpf_trace.c
@@ -2376,9 +2376,12 @@ static int copy_user_syms(struct user_syms *us, unsigned long __user *usyms, u32
 	int err = -ENOMEM;
 	unsigned int i;
 
+	if (!access_ok(usyms, cnt * sizeof(*usyms)))
+		return -EFAULT;
+
 	syms = kvmalloc_array(cnt, sizeof(*syms), GFP_KERNEL);
 	if (!syms)
-		goto error;
+		return -ENOMEM;
 
 	buf = kvmalloc_array(cnt, KSYM_NAME_LEN, GFP_KERNEL);
 	if (!buf)
@@ -2403,10 +2406,8 @@ static int copy_user_syms(struct user_syms *us, unsigned long __user *usyms, u32
 	return 0;
 
 error:
-	if (err) {
-		kvfree(syms);
-		kvfree(buf);
-	}
+	kvfree(syms);
+	kvfree(buf);
 	return err;
 }
author	Linus Torvalds <torvalds@linux-foundation.org>	2026-06-25 14:09:26 -0700
committer	Linus Torvalds <torvalds@linux-foundation.org>	2026-06-25 14:09:26 -0700
commit	4edcdefd4083ae04b1a5656f4be6cd83ae919ef4 (patch)
tree	52f12981c8044a73f2b0963555bd1505c42217f1 /kernel
parent	8c04c1292dca29a57ea82c6a44348be49749fc22 (diff)
parent	12091470c6b4c1c14b2de12dcbae2ada6cb6d20b (diff)