summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorAlexei Starovoitov <ast@kernel.org>2026-06-21 18:10:04 -0700
committerAlexei Starovoitov <ast@kernel.org>2026-06-21 18:11:09 -0700
commit3d8e6fef15db07c9247c1301ec6fa5532d15feb8 (patch)
tree004e66c20636acb510ebd36d62477b89c1a0f2d8
parent8405c4626460503027461652f96d8bb10c2a9173 (diff)
parenta6ff26f360c87b7c9f76f493bcecb9223d87e9db (diff)
Merge branch 'fix-effective-prog-array-indexing-with-bpf_f_preorder'
Amery Hung says: ==================== Fix effective prog array indexing with BPF_F_PREORDER This patchset fixes a cgroup effective-array indexing bug where replace_effective_prog() and purge_effective_progs() used a linear hlist position that doesn't match the array layout when BPF_F_PREORDER programs are present, corrupting the array on link update and risking a use-after-free in the detach fallback. It computes the slot via a shared effective_prog_pos() helper and adds a cgroup_preorder selftest. Changelog v1 -> v2: - Also fix purge_effective_progs(), in addition to replace_effective_prog() (Sashiko). - selftest: Set err on bpf_link_create() failure so the failure is reported to the caller (Sashiko). ==================== Link: https://patch.msgid.link/20260619063520.2690547-1-ameryhung@gmail.com Signed-off-by: Alexei Starovoitov <ast@kernel.org>
-rw-r--r--kernel/bpf/cgroup.c108
-rw-r--r--tools/testing/selftests/bpf/prog_tests/cgroup_preorder.c77
2 files changed, 139 insertions, 46 deletions
diff --git a/kernel/bpf/cgroup.c b/kernel/bpf/cgroup.c
index 83ce66296ac1..4355ccb78a9c 100644
--- a/kernel/bpf/cgroup.c
+++ b/kernel/bpf/cgroup.c
@@ -939,19 +939,65 @@ static int cgroup_bpf_attach(struct cgroup *cgrp,
return ret;
}
+static int effective_prog_pos(struct cgroup *cgrp,
+ enum cgroup_bpf_attach_type atype,
+ struct bpf_prog_list *target_pl)
+{
+ int cnt = 0, preorder_cnt = 0, fstart, bstart, init_bstart, pos = -1;
+ struct bpf_prog_list *pl;
+ struct cgroup *p = cgrp;
+
+ /* count effective programs to find where the preorder region ends */
+ do {
+ if (cnt == 0 || (p->bpf.flags[atype] & BPF_F_ALLOW_MULTI))
+ cnt += prog_list_length(&p->bpf.progs[atype], &preorder_cnt);
+ p = cgroup_parent(p);
+ } while (p);
+
+ /* replay compute_effective_progs() placement and record target's slot */
+ cnt = 0;
+ p = cgrp;
+ fstart = preorder_cnt;
+ bstart = preorder_cnt - 1;
+ do {
+ if (cnt > 0 && !(p->bpf.flags[atype] & BPF_F_ALLOW_MULTI))
+ continue;
+
+ init_bstart = bstart;
+ hlist_for_each_entry(pl, &p->bpf.progs[atype], node) {
+ if (!prog_list_prog(pl))
+ continue;
+
+ if (pl->flags & BPF_F_PREORDER) {
+ if (pl == target_pl)
+ pos = bstart;
+ bstart--;
+ } else {
+ if (pl == target_pl)
+ pos = fstart;
+ fstart++;
+ }
+ cnt++;
+ }
+
+ /* reverse pre-ordering progs at this cgroup level */
+ if (pos >= bstart + 1 && pos <= init_bstart)
+ pos = bstart + 1 + init_bstart - pos;
+ } while ((p = cgroup_parent(p)));
+
+ return pos;
+}
+
/* Swap updated BPF program for given link in effective program arrays across
* all descendant cgroups. This function is guaranteed to succeed.
*/
static void replace_effective_prog(struct cgroup *cgrp,
enum cgroup_bpf_attach_type atype,
- struct bpf_cgroup_link *link)
+ struct bpf_prog_list *pl)
{
struct bpf_prog_array_item *item;
struct cgroup_subsys_state *css;
struct bpf_prog_array *progs;
- struct bpf_prog_list *pl;
- struct hlist_head *head;
- struct cgroup *cg;
int pos;
css_for_each_descendant_pre(css, &cgrp->self) {
@@ -960,27 +1006,15 @@ static void replace_effective_prog(struct cgroup *cgrp,
if (percpu_ref_is_zero(&desc->bpf.refcnt))
continue;
- /* find position of link in effective progs array */
- for (pos = 0, cg = desc; cg; cg = cgroup_parent(cg)) {
- if (pos && !(cg->bpf.flags[atype] & BPF_F_ALLOW_MULTI))
- continue;
+ pos = effective_prog_pos(desc, atype, pl);
+ if (WARN_ON_ONCE(pos < 0))
+ continue;
- head = &cg->bpf.progs[atype];
- hlist_for_each_entry(pl, head, node) {
- if (!prog_list_prog(pl))
- continue;
- if (pl->link == link)
- goto found;
- pos++;
- }
- }
-found:
- BUG_ON(!cg);
progs = rcu_dereference_protected(
desc->bpf.effective[atype],
lockdep_is_held(&cgroup_mutex));
item = &progs->items[pos];
- WRITE_ONCE(item->prog, link->link.prog);
+ WRITE_ONCE(item->prog, pl->link->link.prog);
}
}
@@ -1024,7 +1058,7 @@ static int __cgroup_bpf_replace(struct cgroup *cgrp,
cgrp->bpf.revisions[atype] += 1;
old_prog = xchg(&link->link.prog, new_prog);
- replace_effective_prog(cgrp, atype, link);
+ replace_effective_prog(cgrp, atype, pl);
bpf_prog_put(old_prog);
return 0;
}
@@ -1091,19 +1125,14 @@ static struct bpf_prog_list *find_detach_entry(struct hlist_head *progs,
* recomputing the array in place.
*
* @cgrp: The cgroup which descendants to travers
- * @prog: A program to detach or NULL
- * @link: A link to detach or NULL
+ * @pl: The prog_list entry being detached
* @atype: Type of detach operation
*/
-static void purge_effective_progs(struct cgroup *cgrp, struct bpf_prog *prog,
- struct bpf_cgroup_link *link,
+static void purge_effective_progs(struct cgroup *cgrp, struct bpf_prog_list *pl,
enum cgroup_bpf_attach_type atype)
{
struct cgroup_subsys_state *css;
struct bpf_prog_array *progs;
- struct bpf_prog_list *pl;
- struct hlist_head *head;
- struct cgroup *cg;
int pos;
/* recompute effective prog array in place */
@@ -1113,24 +1142,11 @@ static void purge_effective_progs(struct cgroup *cgrp, struct bpf_prog *prog,
if (percpu_ref_is_zero(&desc->bpf.refcnt))
continue;
- /* find position of link or prog in effective progs array */
- for (pos = 0, cg = desc; cg; cg = cgroup_parent(cg)) {
- if (pos && !(cg->bpf.flags[atype] & BPF_F_ALLOW_MULTI))
- continue;
-
- head = &cg->bpf.progs[atype];
- hlist_for_each_entry(pl, head, node) {
- if (!prog_list_prog(pl))
- continue;
- if (pl->prog == prog && pl->link == link)
- goto found;
- pos++;
- }
- }
-
+ pos = effective_prog_pos(desc, atype, pl);
/* no link or prog match, skip the cgroup of this layer */
- continue;
-found:
+ if (pos < 0)
+ continue;
+
progs = rcu_dereference_protected(
desc->bpf.effective[atype],
lockdep_is_held(&cgroup_mutex));
@@ -1196,7 +1212,7 @@ static int __cgroup_bpf_detach(struct cgroup *cgrp, struct bpf_prog *prog,
/* if update effective array failed replace the prog with a dummy prog*/
pl->prog = old_prog;
pl->link = link;
- purge_effective_progs(cgrp, old_prog, link, atype);
+ purge_effective_progs(cgrp, pl, atype);
}
/* now can actually delete it from this cgroup list */
diff --git a/tools/testing/selftests/bpf/prog_tests/cgroup_preorder.c b/tools/testing/selftests/bpf/prog_tests/cgroup_preorder.c
index d4d583872fa2..d2ccf409dfe3 100644
--- a/tools/testing/selftests/bpf/prog_tests/cgroup_preorder.c
+++ b/tools/testing/selftests/bpf/prog_tests/cgroup_preorder.c
@@ -102,6 +102,82 @@ close_skel:
return err;
}
+/*
+ * Replacing a link's program (bpf_link_update) must target the correct slot in
+ * the effective array even when a BPF_F_PREORDER program is attached to the
+ * same cgroup. All programs here are attached to a single cgroup; "parent" is
+ * reused only as a third distinct program.
+ *
+ * Attach child(1) normally and child_2(2) with BPF_F_PREORDER, so the effective
+ * order is [2, 1]. Then replace child(1)'s program with parent(3): only the
+ * non-preorder slot changes, giving [2, 3].
+ */
+static int run_link_replace_test(int cgroup_fd, int sock_fd)
+{
+ LIBBPF_OPTS(bpf_link_create_opts, create_opts);
+ int err = 0, normal_link = -1, preorder_link = -1;
+ struct cgroup_preorder *skel = NULL;
+ enum bpf_attach_type atype;
+ __u8 *result, buf = 0x00;
+ socklen_t optlen = 1;
+
+ skel = cgroup_preorder__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "cgroup_preorder__open_and_load"))
+ return -1;
+
+ err = setsockopt(sock_fd, SOL_IP, IP_TOS, &buf, 1);
+ if (!ASSERT_OK(err, "setsockopt"))
+ goto close_skel;
+
+ atype = bpf_program__expected_attach_type(skel->progs.child);
+
+ create_opts.flags = 0;
+ normal_link = bpf_link_create(bpf_program__fd(skel->progs.child),
+ cgroup_fd, atype, &create_opts);
+ if (!ASSERT_GE(normal_link, 0, "create_normal_link")) {
+ err = normal_link;
+ goto close_skel;
+ }
+
+ create_opts.flags = BPF_F_PREORDER;
+ preorder_link = bpf_link_create(bpf_program__fd(skel->progs.child_2),
+ cgroup_fd, atype, &create_opts);
+ if (!ASSERT_GE(preorder_link, 0, "create_preorder_link")) {
+ err = preorder_link;
+ goto close_links;
+ }
+
+ result = skel->bss->result;
+ skel->bss->idx = 0;
+ memset(result, 0, 4);
+
+ err = getsockopt(sock_fd, SOL_IP, IP_TOS, &buf, &optlen);
+ if (!ASSERT_OK(err, "getsockopt-before"))
+ goto close_links;
+ ASSERT_TRUE(result[0] == 2 && result[1] == 1, "order before update");
+
+ /* Replace the normal link's program child(1) -> parent(3). */
+ err = bpf_link_update(normal_link, bpf_program__fd(skel->progs.parent), NULL);
+ if (!ASSERT_OK(err, "bpf_link_update"))
+ goto close_links;
+
+ skel->bss->idx = 0;
+ memset(result, 0, 4);
+
+ err = getsockopt(sock_fd, SOL_IP, IP_TOS, &buf, &optlen);
+ if (!ASSERT_OK(err, "getsockopt-after"))
+ goto close_links;
+ ASSERT_TRUE(result[0] == 2 && result[1] == 3, "order after update");
+
+close_links:
+ if (preorder_link >= 0)
+ close(preorder_link);
+ close(normal_link);
+close_skel:
+ cgroup_preorder__destroy(skel);
+ return err;
+}
+
void test_cgroup_preorder(void)
{
int cg_parent = -1, cg_child = -1, sock_fd = -1;
@@ -120,6 +196,7 @@ void test_cgroup_preorder(void)
ASSERT_OK(run_getsockopt_test(cg_parent, cg_child, sock_fd, false), "getsockopt_test_1");
ASSERT_OK(run_getsockopt_test(cg_parent, cg_child, sock_fd, true), "getsockopt_test_2");
+ ASSERT_OK(run_link_replace_test(cg_child, sock_fd), "link_replace_test");
out:
close(sock_fd);