diff options
| author | Alexei Starovoitov <ast@kernel.org> | 2026-06-21 18:10:04 -0700 |
|---|---|---|
| committer | Alexei Starovoitov <ast@kernel.org> | 2026-06-21 18:11:09 -0700 |
| commit | 3d8e6fef15db07c9247c1301ec6fa5532d15feb8 (patch) | |
| tree | 004e66c20636acb510ebd36d62477b89c1a0f2d8 | |
| parent | 8405c4626460503027461652f96d8bb10c2a9173 (diff) | |
| parent | a6ff26f360c87b7c9f76f493bcecb9223d87e9db (diff) | |
Merge branch 'fix-effective-prog-array-indexing-with-bpf_f_preorder'
Amery Hung says:
====================
Fix effective prog array indexing with BPF_F_PREORDER
This patchset fixes a cgroup effective-array indexing bug where
replace_effective_prog() and purge_effective_progs() used a linear hlist
position that doesn't match the array layout when BPF_F_PREORDER
programs are present, corrupting the array on link update and risking a
use-after-free in the detach fallback. It computes the slot via a shared
effective_prog_pos() helper and adds a cgroup_preorder selftest.
Changelog
v1 -> v2:
- Also fix purge_effective_progs(), in addition to
replace_effective_prog() (Sashiko).
- selftest: Set err on bpf_link_create() failure so the failure is
reported to the caller (Sashiko).
====================
Link: https://patch.msgid.link/20260619063520.2690547-1-ameryhung@gmail.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
| -rw-r--r-- | kernel/bpf/cgroup.c | 108 | ||||
| -rw-r--r-- | tools/testing/selftests/bpf/prog_tests/cgroup_preorder.c | 77 |
2 files changed, 139 insertions, 46 deletions
diff --git a/kernel/bpf/cgroup.c b/kernel/bpf/cgroup.c index 83ce66296ac1..4355ccb78a9c 100644 --- a/kernel/bpf/cgroup.c +++ b/kernel/bpf/cgroup.c @@ -939,19 +939,65 @@ static int cgroup_bpf_attach(struct cgroup *cgrp, return ret; } +static int effective_prog_pos(struct cgroup *cgrp, + enum cgroup_bpf_attach_type atype, + struct bpf_prog_list *target_pl) +{ + int cnt = 0, preorder_cnt = 0, fstart, bstart, init_bstart, pos = -1; + struct bpf_prog_list *pl; + struct cgroup *p = cgrp; + + /* count effective programs to find where the preorder region ends */ + do { + if (cnt == 0 || (p->bpf.flags[atype] & BPF_F_ALLOW_MULTI)) + cnt += prog_list_length(&p->bpf.progs[atype], &preorder_cnt); + p = cgroup_parent(p); + } while (p); + + /* replay compute_effective_progs() placement and record target's slot */ + cnt = 0; + p = cgrp; + fstart = preorder_cnt; + bstart = preorder_cnt - 1; + do { + if (cnt > 0 && !(p->bpf.flags[atype] & BPF_F_ALLOW_MULTI)) + continue; + + init_bstart = bstart; + hlist_for_each_entry(pl, &p->bpf.progs[atype], node) { + if (!prog_list_prog(pl)) + continue; + + if (pl->flags & BPF_F_PREORDER) { + if (pl == target_pl) + pos = bstart; + bstart--; + } else { + if (pl == target_pl) + pos = fstart; + fstart++; + } + cnt++; + } + + /* reverse pre-ordering progs at this cgroup level */ + if (pos >= bstart + 1 && pos <= init_bstart) + pos = bstart + 1 + init_bstart - pos; + } while ((p = cgroup_parent(p))); + + return pos; +} + /* Swap updated BPF program for given link in effective program arrays across * all descendant cgroups. This function is guaranteed to succeed. */ static void replace_effective_prog(struct cgroup *cgrp, enum cgroup_bpf_attach_type atype, - struct bpf_cgroup_link *link) + struct bpf_prog_list *pl) { struct bpf_prog_array_item *item; struct cgroup_subsys_state *css; struct bpf_prog_array *progs; - struct bpf_prog_list *pl; - struct hlist_head *head; - struct cgroup *cg; int pos; css_for_each_descendant_pre(css, &cgrp->self) { @@ -960,27 +1006,15 @@ static void replace_effective_prog(struct cgroup *cgrp, if (percpu_ref_is_zero(&desc->bpf.refcnt)) continue; - /* find position of link in effective progs array */ - for (pos = 0, cg = desc; cg; cg = cgroup_parent(cg)) { - if (pos && !(cg->bpf.flags[atype] & BPF_F_ALLOW_MULTI)) - continue; + pos = effective_prog_pos(desc, atype, pl); + if (WARN_ON_ONCE(pos < 0)) + continue; - head = &cg->bpf.progs[atype]; - hlist_for_each_entry(pl, head, node) { - if (!prog_list_prog(pl)) - continue; - if (pl->link == link) - goto found; - pos++; - } - } -found: - BUG_ON(!cg); progs = rcu_dereference_protected( desc->bpf.effective[atype], lockdep_is_held(&cgroup_mutex)); item = &progs->items[pos]; - WRITE_ONCE(item->prog, link->link.prog); + WRITE_ONCE(item->prog, pl->link->link.prog); } } @@ -1024,7 +1058,7 @@ static int __cgroup_bpf_replace(struct cgroup *cgrp, cgrp->bpf.revisions[atype] += 1; old_prog = xchg(&link->link.prog, new_prog); - replace_effective_prog(cgrp, atype, link); + replace_effective_prog(cgrp, atype, pl); bpf_prog_put(old_prog); return 0; } @@ -1091,19 +1125,14 @@ static struct bpf_prog_list *find_detach_entry(struct hlist_head *progs, * recomputing the array in place. * * @cgrp: The cgroup which descendants to travers - * @prog: A program to detach or NULL - * @link: A link to detach or NULL + * @pl: The prog_list entry being detached * @atype: Type of detach operation */ -static void purge_effective_progs(struct cgroup *cgrp, struct bpf_prog *prog, - struct bpf_cgroup_link *link, +static void purge_effective_progs(struct cgroup *cgrp, struct bpf_prog_list *pl, enum cgroup_bpf_attach_type atype) { struct cgroup_subsys_state *css; struct bpf_prog_array *progs; - struct bpf_prog_list *pl; - struct hlist_head *head; - struct cgroup *cg; int pos; /* recompute effective prog array in place */ @@ -1113,24 +1142,11 @@ static void purge_effective_progs(struct cgroup *cgrp, struct bpf_prog *prog, if (percpu_ref_is_zero(&desc->bpf.refcnt)) continue; - /* find position of link or prog in effective progs array */ - for (pos = 0, cg = desc; cg; cg = cgroup_parent(cg)) { - if (pos && !(cg->bpf.flags[atype] & BPF_F_ALLOW_MULTI)) - continue; - - head = &cg->bpf.progs[atype]; - hlist_for_each_entry(pl, head, node) { - if (!prog_list_prog(pl)) - continue; - if (pl->prog == prog && pl->link == link) - goto found; - pos++; - } - } - + pos = effective_prog_pos(desc, atype, pl); /* no link or prog match, skip the cgroup of this layer */ - continue; -found: + if (pos < 0) + continue; + progs = rcu_dereference_protected( desc->bpf.effective[atype], lockdep_is_held(&cgroup_mutex)); @@ -1196,7 +1212,7 @@ static int __cgroup_bpf_detach(struct cgroup *cgrp, struct bpf_prog *prog, /* if update effective array failed replace the prog with a dummy prog*/ pl->prog = old_prog; pl->link = link; - purge_effective_progs(cgrp, old_prog, link, atype); + purge_effective_progs(cgrp, pl, atype); } /* now can actually delete it from this cgroup list */ diff --git a/tools/testing/selftests/bpf/prog_tests/cgroup_preorder.c b/tools/testing/selftests/bpf/prog_tests/cgroup_preorder.c index d4d583872fa2..d2ccf409dfe3 100644 --- a/tools/testing/selftests/bpf/prog_tests/cgroup_preorder.c +++ b/tools/testing/selftests/bpf/prog_tests/cgroup_preorder.c @@ -102,6 +102,82 @@ close_skel: return err; } +/* + * Replacing a link's program (bpf_link_update) must target the correct slot in + * the effective array even when a BPF_F_PREORDER program is attached to the + * same cgroup. All programs here are attached to a single cgroup; "parent" is + * reused only as a third distinct program. + * + * Attach child(1) normally and child_2(2) with BPF_F_PREORDER, so the effective + * order is [2, 1]. Then replace child(1)'s program with parent(3): only the + * non-preorder slot changes, giving [2, 3]. + */ +static int run_link_replace_test(int cgroup_fd, int sock_fd) +{ + LIBBPF_OPTS(bpf_link_create_opts, create_opts); + int err = 0, normal_link = -1, preorder_link = -1; + struct cgroup_preorder *skel = NULL; + enum bpf_attach_type atype; + __u8 *result, buf = 0x00; + socklen_t optlen = 1; + + skel = cgroup_preorder__open_and_load(); + if (!ASSERT_OK_PTR(skel, "cgroup_preorder__open_and_load")) + return -1; + + err = setsockopt(sock_fd, SOL_IP, IP_TOS, &buf, 1); + if (!ASSERT_OK(err, "setsockopt")) + goto close_skel; + + atype = bpf_program__expected_attach_type(skel->progs.child); + + create_opts.flags = 0; + normal_link = bpf_link_create(bpf_program__fd(skel->progs.child), + cgroup_fd, atype, &create_opts); + if (!ASSERT_GE(normal_link, 0, "create_normal_link")) { + err = normal_link; + goto close_skel; + } + + create_opts.flags = BPF_F_PREORDER; + preorder_link = bpf_link_create(bpf_program__fd(skel->progs.child_2), + cgroup_fd, atype, &create_opts); + if (!ASSERT_GE(preorder_link, 0, "create_preorder_link")) { + err = preorder_link; + goto close_links; + } + + result = skel->bss->result; + skel->bss->idx = 0; + memset(result, 0, 4); + + err = getsockopt(sock_fd, SOL_IP, IP_TOS, &buf, &optlen); + if (!ASSERT_OK(err, "getsockopt-before")) + goto close_links; + ASSERT_TRUE(result[0] == 2 && result[1] == 1, "order before update"); + + /* Replace the normal link's program child(1) -> parent(3). */ + err = bpf_link_update(normal_link, bpf_program__fd(skel->progs.parent), NULL); + if (!ASSERT_OK(err, "bpf_link_update")) + goto close_links; + + skel->bss->idx = 0; + memset(result, 0, 4); + + err = getsockopt(sock_fd, SOL_IP, IP_TOS, &buf, &optlen); + if (!ASSERT_OK(err, "getsockopt-after")) + goto close_links; + ASSERT_TRUE(result[0] == 2 && result[1] == 3, "order after update"); + +close_links: + if (preorder_link >= 0) + close(preorder_link); + close(normal_link); +close_skel: + cgroup_preorder__destroy(skel); + return err; +} + void test_cgroup_preorder(void) { int cg_parent = -1, cg_child = -1, sock_fd = -1; @@ -120,6 +196,7 @@ void test_cgroup_preorder(void) ASSERT_OK(run_getsockopt_test(cg_parent, cg_child, sock_fd, false), "getsockopt_test_1"); ASSERT_OK(run_getsockopt_test(cg_parent, cg_child, sock_fd, true), "getsockopt_test_2"); + ASSERT_OK(run_link_replace_test(cg_child, sock_fd), "link_replace_test"); out: close(sock_fd); |
