summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorAlexei Starovoitov <ast@kernel.org>2026-06-12 18:21:01 -0700
committerAlexei Starovoitov <ast@kernel.org>2026-06-12 18:21:01 -0700
commit746145bd7aaa3db2d77ef26aa8f3ebe1ca83cef6 (patch)
tree57f2156c1ebdcd717beea7e86dded3aeef5e1b2d
parent30dee2c176e7954f63d1fa3e52d172f30beb9bfb (diff)
parentf0eff94d07cda9bd71754d95af4301cd437020b8 (diff)
Merge branch 'bpf-fix-generic-devmap-egress-skb-sharing'
Sun Jian says: ==================== bpf: Fix generic devmap egress skb sharing Generic XDP devmap multi redirect can leave cloned skbs sharing packet data. When a devmap egress program mutates packet data, another destination sharing the same data may observe that mutation. Fix this by making cloned skbs private before running the generic devmap egress program. The private copy is made in dev_map_generic_redirect() so dev_map_bpf_prog_run_skb() can keep returning the XDP action directly. Add selftest coverage for the last-destination case, where the final destination runs on the original skb while earlier destinations use cloned skbs. The test records the source MAC observed by an earlier destination and checks that it is neither the sentinel value left in the result map nor the MAC written by the final destination. --- v5: - Move the skb_copy() check back to dev_map_generic_redirect() to keep dev_map_bpf_prog_run_skb() returning only the XDP action. - Preserve mac_len after skb_copy(). - Use __be64 temporary values when updating mac_map from userspace. - Initialize rx_mac with a sentinel in the last-destination test instead of relying on -ENOENT for ARRAY map lookups. - Adjust the last-destination test topology so the checked earlier destination is not the ingress/source veth. - Split the last-destination check into two assertions: one for store_mac_1 updating rx_mac and one for detecting last-destination rewrite leakage. v4: https://lore.kernel.org/bpf/20260611080850.536996-1-sun.jian.kdev@gmail.com/T/#mf830f03d362f33e0941d1b0e425169698fce76e5 - Preserve mac_len after skb_copy(). - Separate errno return from XDP action output in dev_map_bpf_prog_run_skb(). - Zero-initialize net_config in the new selftest. v3: https://lore.kernel.org/bpf/20260611043317.512843-1-sun.jian.kdev@gmail.com/ - Split the kernel fix and selftest into separate patches. - Move the private-copy logic into dev_map_bpf_prog_run_skb(). 
- Use deterministic DEVMAP_HASH keys in the last-destination selftest. - Fix the Fixes tag. v2: https://lore.kernel.org/bpf/08c35c70-a59e-4e0e-91db-22b5ec30b611@linux.dev/ - Move the private-copy step into dev_map_generic_redirect() so the last-destination path is covered as well. - Use skb_copy() instead of skb_unshare() to keep caller ownership unchanged on allocation failure. - Add a generic XDP last-destination selftest case. v1: https://lore.kernel.org/bpf/CABFUUZFimdrZdq=NWi+N-0sJZWvMwY=f4iF6-3TVMS8=m07Zmw@mail.gmail.com/ ==================== Link: https://patch.msgid.link/20260612114032.244616-1-sun.jian.kdev@gmail.com Signed-off-by: Alexei Starovoitov <ast@kernel.org>
-rw-r--r--kernel/bpf/devmap.c12
-rw-r--r--tools/testing/selftests/bpf/prog_tests/test_xdp_veth.c166
2 files changed, 175 insertions, 3 deletions
diff --git a/kernel/bpf/devmap.c b/kernel/bpf/devmap.c
index 5b9eac5342a9..dc7b859e8bbf 100644
--- a/kernel/bpf/devmap.c
+++ b/kernel/bpf/devmap.c
@@ -710,6 +710,18 @@ int dev_map_generic_redirect(struct bpf_dtab_netdev *dst, struct sk_buff *skb,
if (unlikely(err))
return err;
+ if (dst->xdp_prog && skb_cloned(skb)) {
+ struct sk_buff *nskb;
+
+ nskb = skb_copy(skb, GFP_ATOMIC);
+ if (!nskb)
+ return -ENOMEM;
+
+ nskb->mac_len = skb->mac_len;
+ consume_skb(skb);
+ skb = nskb;
+ }
+
/* Redirect has already succeeded semantically at this point, so we just
* return 0 even if packet is dropped. Helper below takes care of
* freeing skb.
diff --git a/tools/testing/selftests/bpf/prog_tests/test_xdp_veth.c b/tools/testing/selftests/bpf/prog_tests/test_xdp_veth.c
index 3e98a1665936..1675b32753a8 100644
--- a/tools/testing/selftests/bpf/prog_tests/test_xdp_veth.c
+++ b/tools/testing/selftests/bpf/prog_tests/test_xdp_veth.c
@@ -456,7 +456,11 @@ static void xdp_veth_egress(u32 flags)
.remote_flags = flags,
}
};
- const char magic_mac[6] = { 0xAA, 0xBB, 0xCC, 0xDD, 0xEE, 0xFF};
+ const unsigned char egress_macs[VETH_PAIRS_COUNT][ETH_ALEN] = {
+ { 0xAA, 0xBB, 0xCC, 0xDD, 0xEE, 0x01 },
+ { 0xAA, 0xBB, 0xCC, 0xDD, 0xEE, 0x02 },
+ { 0xAA, 0xBB, 0xCC, 0xDD, 0xEE, 0x03 },
+ };
struct xdp_redirect_multi_kern *xdp_redirect_multi_kern;
struct bpf_object *bpf_objs[VETH_EGRESS_SKEL_NB];
struct xdp_redirect_map *xdp_redirect_map;
@@ -512,7 +516,13 @@ static void xdp_veth_egress(u32 flags)
&net_config, prog_cfg, i))
goto destroy_xdp_redirect_map;
- err = bpf_map_update_elem(mac_map, &ifindex, magic_mac, 0);
+ {
+ __be64 mac = 0;
+
+ memcpy(&mac, egress_macs[i], ETH_ALEN);
+ err = bpf_map_update_elem(mac_map, &ifindex, &mac, 0);
+ }
+
if (!ASSERT_OK(err, "bpf_map_update_elem"))
goto destroy_xdp_redirect_map;
@@ -531,15 +541,162 @@ static void xdp_veth_egress(u32 flags)
for (i = 0; i < 2; i++) {
u32 key = i;
+ __be64 expected = 0;
u64 res;
err = bpf_map_lookup_elem(res_map, &key, &res);
if (!ASSERT_OK(err, "get MAC res"))
goto destroy_xdp_redirect_map;
- ASSERT_STRNEQ((const char *)&res, magic_mac, ETH_ALEN, "compare mac");
+ /* store_mac_1/2 run on the second/third remote veths. */
+ memcpy(&expected, egress_macs[i + 1], ETH_ALEN);
+ ASSERT_EQ(res, expected, "compare mac");
+ }
+
+destroy_xdp_redirect_map:
+ close_netns(nstoken);
+ xdp_redirect_map__destroy(xdp_redirect_map);
+destroy_xdp_redirect_multi_kern:
+ xdp_redirect_multi_kern__destroy(xdp_redirect_multi_kern);
+destroy_xdp_dummy:
+ xdp_dummy__destroy(xdp_dummy);
+
+ cleanup_network(&net_config);
+}
+
+static void xdp_veth_egress_last_dst(u32 flags)
+{
+ struct prog_configuration prog_cfg[VETH_PAIRS_COUNT] = {
+ {
+ .local_name = "xdp_redirect_map_all_prog",
+ .remote_name = "xdp_dummy_prog",
+ .local_flags = flags,
+ .remote_flags = flags,
+ },
+ {
+ .local_name = "xdp_redirect_map_all_prog",
+ .remote_name = "store_mac_1",
+ .local_flags = flags,
+ .remote_flags = flags,
+ },
+ {
+ .local_name = "xdp_redirect_map_all_prog",
+ .remote_name = "xdp_dummy_prog",
+ .local_flags = flags,
+ .remote_flags = flags,
+ }
+ };
+ const unsigned char egress_macs[VETH_PAIRS_COUNT][ETH_ALEN] = {
+ { 0xAA, 0xBB, 0xCC, 0xDD, 0xEE, 0x01 },
+ { 0xAA, 0xBB, 0xCC, 0xDD, 0xEE, 0x02 },
+ { 0xAA, 0xBB, 0xCC, 0xDD, 0xEE, 0x03 },
+ };
+ struct xdp_redirect_multi_kern *xdp_redirect_multi_kern;
+ struct bpf_object *bpf_objs[VETH_EGRESS_SKEL_NB];
+ struct xdp_redirect_map *xdp_redirect_map;
+ struct net_configuration net_config = {};
+ int mac_map, egress_map, res_map;
+ struct nstoken *nstoken = NULL;
+ struct xdp_dummy *xdp_dummy;
+ __be64 sentinel_mac = 0;
+ __be64 last_mac = 0;
+ __be64 res;
+ u32 key;
+ int err;
+ int i;
+
+ xdp_dummy = xdp_dummy__open_and_load();
+ if (!ASSERT_OK_PTR(xdp_dummy, "xdp_dummy__open_and_load"))
+ return;
+
+ xdp_redirect_multi_kern = xdp_redirect_multi_kern__open_and_load();
+ if (!ASSERT_OK_PTR(xdp_redirect_multi_kern, "xdp_redirect_multi_kern__open_and_load"))
+ goto destroy_xdp_dummy;
+
+ xdp_redirect_map = xdp_redirect_map__open_and_load();
+ if (!ASSERT_OK_PTR(xdp_redirect_map, "xdp_redirect_map__open_and_load"))
+ goto destroy_xdp_redirect_multi_kern;
+
+ if (!ASSERT_OK(create_network(&net_config), "create network"))
+ goto destroy_xdp_redirect_map;
+
+ mac_map = bpf_map__fd(xdp_redirect_multi_kern->maps.mac_map);
+ if (!ASSERT_OK_FD(mac_map, "open mac_map"))
+ goto destroy_xdp_redirect_map;
+
+ egress_map = bpf_map__fd(xdp_redirect_multi_kern->maps.map_egress);
+ if (!ASSERT_OK_FD(egress_map, "open map_egress"))
+ goto destroy_xdp_redirect_map;
+
+ bpf_objs[0] = xdp_dummy->obj;
+ bpf_objs[1] = xdp_redirect_multi_kern->obj;
+ bpf_objs[2] = xdp_redirect_map->obj;
+
+ nstoken = open_netns(net_config.ns0_name);
+ if (!ASSERT_OK_PTR(nstoken, "open NS0"))
+ goto destroy_xdp_redirect_map;
+
+ for (i = 0; i < VETH_PAIRS_COUNT; i++) {
+ struct bpf_devmap_val devmap_val = {};
+ int ifindex = if_nametoindex(net_config.veth_cfg[i].local_veth);
+ u32 key = i;
+
+ SYS(destroy_xdp_redirect_map,
+ "ip -n %s neigh add %s lladdr 00:00:00:00:00:01 dev %s",
+ net_config.veth_cfg[i].namespace, IP_NEIGH,
+ net_config.veth_cfg[i].remote_veth);
+
+ if (attach_programs_to_veth_pair(bpf_objs, VETH_EGRESS_SKEL_NB,
+ &net_config, prog_cfg, i))
+ goto destroy_xdp_redirect_map;
+
+ {
+ __be64 mac = 0;
+
+ memcpy(&mac, egress_macs[i], ETH_ALEN);
+ err = bpf_map_update_elem(mac_map, &ifindex, &mac, 0);
+ }
+
+ if (!ASSERT_OK(err, "bpf_map_update_elem"))
+ goto destroy_xdp_redirect_map;
+
+ devmap_val.ifindex = ifindex;
+ devmap_val.bpf_prog.fd = -1;
+
+ if (i == VETH_PAIRS_COUNT - 1)
+ devmap_val.bpf_prog.fd =
+ bpf_program__fd(xdp_redirect_multi_kern->progs.xdp_devmap_prog);
+
+ err = bpf_map_update_elem(egress_map, &key, &devmap_val, 0);
+ if (!ASSERT_OK(err, "bpf_map_update_elem"))
+ goto destroy_xdp_redirect_map;
}
+ res_map = bpf_map__fd(xdp_redirect_map->maps.rx_mac);
+ if (!ASSERT_OK_FD(res_map, "open rx_map"))
+ goto destroy_xdp_redirect_map;
+
+ memcpy(&sentinel_mac, egress_macs[0], ETH_ALEN);
+ memcpy(&last_mac, egress_macs[VETH_PAIRS_COUNT - 1], ETH_ALEN);
+
+ key = 0;
+ err = bpf_map_update_elem(res_map, &key, &sentinel_mac, 0);
+ if (!ASSERT_OK(err, "init rx mac"))
+ goto destroy_xdp_redirect_map;
+
+ SYS_NOFAIL("ip netns exec %s ping %s -i 0.1 -c 4 -W1 > /dev/null ",
+ net_config.veth_cfg[0].namespace, IP_NEIGH);
+
+ err = bpf_map_lookup_elem(res_map, &key, &res);
+ if (!ASSERT_OK(err, "get MAC res"))
+ goto destroy_xdp_redirect_map;
+
+ if (!ASSERT_NEQ(res, sentinel_mac, "rx_mac overwritten by store_mac_1"))
+ goto destroy_xdp_redirect_map;
+
+ if (!ASSERT_NEQ(res, last_mac, "earlier dst not rewritten by last dst"))
+ goto destroy_xdp_redirect_map;
+
destroy_xdp_redirect_map:
close_netns(nstoken);
xdp_redirect_map__destroy(xdp_redirect_map);
@@ -596,4 +753,7 @@ void test_xdp_veth_egress(void)
if (test__start_subtest("SKB_MODE/egress"))
xdp_veth_egress(XDP_FLAGS_SKB_MODE);
+
+ if (test__start_subtest("SKB_MODE/egress_last_dst"))
+ xdp_veth_egress_last_dst(XDP_FLAGS_SKB_MODE);
}