summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJakub Kicinski <kuba@kernel.org>2026-06-24 19:56:58 -0700
committerJakub Kicinski <kuba@kernel.org>2026-06-24 19:56:59 -0700
commit02f144fbb4c86c360495d33debe307cb46a57f95 (patch)
tree3f0c348379dbbf6d0247906e0e23a56dab07ee16
parent620839b699aa7b1aaba925547eec6d2b976aa763 (diff)
parent397c8300972f6e1486fd1afd99a044648a401cd5 (diff)
Merge tag 'nf-26-06-23' of git://git.kernel.org/pub/scm/linux/kernel/git/netfilter/nf
Pablo Neira Ayuso says: ==================== Netfilter fixes for net The following patchset contains Netfilter fixes for net: 1) Add a workaround to avoid a possible crash if nf_nat and nft_chain_nat are compiled built-in and nf_nat fails to register, allowing nft_chain_nat to access the incorrect pernetns area. This crash is specific to all-built-in compilation. From Matias Krause. 2) Revisit the conncount GC optimization for confirmed conntracks: skip the GC round if IPS_ASSURED is set. This addresses an issue in a corner-case scenario involving locally generated traffic. No crash, just a functionality fix. From Fernando F. Mancera. 3) Validate iph->ihl in flowtable IPIP tunnel support, from Lorenzo Bianconi. This is a sanity check that bounces malformed IPIP packets back to the classic forwarding path. 4) Kdoc fixes for x_tables.h, from Randy Dunlap. 5) Use info->options so nft_synproxy_tcp_options() stays on the same local snapshot, otherwise the eval path can observe an inconsistent mix of mss and timestamps. From Runyu Xiao. 6) Add conntrack_sctp_collision.sh to cover SCTP INIT collisions. From Yi Chen. 7) Do not allow NFPROTO_UNSPEC targets if family is NFPROTO_BRIDGE in nft_compat. Allowing them makes it possible to use nonsensical targets such as xt_nat, leading to a crash. From Florian Westphal. 8) Add a selftest for queueing from the bridge family. From Florian Westphal. 9) Do not allow resetting a conntrack helper via ctnetlink. This feature predates the creation of the conntrack-tools and it is not used; I don't have a use case for it, so I prefer removing it to fixing it. 10) Add deprecation warnings for the IPv4-only conntrack helpers for PPTP and IRC. From Florian Westphal. 11) Store the master tuple in the expectation object and use it, otherwise SLAB_TYPESAFE_RCU rules allow incorrect master tuple information to be displayed through ctnetlink. 12) Run expectation eviction when inserting an expectation with no helper; this is a fix for the nft_ct custom expectation support. 
13) Fix nft_ct custom expectation timeouts: userspace provides the timeout in milliseconds but the kernel assumes it is in seconds. From Florian Westphal. 14) Cap the maximum number of expectations per class to 255 expectations per master conntrack at helper registration. This is a fix to restrict the maximum number of expectations per master conntrack, which can be an issue for the new lazy GC expectation approach. * tag 'nf-26-06-23' of git://git.kernel.org/pub/scm/linux/kernel/git/netfilter/nf: netfilter: nf_conntrack_helper: cap maximum number of expectation at helper registration netfilter: nft_ct: expectation timeouts are passed in milliseconds netfilter: nf_conntrack_expect: run expectation eviction with no helper netfilter: nf_conntrack_expect: store master_tuple in expectation netfilter: conntrack: add deprecation warnings for irc and pptp trackers netfilter: ctnetlink: do not allow to reset helper on existing conntrack selftests: nft_queue.sh: add a bridge queue test netfilter: nft_compat: ebtables emulation must reject non-bridge targets selftests: netfilter: conntrack_sctp_collision.sh: Introduce SCTP INIT collision test netfilter: nft_synproxy: stop bypassing the priv->info snapshot netfilter: x_tables.h: fix all kernel-doc warnings netfilter: flowtable: Validate iph->ihl in nf_flow_ip4_tunnel_proto() netfilter: nf_conncount: prevent connlimit drops for early confirmed ct netfilter: nf_nat: avoid invalid nat_net pointer use on failed nf_nat_init() ==================== Link: https://patch.msgid.link/20260623221548.701545-1-pablo@netfilter.org Signed-off-by: Jakub Kicinski <kuba@kernel.org>
-rw-r--r--include/linux/netfilter/x_tables.h29
-rw-r--r--include/net/netfilter/nf_conntrack_expect.h1
-rw-r--r--include/net/netfilter/nf_conntrack_helper.h4
-rw-r--r--net/netfilter/Kconfig11
-rw-r--r--net/netfilter/nf_conncount.c11
-rw-r--r--net/netfilter/nf_conntrack_broadcast.c1
-rw-r--r--net/netfilter/nf_conntrack_expect.c12
-rw-r--r--net/netfilter/nf_conntrack_helper.c9
-rw-r--r--net/netfilter/nf_conntrack_irc.c2
-rw-r--r--net/netfilter/nf_conntrack_netlink.c23
-rw-r--r--net/netfilter/nf_conntrack_pptp.c2
-rw-r--r--net/netfilter/nf_flow_table_ip.c8
-rw-r--r--net/netfilter/nf_nat_core.c10
-rw-r--r--net/netfilter/nft_compat.c24
-rw-r--r--net/netfilter/nft_ct.c21
-rw-r--r--net/netfilter/nft_synproxy.c9
-rwxr-xr-xtools/testing/selftests/net/netfilter/conntrack_sctp_collision.sh89
-rwxr-xr-xtools/testing/selftests/net/netfilter/nft_queue.sh66
18 files changed, 246 insertions, 86 deletions
diff --git a/include/linux/netfilter/x_tables.h b/include/linux/netfilter/x_tables.h
index 20d70dddbe50..25062f4a0dd5 100644
--- a/include/linux/netfilter/x_tables.h
+++ b/include/linux/netfilter/x_tables.h
@@ -18,7 +18,7 @@
* @match: the match extension
* @target: the target extension
* @matchinfo: per-match data
- * @targetinfo: per-target data
+ * @targinfo: per-target data
* @state: pointer to hook state this packet came from
* @fragoff: packet is a fragment, this is the data offset
* @thoff: position of transport header relative to skb->data
@@ -77,7 +77,9 @@ static inline u_int8_t xt_family(const struct xt_action_param *par)
* @match: struct xt_match through which this function was invoked
* @matchinfo: per-match data
* @hook_mask: via which hooks the new rule is reachable
- * Other fields as above.
+ * @family: actual NFPROTO_* through which the function is invoked
+ * (helpful when match->family == NFPROTO_UNSPEC)
+ * @nft_compat: running from the nft compat layer if true
*/
struct xt_mtchk_param {
struct net *net;
@@ -91,8 +93,13 @@ struct xt_mtchk_param {
};
/**
- * struct xt_mdtor_param - match destructor parameters
- * Fields as above.
+ * struct xt_mtdtor_param - match destructor parameters
+ *
+ * @net: network namespace through which the check was invoked
+ * @match: struct xt_match through which this function was invoked
+ * @matchinfo: per-match data
+ * @family: actual NFPROTO_* through which the function is invoked
+ * (helpful when match->family == NFPROTO_UNSPEC)
*/
struct xt_mtdtor_param {
struct net *net;
@@ -105,10 +112,16 @@ struct xt_mtdtor_param {
* struct xt_tgchk_param - parameters for target extensions'
* checkentry functions
*
+ * @net: network namespace through which the check was invoked
+ * @table: table the rule is tried to be inserted into
* @entryinfo: the family-specific rule data
* (struct ipt_entry, ip6t_entry, arpt_entry, ebt_entry)
- *
- * Other fields see above.
+ * @target: the target extension
+ * @targinfo: per-target data
+ * @hook_mask: via which hooks the new rule is reachable
+ * @family: actual NFPROTO_* through which the function is invoked
+ * (helpful when match->family == NFPROTO_UNSPEC)
+ * @nft_compat: running from the nft compat layer if true
*/
struct xt_tgchk_param {
struct net *net;
@@ -336,9 +349,9 @@ struct xt_table_info *xt_alloc_table_info(unsigned int size);
void xt_free_table_info(struct xt_table_info *info);
/**
- * xt_recseq - recursive seqcount for netfilter use
+ * var xt_recseq - recursive seqcount for netfilter use
*
- * Packet processing changes the seqcount only if no recursion happened
+ * Packet processing changes the seqcount only if no recursion happened.
* get_counters() can use read_seqcount_begin()/read_seqcount_retry(),
* because we use the normal seqcount convention :
* Low order bit set to 1 if a writer is active.
diff --git a/include/net/netfilter/nf_conntrack_expect.h b/include/net/netfilter/nf_conntrack_expect.h
index be4a120d549e..c024345c9bd8 100644
--- a/include/net/netfilter/nf_conntrack_expect.h
+++ b/include/net/netfilter/nf_conntrack_expect.h
@@ -26,6 +26,7 @@ struct nf_conntrack_expect {
possible_net_t net;
/* We expect this tuple, with the following mask */
+ struct nf_conntrack_tuple master_tuple;
struct nf_conntrack_tuple tuple;
struct nf_conntrack_tuple_mask mask;
diff --git a/include/net/netfilter/nf_conntrack_helper.h b/include/net/netfilter/nf_conntrack_helper.h
index 81025101f86d..c761cd8158b2 100644
--- a/include/net/netfilter/nf_conntrack_helper.h
+++ b/include/net/netfilter/nf_conntrack_helper.h
@@ -114,6 +114,10 @@ int nf_conntrack_helpers_register(struct nf_conntrack_helper *, unsigned int,
void nf_conntrack_helpers_unregister(struct nf_conntrack_helper **,
unsigned int);
+#define nf_conntrack_helper_deprecated(name) \
+ pr_warn("The %s conntrack helper is scheduled for removal.\n" \
+ "Please contact the netfilter-devel mailing list if you still need this.\n", name)
+
struct nf_conn_help *nf_ct_helper_ext_add(struct nf_conn *ct, gfp_t gfp);
int __nf_ct_try_assign_helper(struct nf_conn *ct, struct nf_conn *tmpl,
diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig
index 665f8008cc4b..4c04cd8d40a2 100644
--- a/net/netfilter/Kconfig
+++ b/net/netfilter/Kconfig
@@ -256,8 +256,7 @@ config NF_CONNTRACK_H323
To compile it as a module, choose M here. If unsure, say N.
config NF_CONNTRACK_IRC
- tristate "IRC protocol support"
- default m if NETFILTER_ADVANCED=n
+ tristate "IRC DCC protocol support (obsolete)"
help
There is a commonly-used extension to IRC called
Direct Client-to-Client Protocol (DCC). This enables users to send
@@ -267,6 +266,8 @@ config NF_CONNTRACK_IRC
using NAT, this extension will enable you to send files and initiate
chats. Note that you do NOT need this extension to get files or
have others initiate chats, or everything else in IRC.
+ DCC tracking behind NAT requires plaintext (unencrypted) IRC, so
+ this helper is of limited use these days.
To compile it as a module, choose M here. If unsure, say N.
@@ -308,17 +309,17 @@ config NF_CONNTRACK_SNMP
To compile it as a module, choose M here. If unsure, say N.
config NF_CONNTRACK_PPTP
- tristate "PPtP protocol support"
+ tristate "PPtP protocol support (deprecated)"
depends on NETFILTER_ADVANCED
select NF_CT_PROTO_GRE
help
This module adds support for PPTP (Point to Point Tunnelling
Protocol, RFC2637) connection tracking and NAT.
- If you are running PPTP sessions over a stateful firewall or NAT
+ If you are still running PPTP sessions over a stateful firewall or NAT
box, you may want to enable this feature.
- Please note that not all PPTP modes of operation are supported yet.
+ Please note that not all PPTP modes of operation are supported.
Specifically these limitations exist:
- Blindly assumes that control connections are always established
in PNS->PAC direction. This is a violation of RFC2637.
diff --git a/net/netfilter/nf_conncount.c b/net/netfilter/nf_conncount.c
index dd67004a5cc0..91582069f6d2 100644
--- a/net/netfilter/nf_conncount.c
+++ b/net/netfilter/nf_conncount.c
@@ -183,17 +183,16 @@ static int __nf_conncount_add(struct net *net,
return -ENOENT;
if (ct && nf_ct_is_confirmed(ct)) {
- /* local connections are confirmed in postrouting so confirmation
- * might have happened before hitting connlimit
+ /* Connection is confirmed but might still be in the setup phase.
+ * Only skip the tracking if it is fully assured. This guarantees
+ * that setup packets or retransmissions are properly counted and
+ * deduplicated.
*/
- if (skb->skb_iif != LOOPBACK_IFINDEX) {
+ if (test_bit(IPS_ASSURED_BIT, &ct->status)) {
err = -EEXIST;
goto out_put;
}
- /* this is likely a local connection, skip optimization to avoid
- * adding duplicates from a 'packet train'
- */
goto check_connections;
}
diff --git a/net/netfilter/nf_conntrack_broadcast.c b/net/netfilter/nf_conntrack_broadcast.c
index 400119b6320e..bf78828c7549 100644
--- a/net/netfilter/nf_conntrack_broadcast.c
+++ b/net/netfilter/nf_conntrack_broadcast.c
@@ -62,6 +62,7 @@ int nf_conntrack_broadcast_help(struct sk_buff *skb,
if (exp == NULL)
goto out;
+ exp->master_tuple = ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple;
exp->tuple = ct->tuplehash[IP_CT_DIR_REPLY].tuple;
helper = rcu_dereference(help->helper);
diff --git a/net/netfilter/nf_conntrack_expect.c b/net/netfilter/nf_conntrack_expect.c
index 49e18eda037e..38630c5e006f 100644
--- a/net/netfilter/nf_conntrack_expect.c
+++ b/net/netfilter/nf_conntrack_expect.c
@@ -355,6 +355,8 @@ void nf_ct_expect_init(struct nf_conntrack_expect *exp, unsigned int class,
exp->tuple.src.l3num = family;
exp->tuple.dst.protonum = proto;
+ exp->master_tuple = ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple;
+
if (saddr) {
memcpy(&exp->tuple.src.u3, saddr, len);
if (sizeof(exp->tuple.src.u3) > len)
@@ -494,9 +496,15 @@ static inline int __nf_ct_expect_check(struct nf_conntrack_expect *expect,
lockdep_is_held(&nf_conntrack_expect_lock));
if (helper) {
p = &helper->expect_policy[expect->class];
- if (p->max_expected &&
- master_help->expecting[expect->class] >= p->max_expected)
+ if (master_help->expecting[expect->class] >= p->max_expected)
evict_oldest_expect(master_help, expect, p);
+ } else {
+ const struct nf_conntrack_expect_policy default_exp_policy = {
+ .max_expected = NF_CT_EXPECT_MAX_CNT,
+ };
+
+ if (master_help->expecting[expect->class] >= default_exp_policy.max_expected)
+ evict_oldest_expect(master_help, expect, &default_exp_policy);
}
cnet = nf_ct_pernet(net);
diff --git a/net/netfilter/nf_conntrack_helper.c b/net/netfilter/nf_conntrack_helper.c
index 8b94001c2430..500509b17663 100644
--- a/net/netfilter/nf_conntrack_helper.c
+++ b/net/netfilter/nf_conntrack_helper.c
@@ -374,8 +374,13 @@ int __nf_conntrack_helper_register(struct nf_conntrack_helper *me)
if (!nf_ct_helper_hash)
return -ENOENT;
- if (me->expect_policy->max_expected > NF_CT_EXPECT_MAX_CNT)
- return -EINVAL;
+ for (i = 0; i <= me->expect_class_max; i++) {
+ if (!me->expect_policy[i].max_expected)
+ me->expect_policy[i].max_expected = NF_CT_EXPECT_MAX_CNT;
+
+ if (me->expect_policy[i].max_expected > NF_CT_EXPECT_MAX_CNT)
+ return -EINVAL;
+ }
mutex_lock(&nf_ct_helper_mutex);
for (i = 0; i < nf_ct_helper_hsize; i++) {
diff --git a/net/netfilter/nf_conntrack_irc.c b/net/netfilter/nf_conntrack_irc.c
index 0c117b8492e9..193ab34db795 100644
--- a/net/netfilter/nf_conntrack_irc.c
+++ b/net/netfilter/nf_conntrack_irc.c
@@ -262,6 +262,8 @@ static int __init nf_conntrack_irc_init(void)
{
int i, ret;
+ nf_conntrack_helper_deprecated(HELPER_NAME);
+
if (max_dcc_channels < 1) {
pr_err("max_dcc_channels must not be zero\n");
return -EINVAL;
diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c
index 4e78d2482989..4217715d42dc 100644
--- a/net/netfilter/nf_conntrack_netlink.c
+++ b/net/netfilter/nf_conntrack_netlink.c
@@ -1953,19 +1953,6 @@ static int ctnetlink_change_helper(struct nf_conn *ct,
return err;
}
- if (!strcmp(helpname, "") && help) {
- helper = rcu_dereference(help->helper);
- if (helper) {
- /* we had a helper before ... */
- nf_ct_remove_expectations(ct);
- RCU_INIT_POINTER(help->helper, NULL);
- if (refcount_dec_and_test(&helper->ct_refcnt))
- kfree_rcu(helper, rcu);
- }
- rcu_read_unlock();
- return 0;
- }
-
helper = __nf_conntrack_helper_find(helpname, nf_ct_l3num(ct),
nf_ct_protonum(ct));
if (helper == NULL) {
@@ -3015,7 +3002,6 @@ ctnetlink_exp_dump_expect(struct sk_buff *skb,
const struct nf_conntrack_expect *exp)
{
__s32 timeout = (__s32)(READ_ONCE(exp->timeout) - nfct_time_stamp) / HZ;
- struct nf_conn *master = exp->master;
struct nf_conntrack_helper *helper;
#if IS_ENABLED(CONFIG_NF_NAT)
struct nlattr *nest_parms;
@@ -3030,9 +3016,7 @@ ctnetlink_exp_dump_expect(struct sk_buff *skb,
goto nla_put_failure;
if (ctnetlink_exp_dump_mask(skb, &exp->tuple, &exp->mask) < 0)
goto nla_put_failure;
- if (ctnetlink_exp_dump_tuple(skb,
- &master->tuplehash[IP_CT_DIR_ORIGINAL].tuple,
- CTA_EXPECT_MASTER) < 0)
+ if (ctnetlink_exp_dump_tuple(skb, &exp->master_tuple, CTA_EXPECT_MASTER) < 0)
goto nla_put_failure;
#if IS_ENABLED(CONFIG_NF_NAT)
@@ -3045,9 +3029,9 @@ ctnetlink_exp_dump_expect(struct sk_buff *skb,
if (nla_put_be32(skb, CTA_EXPECT_NAT_DIR, htonl(exp->dir)))
goto nla_put_failure;
- nat_tuple.src.l3num = nf_ct_l3num(master);
+ nat_tuple.src.l3num = exp->master_tuple.src.l3num;
nat_tuple.src.u3 = exp->saved_addr;
- nat_tuple.dst.protonum = nf_ct_protonum(master);
+ nat_tuple.dst.protonum = exp->master_tuple.dst.protonum;
nat_tuple.src.u = exp->saved_proto;
if (ctnetlink_exp_dump_tuple(skb, &nat_tuple,
@@ -3589,6 +3573,7 @@ ctnetlink_alloc_expect(const struct nlattr * const cda[], struct nf_conn *ct,
#endif
rcu_assign_pointer(exp->helper, helper);
rcu_assign_pointer(exp->assign_helper, assign_helper);
+ exp->master_tuple = ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple;
exp->tuple = *tuple;
exp->mask.src.u3 = mask->src.u3;
exp->mask.src.u.all = mask->src.u.all;
diff --git a/net/netfilter/nf_conntrack_pptp.c b/net/netfilter/nf_conntrack_pptp.c
index 776505a78e64..80fc14c87ddc 100644
--- a/net/netfilter/nf_conntrack_pptp.c
+++ b/net/netfilter/nf_conntrack_pptp.c
@@ -545,6 +545,8 @@ static int __init nf_conntrack_pptp_init(void)
pptp.destroy = gre_pptp_destroy_siblings;
+ nf_conntrack_helper_deprecated(pptp.name);
+
return nf_conntrack_helper_register(&pptp, &pptp_ptr);
}
diff --git a/net/netfilter/nf_flow_table_ip.c b/net/netfilter/nf_flow_table_ip.c
index e7a3fb2b2d94..29e93ac1e2e4 100644
--- a/net/netfilter/nf_flow_table_ip.c
+++ b/net/netfilter/nf_flow_table_ip.c
@@ -326,8 +326,10 @@ static bool nf_flow_ip4_tunnel_proto(struct nf_flowtable_ctx *ctx,
return false;
iph = (struct iphdr *)(skb_network_header(skb) + ctx->offset);
- size = iph->ihl << 2;
+ if (iph->ihl < 5)
+ return false;
+ size = iph->ihl << 2;
if (ip_is_fragment(iph) || unlikely(ip_has_options(size)))
return false;
@@ -335,9 +337,9 @@ static bool nf_flow_ip4_tunnel_proto(struct nf_flowtable_ctx *ctx,
return false;
if (iph->protocol == IPPROTO_IPIP) {
- ctx->tun.proto = IPPROTO_IPIP;
+ ctx->tun.proto = iph->protocol;
ctx->tun.hdr_size = size;
- ctx->offset += size;
+ ctx->offset += ctx->tun.hdr_size;
}
return true;
diff --git a/net/netfilter/nf_nat_core.c b/net/netfilter/nf_nat_core.c
index 2bbf5163c0e2..63ff6b4d5d21 100644
--- a/net/netfilter/nf_nat_core.c
+++ b/net/netfilter/nf_nat_core.c
@@ -1181,6 +1181,16 @@ int nf_nat_register_fn(struct net *net, u8 pf, const struct nf_hook_ops *ops,
struct nf_hook_ops *nat_ops;
int i, ret;
+#ifndef MODULE
+ /* If nf_nat_core is built-in and nf_nat_init() fails, dependent
+ * modules like nft_chain_nat.ko may still call this function.
+ * However, nat_net would be invalid, likely pointing to some other
+ * per-net structure.
+ */
+ if (WARN_ON_ONCE(!nf_nat_hook))
+ return -EOPNOTSUPP;
+#endif
+
if (WARN_ON_ONCE(pf >= ARRAY_SIZE(nat_net->nat_proto_net)))
return -EINVAL;
diff --git a/net/netfilter/nft_compat.c b/net/netfilter/nft_compat.c
index 0caa9304d2d0..63864b928259 100644
--- a/net/netfilter/nft_compat.c
+++ b/net/netfilter/nft_compat.c
@@ -397,6 +397,22 @@ static int nft_target_validate(const struct nft_ctx *ctx,
return 0;
}
+static int nft_target_bridge_validate(const struct nft_ctx *ctx,
+ const struct nft_expr *expr)
+{
+ struct xt_target *target = expr->ops->data;
+
+ /* Do not allow UNSPEC to stand-in for NFPROTO_BRIDGE
+ * targets: they are incompatible. ebtables targets return
+ * EBT_ACCEPT, DROP and so on which are not compatible with
+ * NF_ACCEPT, NF_DROP and so on.
+ */
+ if (target->family != NFPROTO_BRIDGE)
+ return -ENOENT;
+
+ return nft_target_validate(ctx, expr);
+}
+
static void __nft_match_eval(const struct nft_expr *expr,
struct nft_regs *regs,
const struct nft_pktinfo *pkt,
@@ -932,13 +948,15 @@ nft_target_select_ops(const struct nft_ctx *ctx,
ops->init = nft_target_init;
ops->destroy = nft_target_destroy;
ops->dump = nft_target_dump;
- ops->validate = nft_target_validate;
ops->data = target;
- if (family == NFPROTO_BRIDGE)
+ if (family == NFPROTO_BRIDGE) {
ops->eval = nft_target_eval_bridge;
- else
+ ops->validate = nft_target_bridge_validate;
+ } else {
ops->eval = nft_target_eval_xt;
+ ops->validate = nft_target_validate;
+ }
return ops;
err:
diff --git a/net/netfilter/nft_ct.c b/net/netfilter/nft_ct.c
index 958054dd2e2e..03a88c77e0f0 100644
--- a/net/netfilter/nft_ct.c
+++ b/net/netfilter/nft_ct.c
@@ -1215,11 +1215,23 @@ struct nft_ct_expect_obj {
u32 timeout;
};
+static int nft_ct_expect_timeout_get(const struct nlattr *attr, u32 *val)
+{
+ unsigned long jiffies_val = msecs_to_jiffies(nla_get_u32(attr));
+
+ if (jiffies_val > UINT_MAX)
+ return -ERANGE;
+
+ *val = jiffies_val;
+ return 0;
+}
+
static int nft_ct_expect_obj_init(const struct nft_ctx *ctx,
const struct nlattr * const tb[],
struct nft_object *obj)
{
struct nft_ct_expect_obj *priv = nft_obj_data(obj);
+ int err;
if (!tb[NFTA_CT_EXPECT_L4PROTO] ||
!tb[NFTA_CT_EXPECT_DPORT] ||
@@ -1254,8 +1266,11 @@ static int nft_ct_expect_obj_init(const struct nft_ctx *ctx,
return -EOPNOTSUPP;
}
+ err = nft_ct_expect_timeout_get(tb[NFTA_CT_EXPECT_TIMEOUT], &priv->timeout);
+ if (err)
+ return err;
+
priv->dport = nla_get_be16(tb[NFTA_CT_EXPECT_DPORT]);
- priv->timeout = nla_get_u32(tb[NFTA_CT_EXPECT_TIMEOUT]);
priv->size = nla_get_u8(tb[NFTA_CT_EXPECT_SIZE]);
return nf_ct_netns_get(ctx->net, ctx->family);
@@ -1275,7 +1290,7 @@ static int nft_ct_expect_obj_dump(struct sk_buff *skb,
if (nla_put_be16(skb, NFTA_CT_EXPECT_L3PROTO, htons(priv->l3num)) ||
nla_put_u8(skb, NFTA_CT_EXPECT_L4PROTO, priv->l4proto) ||
nla_put_be16(skb, NFTA_CT_EXPECT_DPORT, priv->dport) ||
- nla_put_u32(skb, NFTA_CT_EXPECT_TIMEOUT, priv->timeout) ||
+ nla_put_u32(skb, NFTA_CT_EXPECT_TIMEOUT, jiffies_to_msecs(priv->timeout)) ||
nla_put_u8(skb, NFTA_CT_EXPECT_SIZE, priv->size))
return -1;
@@ -1325,7 +1340,7 @@ static void nft_ct_expect_obj_eval(struct nft_object *obj,
&ct->tuplehash[!dir].tuple.src.u3,
&ct->tuplehash[!dir].tuple.dst.u3,
priv->l4proto, NULL, &priv->dport);
- exp->timeout += priv->timeout * HZ;
+ exp->timeout += priv->timeout;
if (nf_ct_expect_related(exp, 0) != 0)
regs->verdict.code = NF_DROP;
diff --git a/net/netfilter/nft_synproxy.c b/net/netfilter/nft_synproxy.c
index 7641f249614c..9ed288c9d168 100644
--- a/net/netfilter/nft_synproxy.c
+++ b/net/netfilter/nft_synproxy.c
@@ -24,14 +24,13 @@ static const struct nla_policy nft_synproxy_policy[NFTA_SYNPROXY_MAX + 1] = {
static void nft_synproxy_tcp_options(struct synproxy_options *opts,
const struct tcphdr *tcp,
struct synproxy_net *snet,
- struct nf_synproxy_info *info,
- const struct nft_synproxy *priv)
+ struct nf_synproxy_info *info)
{
this_cpu_inc(snet->stats->syn_received);
if (tcp->ece && tcp->cwr)
opts->options |= NF_SYNPROXY_OPT_ECN;
- opts->options &= priv->info.options;
+ opts->options &= info->options;
opts->mss_encode = opts->mss_option;
opts->mss_option = info->mss;
if (opts->options & NF_SYNPROXY_OPT_TIMESTAMP)
@@ -56,7 +55,7 @@ static void nft_synproxy_eval_v4(const struct nft_synproxy *priv,
if (tcp->syn) {
/* Initial SYN from client */
- nft_synproxy_tcp_options(opts, tcp, snet, &info, priv);
+ nft_synproxy_tcp_options(opts, tcp, snet, &info);
synproxy_send_client_synack(net, skb, tcp, opts);
consume_skb(skb);
regs->verdict.code = NF_STOLEN;
@@ -87,7 +86,7 @@ static void nft_synproxy_eval_v6(const struct nft_synproxy *priv,
if (tcp->syn) {
/* Initial SYN from client */
- nft_synproxy_tcp_options(opts, tcp, snet, &info, priv);
+ nft_synproxy_tcp_options(opts, tcp, snet, &info);
synproxy_send_client_synack_ipv6(net, skb, tcp, opts);
consume_skb(skb);
regs->verdict.code = NF_STOLEN;
diff --git a/tools/testing/selftests/net/netfilter/conntrack_sctp_collision.sh b/tools/testing/selftests/net/netfilter/conntrack_sctp_collision.sh
index d860f7d9744b..7261975957ef 100755
--- a/tools/testing/selftests/net/netfilter/conntrack_sctp_collision.sh
+++ b/tools/testing/selftests/net/netfilter/conntrack_sctp_collision.sh
@@ -2,18 +2,32 @@
# SPDX-License-Identifier: GPL-2.0
#
# Testing For SCTP COLLISION SCENARIO as Below:
-#
+# 1. Stale INIT_ACK capture:
# 14:35:47.655279 IP CLIENT_IP.PORT > SERVER_IP.PORT: sctp (1) [INIT] [init tag: 2017837359]
# 14:35:48.353250 IP SERVER_IP.PORT > CLIENT_IP.PORT: sctp (1) [INIT] [init tag: 1187206187]
# 14:35:48.353275 IP CLIENT_IP.PORT > SERVER_IP.PORT: sctp (1) [INIT ACK] [init tag: 2017837359]
# 14:35:48.353283 IP SERVER_IP.PORT > CLIENT_IP.PORT: sctp (1) [COOKIE ECHO]
# 14:35:48.353977 IP CLIENT_IP.PORT > SERVER_IP.PORT: sctp (1) [COOKIE ACK]
# 14:35:48.855335 IP SERVER_IP.PORT > CLIENT_IP.PORT: sctp (1) [INIT ACK] [init tag: 164579970]
+# (Delayed)
+#
+# 2. Stale INIT capture:
+# 14:35:48.353250 IP SERVER_IP.PORT > CLIENT_IP.PORT: sctp (1) [INIT] [init tag: 1187206187]
+# 14:35:48.353275 IP CLIENT_IP.PORT > SERVER_IP.PORT: sctp (1) [INIT ACK] [init tag: 2017837359]
+# 14:35:48.353283 IP SERVER_IP.PORT > CLIENT_IP.PORT: sctp (1) [COOKIE ECHO]
+# 14:35:48.353977 IP CLIENT_IP.PORT > SERVER_IP.PORT: sctp (1) [COOKIE ACK]
+# 14:35:47.655279 IP CLIENT_IP.PORT > SERVER_IP.PORT: sctp (1) [INIT] [init tag: 2017837359]
+# (Delayed)
+# 14:35:48.855335 IP SERVER_IP.PORT > CLIENT_IP.PORT: sctp (1) [INIT ACK] [init tag: 164579970]
#
# TOPO: SERVER_NS (link0)<--->(link1) ROUTER_NS (link2)<--->(link3) CLIENT_NS
source lib.sh
+checktool "nft --version" "run test without nft"
+checktool "tc -h" "run test without tc"
+checktool "modprobe -q sctp" "load sctp module"
+
CLIENT_IP="198.51.200.1"
CLIENT_PORT=1234
@@ -24,7 +38,8 @@ CLIENT_GW="198.51.200.2"
SERVER_GW="198.51.100.2"
# setup the topo
-setup() {
+topo_setup() {
+ # setup_ns cleans up existing net namespaces first.
setup_ns CLIENT_NS SERVER_NS ROUTER_NS
ip -n "$SERVER_NS" link add link0 type veth peer name link1 netns "$ROUTER_NS"
ip -n "$CLIENT_NS" link add link3 type veth peer name link2 netns "$ROUTER_NS"
@@ -38,35 +53,53 @@ setup() {
ip -n "$ROUTER_NS" addr add $SERVER_GW/24 dev link1
ip -n "$ROUTER_NS" addr add $CLIENT_GW/24 dev link2
ip net exec "$ROUTER_NS" sysctl -wq net.ipv4.ip_forward=1
+ sysctl -wq net.netfilter.nf_log_all_netns=1
ip -n "$CLIENT_NS" link set link3 up
ip -n "$CLIENT_NS" addr add $CLIENT_IP/24 dev link3
ip -n "$CLIENT_NS" route add $SERVER_IP dev link3 via $CLIENT_GW
+}
+
+conf_delay()
+{
+ # simulate the delay on OVS upcall by setting up a delay for INIT_ACK/INIT with
+ local ns=$1
+ local link=$2
+ local chunk_type=$3
- # simulate the delay on OVS upcall by setting up a delay for INIT_ACK with
- # tc on $SERVER_NS side
- tc -n "$SERVER_NS" qdisc add dev link0 root handle 1: htb r2q 64
- tc -n "$SERVER_NS" class add dev link0 parent 1: classid 1:1 htb rate 100mbit
- tc -n "$SERVER_NS" filter add dev link0 parent 1: protocol ip u32 match ip protocol 132 \
- 0xff match u8 2 0xff at 32 flowid 1:1
- if ! tc -n "$SERVER_NS" qdisc add dev link0 parent 1:1 handle 10: netem delay 1200ms; then
+ # use a smaller number for assoc's max_retrans to reproduce the issue
+ ip net exec "$CLIENT_NS" sysctl -wq net.sctp.association_max_retrans=3
+
+ tc -n "$ns" qdisc add dev "$link" root handle 1: htb r2q 64
+ tc -n "$ns" class add dev "$link" parent 1: classid 1:1 htb rate 100mbit
+ tc -n "$ns" filter add dev "$link" parent 1: protocol ip \
+ u32 match ip protocol 132 0xff match u8 "$chunk_type" 0xff at 32 flowid 1:1
+ if ! tc -n "$ns" qdisc add dev "$link" parent 1:1 handle 10: netem delay 1200ms; then
echo "SKIP: Cannot add netem qdisc"
- exit $ksft_skip
+ return $ksft_skip
fi
# simulate the ctstate check on OVS nf_conntrack
- ip net exec "$ROUTER_NS" iptables -A FORWARD -m state --state INVALID,UNTRACKED -j DROP
- ip net exec "$ROUTER_NS" iptables -A INPUT -p sctp -j DROP
-
- # use a smaller number for assoc's max_retrans to reproduce the issue
- modprobe -q sctp
- ip net exec "$CLIENT_NS" sysctl -wq net.sctp.association_max_retrans=3
+ ip net exec "$ROUTER_NS" nft -f - <<-EOF
+ table ip t {
+ chain forward {
+ type filter hook forward priority filter; policy accept;
+ meta l4proto icmp counter accept
+ ct state new counter accept
+ ct state established,related counter accept
+ ct state invalid log flags all counter drop comment \
+ "Expect to drop stale INIT/INIT_ACK chunks"
+ counter
+ }
+ }
+ EOF
+ return 0
}
cleanup() {
- ip net exec "$CLIENT_NS" pkill sctp_collision >/dev/null 2>&1
- ip net exec "$SERVER_NS" pkill sctp_collision >/dev/null 2>&1
+ # cleanup_all_ns terminates running processes in the namespaces.
cleanup_all_ns
+ sysctl -wq net.netfilter.nf_log_all_netns=0
}
do_test() {
@@ -81,7 +114,19 @@ do_test() {
# run the test case
trap cleanup EXIT
-setup && \
-echo "Test for SCTP Collision in nf_conntrack:" && \
-do_test && echo "PASS!"
-exit $?
+
+echo "Test for SCTP INIT_ACK Collision in nf_conntrack:"
+topo_setup || exit $?
+conf_delay $SERVER_NS link0 2 || exit $?
+
+if ! do_test; then
+ exit $ksft_fail
+fi
+
+echo "Test for SCTP INIT Collision in nf_conntrack:"
+topo_setup || exit $?
+conf_delay $CLIENT_NS link3 1 || exit $?
+
+if ! do_test; then
+ exit $ksft_fail
+fi
diff --git a/tools/testing/selftests/net/netfilter/nft_queue.sh b/tools/testing/selftests/net/netfilter/nft_queue.sh
index d80390848e85..7c857a2e0f34 100755
--- a/tools/testing/selftests/net/netfilter/nft_queue.sh
+++ b/tools/testing/selftests/net/netfilter/nft_queue.sh
@@ -85,11 +85,12 @@ ip -net "$ns3" route add default via 10.0.3.1
ip -net "$ns3" route add default via dead:3::1
load_ruleset() {
- local name=$1
- local prio=$2
+ local family=$1
+ local name=$2
+ local prio=$3
ip netns exec "$nsrouter" nft -f /dev/stdin <<EOF
-table inet $name {
+table $family $name {
chain nfq {
ip protocol icmp queue bypass
icmpv6 type { "echo-request", "echo-reply" } queue num 1 bypass
@@ -228,6 +229,7 @@ nf_queue_wait()
test_queue()
{
local expected="$1"
+ local family="$2"
local last=""
# spawn nf_queue listeners
@@ -255,11 +257,13 @@ test_queue()
if [ x"$last" != x"$expected packets total" ]; then
echo "FAIL: Expected $expected packets total, but got $last" 1>&2
ip netns exec "$nsrouter" nft list ruleset
+ echo -n "$TMPFILE0: ";cat "$TMPFILE0"
+ echo -n "$TMPFILE1: ";cat "$TMPFILE1"
exit 1
fi
done
- echo "PASS: Expected and received $last"
+ echo "PASS: Expected and received $last ($family)"
}
listener_ready()
@@ -400,6 +404,8 @@ EOF
kill "$nfqpid"
echo "PASS: icmp+nfqueue via vrf"
+ ip -net "$ns1" link del tvrf
+ ip netns exec "$ns1" nft flush ruleset
}
sctp_listener_ready()
@@ -814,12 +820,53 @@ EOF
check_tainted "queue program exiting while packets queued"
}
+test_queue_bridge()
+{
+ ip -net "$nsrouter" addr flush dev veth0
+ ip -net "$nsrouter" addr flush dev veth1
+
+ ip -net "$nsrouter" link add br0 type bridge
+ ip -net "$nsrouter" link set veth0 master br0
+ ip -net "$nsrouter" link set veth1 master br0
+
+ ip -net "$nsrouter" link set br0 up
+
+ ip -net "$nsrouter" addr add 10.0.2.1/16 dev br0
+ ip -net "$nsrouter" addr add dead:2::1/64 dev br0 nodad
+
+ ip -net "$ns1" addr flush dev eth0
+ ip -net "$ns2" addr flush dev eth0
+
+ ip -net "$ns1" addr add 10.0.1.1/16 dev eth0
+ ip -net "$ns1" addr add dead:2::2/64 dev eth0 nodad
+
+ ip -net "$ns2" addr add 10.0.2.99/16 dev eth0
+ ip -net "$ns2" addr add dead:2::99/64 dev eth0 nodad
+
+ ip netns exec "$nsrouter" nft flush ruleset
+
+ ip netns exec "$nsrouter" sysctl net.ipv6.conf.all.forwarding=0 > /dev/null
+ ip netns exec "$nsrouter" sysctl net.ipv4.conf.veth0.forwarding=0 > /dev/null
+ ip netns exec "$nsrouter" sysctl net.ipv4.conf.veth1.forwarding=0 > /dev/null
+
+ if ! test_ping;then
+ echo "FAIL: netns bridge connectivity" 1>&2
+ exit $ret
+ fi
+
+ load_ruleset "bridge" "filter" 10
+ test_queue 10 "bridge"
+
+ load_ruleset "bridge" "filter2" 20
+ test_queue 20 "bridge"
+}
+
ip netns exec "$nsrouter" sysctl net.ipv6.conf.all.forwarding=1 > /dev/null
ip netns exec "$nsrouter" sysctl net.ipv4.conf.veth0.forwarding=1 > /dev/null
ip netns exec "$nsrouter" sysctl net.ipv4.conf.veth1.forwarding=1 > /dev/null
ip netns exec "$nsrouter" sysctl net.ipv4.conf.veth2.forwarding=1 > /dev/null
-load_ruleset "filter" 0
+load_ruleset "inet" "filter" 0
if test_ping; then
# queue bypass works (rules were skipped, no listener)
@@ -842,11 +889,11 @@ load_counter_ruleset 10
# 1x icmp prerouting,forward,postrouting -> 3 queue events (6 incl. reply).
# 1x icmp prerouting,input,output postrouting -> 4 queue events incl. reply.
# so we expect that userspace program receives 10 packets.
-test_queue 10
+test_queue 10 "inet"
# same. We queue to a second program as well.
-load_ruleset "filter2" 20
-test_queue 20
+load_ruleset "inet" "filter2" 20
+test_queue 20 "inet"
ip netns exec "$ns1" nft flush ruleset
test_tcp_forward
@@ -863,4 +910,7 @@ test_queue_stress
test_icmp_vrf
test_queue_removal
+# turns router into a bridge
+test_queue_bridge
+
exit $ret