From 60fb9567bf30937e6bedfa939d7c8fd4ee6a1b1c Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Wed, 7 Nov 2018 12:38:28 +0100 Subject: udp: implement complete book-keeping for encap_needed The *encap_needed static keys are enabled by UDP tunnels and several UDP encapsulation types, but they are never turned off. This can cause unneeded overall performance degradation for systems where such features are used transiently. This patch introduces complete book-keeping for such keys, decreasing the usage at socket destruction time, if needed, and preventing the same socket from increasing the key usage multiple times. rfc v3 -> v1: - add socket lock around udp_tunnel_encap_enable() rfc v2 -> rfc v3: - use udp_tunnel_encap_enable() in setsockopt() Signed-off-by: Paolo Abeni Signed-off-by: David S. Miller --- include/linux/udp.h | 7 ++++++- include/net/udp_tunnel.h | 6 ++++++ 2 files changed, 12 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/udp.h b/include/linux/udp.h index 320d49d85484..a4dafff407fb 100644 --- a/include/linux/udp.h +++ b/include/linux/udp.h @@ -49,7 +49,12 @@ struct udp_sock { unsigned int corkflag; /* Cork is required */ __u8 encap_type; /* Is this an Encapsulation socket? */ unsigned char no_check6_tx:1,/* Send zero UDP6 checksums on TX? */ - no_check6_rx:1;/* Allow zero UDP6 checksums on RX? */ + no_check6_rx:1,/* Allow zero UDP6 checksums on RX? */ + encap_enabled:1; /* This socket enabled encap + * processing; UDP tunnels and + * different encapsulation layer set + * this + */ /* * Following member retains the information to create a UDP header * when the socket is uncorked. 
diff --git a/include/net/udp_tunnel.h b/include/net/udp_tunnel.h index fe680ab6b15a..3fbe56430e3b 100644 --- a/include/net/udp_tunnel.h +++ b/include/net/udp_tunnel.h @@ -165,6 +165,12 @@ static inline int udp_tunnel_handle_offloads(struct sk_buff *skb, bool udp_csum) static inline void udp_tunnel_encap_enable(struct socket *sock) { + struct udp_sock *up = udp_sk(sock->sk); + + if (up->encap_enabled) + return; + + up->encap_enabled = 1; #if IS_ENABLED(CONFIG_IPV6) if (sock->sk->sk_family == PF_INET6) ipv6_stub->udpv6_encap_enable(); -- cgit v1.2.3 From e20cf8d3f1f763ad28a9cb3b41305b8a8a42653e Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Wed, 7 Nov 2018 12:38:29 +0100 Subject: udp: implement GRO for plain UDP sockets. This is the RX counterpart of commit bec1f6f69736 ("udp: generate gso with UDP_SEGMENT"). When UDP_GRO is enabled on a socket, that socket is also eligible for GRO in the rx path: UDP segments directed to it are assembled into a larger GSO_UDP_L4 packet. The core UDP GRO support is enabled with setsockopt(UDP_GRO). Initial benchmark numbers: Before: udp rx: 1079 MB/s 769065 calls/s After: udp rx: 1466 MB/s 24877 calls/s This change introduces a side effect with respect to UDP tunnels: after a UDP tunnel is created, the kernel now performs a lookup per ingress UDP packet, whereas before such a lookup happened only if the ingress packet carried a valid internal header csum. rfc v2 -> rfc v3: - fixed typos in macro name and comments - really enforce UDP_GRO_CNT_MAX, instead of UDP_GRO_CNT_MAX + 1 - acquire socket lock in UDP_GRO setsockopt rfc v1 -> rfc v2: - use a new option to enable UDP GRO - use static keys to protect the UDP GRO socket lookup Signed-off-by: Paolo Abeni Signed-off-by: David S. 
Miller --- include/linux/udp.h | 3 ++- include/uapi/linux/udp.h | 1 + 2 files changed, 3 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/udp.h b/include/linux/udp.h index a4dafff407fb..f613b329852e 100644 --- a/include/linux/udp.h +++ b/include/linux/udp.h @@ -50,11 +50,12 @@ struct udp_sock { __u8 encap_type; /* Is this an Encapsulation socket? */ unsigned char no_check6_tx:1,/* Send zero UDP6 checksums on TX? */ no_check6_rx:1,/* Allow zero UDP6 checksums on RX? */ - encap_enabled:1; /* This socket enabled encap + encap_enabled:1, /* This socket enabled encap * processing; UDP tunnels and * different encapsulation layer set * this */ + gro_enabled:1; /* Can accept GRO packets */ /* * Following member retains the information to create a UDP header * when the socket is uncorked. diff --git a/include/uapi/linux/udp.h b/include/uapi/linux/udp.h index 09502de447f5..30baccb6c9c4 100644 --- a/include/uapi/linux/udp.h +++ b/include/uapi/linux/udp.h @@ -33,6 +33,7 @@ struct udphdr { #define UDP_NO_CHECK6_TX 101 /* Disable sending checksum for UDP6X */ #define UDP_NO_CHECK6_RX 102 /* Disable accpeting checksum for UDP6 */ #define UDP_SEGMENT 103 /* Set GSO segmentation size */ +#define UDP_GRO 104 /* This socket can receive UDP GRO packets */ /* UDP encapsulation types */ #define UDP_ENCAP_ESPINUDP_NON_IKE 1 /* draft-ietf-ipsec-nat-t-ike-00/01 */ -- cgit v1.2.3 From bcd1665e3569b0a6f569514f023a41fc7df0b4a3 Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Wed, 7 Nov 2018 12:38:30 +0100 Subject: udp: add support for UDP_GRO cmsg When UDP GRO is enabled, the UDP_GRO cmsg will carry the ingress datagram size. User-space can use such info to compute the original packets layout. Signed-off-by: Paolo Abeni Signed-off-by: David S. 
Miller --- include/linux/udp.h | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'include') diff --git a/include/linux/udp.h b/include/linux/udp.h index f613b329852e..e23d5024f42f 100644 --- a/include/linux/udp.h +++ b/include/linux/udp.h @@ -121,6 +121,17 @@ static inline bool udp_get_no_check6_rx(struct sock *sk) return udp_sk(sk)->no_check6_rx; } +static inline void udp_cmsg_recv(struct msghdr *msg, struct sock *sk, + struct sk_buff *skb) +{ + int gso_size; + + if (skb_shinfo(skb)->gso_type & SKB_GSO_UDP_L4) { + gso_size = skb_shinfo(skb)->gso_size; + put_cmsg(msg, SOL_UDP, UDP_GRO, sizeof(gso_size), &gso_size); + } +} + #define udp_portaddr_for_each_entry(__sk, list) \ hlist_for_each_entry(__sk, list, __sk_common.skc_portaddr_node) -- cgit v1.2.3 From 68cb7d531e6a87250a51b8a4ee1c79b3445aeff3 Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Wed, 7 Nov 2018 12:38:31 +0100 Subject: ip: factor out protocol delivery helper So that we can re-use it at the UDP level in a later patch rfc v3 -> v1 - add the helper declaration into the ip header Signed-off-by: Paolo Abeni Signed-off-by: David S. 
Miller --- include/net/ip.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/net/ip.h b/include/net/ip.h index 462182f78236..8866bfce6121 100644 --- a/include/net/ip.h +++ b/include/net/ip.h @@ -155,6 +155,7 @@ int ip_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt, void ip_list_rcv(struct list_head *head, struct packet_type *pt, struct net_device *orig_dev); int ip_local_deliver(struct sk_buff *skb); +void ip_protocol_deliver_rcu(struct net *net, struct sk_buff *skb, int proto); int ip_mr_input(struct sk_buff *skb); int ip_output(struct net *net, struct sock *sk, struct sk_buff *skb); int ip_mc_output(struct net *net, struct sock *sk, struct sk_buff *skb); -- cgit v1.2.3 From 80bde363f9a43d942e404821b966e362131cd0ca Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Wed, 7 Nov 2018 12:38:32 +0100 Subject: ipv6: factor out protocol delivery helper So that we can re-use it at the UDP level in the next patch rfc v3 -> v1: - add the helper declaration into the ipv6 header Signed-off-by: Paolo Abeni Signed-off-by: David S. 
Miller --- include/net/ipv6.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/net/ipv6.h b/include/net/ipv6.h index 829650540780..daf80863d3a5 100644 --- a/include/net/ipv6.h +++ b/include/net/ipv6.h @@ -975,6 +975,8 @@ int ip6_output(struct net *net, struct sock *sk, struct sk_buff *skb); int ip6_forward(struct sk_buff *skb); int ip6_input(struct sk_buff *skb); int ip6_mc_input(struct sk_buff *skb); +void ip6_protocol_deliver_rcu(struct net *net, struct sk_buff *skb, int nexthdr, + bool have_final); int __ip6_local_out(struct net *net, struct sock *sk, struct sk_buff *skb); int ip6_local_out(struct net *net, struct sock *sk, struct sk_buff *skb); -- cgit v1.2.3 From cf329aa42b6659204fee865bbce0ea20462552eb Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Wed, 7 Nov 2018 12:38:33 +0100 Subject: udp: cope with UDP GRO packet misdirection In some scenarios, the GRO engine can assemble a UDP GRO packet that ultimately lands on a non GRO-enabled socket. This patch tries to address the issue by explicitly checking for the UDP socket features before enqueuing the packet, and eventually segmenting the unexpected GRO packet, as needed. We must also cope with re-insertion requests: after segmentation the UDP code calls the helper introduced by the previous patches, as needed. Segmentation is performed by a common helper, which takes care of updating socket and protocol stats in case of failure. rfc v3 -> v1 - fix compile issues with rxrpc - when gso_segment returns NULL, treat it as an error - added 'ipv4' argument to udp_rcv_segment() rfc v2 -> rfc v3 - moved udp_rcv_segment() into net/udp.h, account errors to socket and ns, always return NULL or segs list Signed-off-by: Paolo Abeni Signed-off-by: David S. 
Miller --- include/linux/udp.h | 6 ++++++ include/net/udp.h | 45 +++++++++++++++++++++++++++++++++++++-------- 2 files changed, 43 insertions(+), 8 deletions(-) (limited to 'include') diff --git a/include/linux/udp.h b/include/linux/udp.h index e23d5024f42f..0a9c54e76305 100644 --- a/include/linux/udp.h +++ b/include/linux/udp.h @@ -132,6 +132,12 @@ static inline void udp_cmsg_recv(struct msghdr *msg, struct sock *sk, } } +static inline bool udp_unexpected_gso(struct sock *sk, struct sk_buff *skb) +{ + return !udp_sk(sk)->gro_enabled && skb_is_gso(skb) && + skb_shinfo(skb)->gso_type & SKB_GSO_UDP_L4; +} + #define udp_portaddr_for_each_entry(__sk, list) \ hlist_for_each_entry(__sk, list, __sk_common.skc_portaddr_node) diff --git a/include/net/udp.h b/include/net/udp.h index a496e441645e..eccca2325ee6 100644 --- a/include/net/udp.h +++ b/include/net/udp.h @@ -417,17 +417,24 @@ static inline int copy_linear_skb(struct sk_buff *skb, int len, int off, } while(0) #if IS_ENABLED(CONFIG_IPV6) -#define __UDPX_INC_STATS(sk, field) \ -do { \ - if ((sk)->sk_family == AF_INET) \ - __UDP_INC_STATS(sock_net(sk), field, 0); \ - else \ - __UDP6_INC_STATS(sock_net(sk), field, 0); \ -} while (0) +#define __UDPX_MIB(sk, ipv4) \ +({ \ + ipv4 ? (IS_UDPLITE(sk) ? sock_net(sk)->mib.udplite_statistics : \ + sock_net(sk)->mib.udp_statistics) : \ + (IS_UDPLITE(sk) ? sock_net(sk)->mib.udplite_stats_in6 : \ + sock_net(sk)->mib.udp_stats_in6); \ +}) #else -#define __UDPX_INC_STATS(sk, field) __UDP_INC_STATS(sock_net(sk), field, 0) +#define __UDPX_MIB(sk, ipv4) \ +({ \ + IS_UDPLITE(sk) ? 
sock_net(sk)->mib.udplite_statistics : \ + sock_net(sk)->mib.udp_statistics; \ +}) #endif +#define __UDPX_INC_STATS(sk, field) \ + __SNMP_INC_STATS(__UDPX_MIB(sk, (sk)->sk_family == AF_INET), field) + #ifdef CONFIG_PROC_FS struct udp_seq_afinfo { sa_family_t family; @@ -461,4 +468,26 @@ DECLARE_STATIC_KEY_FALSE(udpv6_encap_needed_key); void udpv6_encap_enable(void); #endif +static inline struct sk_buff *udp_rcv_segment(struct sock *sk, + struct sk_buff *skb, bool ipv4) +{ + struct sk_buff *segs; + + /* the GSO CB lays after the UDP one, no need to save and restore any + * CB fragment + */ + segs = __skb_gso_segment(skb, NETIF_F_SG, false); + if (unlikely(IS_ERR_OR_NULL(segs))) { + int segs_nr = skb_shinfo(skb)->gso_segs; + + atomic_add(segs_nr, &sk->sk_drops); + SNMP_ADD_STATS(__UDPX_MIB(sk, ipv4), UDP_MIB_INERRORS, segs_nr); + kfree_skb(skb); + return NULL; + } + + consume_skb(skb); + return segs; +} + #endif /* _UDP_H */ -- cgit v1.2.3