diff options
| author | Greg Kroah-Hartman <gregkh@linuxfoundation.org> | 2026-06-01 17:54:55 +0200 |
|---|---|---|
| committer | Greg Kroah-Hartman <gregkh@linuxfoundation.org> | 2026-06-01 17:54:55 +0200 |
| commit | d0acb5202d0e33d23cbe6994424587fbb05a5360 (patch) | |
| tree | 4b7fd07149896994fb739e1cbb5d65f2e47cd6d7 /net | |
| parent | 55f3722fc694d6478eca3020b12a4d6a79b06f27 (diff) | |
| parent | bb532bfaf7919c7c98caab81864e9ce2646e11e3 (diff) | |
Merge v7.0.11linux-rolling-stable
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Diffstat (limited to 'net')
96 files changed, 1745 insertions, 950 deletions
diff --git a/net/batman-adv/bat_iv_ogm.c b/net/batman-adv/bat_iv_ogm.c index 74ef7dc2b2f9..b8b1b997960a 100644 --- a/net/batman-adv/bat_iv_ogm.c +++ b/net/batman-adv/bat_iv_ogm.c @@ -224,6 +224,8 @@ static void batadv_iv_ogm_iface_disable(struct batadv_hard_iface *hard_iface) hard_iface->bat_iv.ogm_buff = NULL; mutex_unlock(&hard_iface->bat_iv.ogm_buff_mutex); + + cancel_delayed_work_sync(&hard_iface->bat_iv.reschedule_work); } static void batadv_iv_ogm_iface_update_mac(struct batadv_hard_iface *hard_iface) @@ -536,8 +538,10 @@ out: * @if_incoming: interface where the packet was received * @if_outgoing: interface for which the retransmission should be considered * @own_packet: true if it is a self-generated ogm + * + * Return: whether forward packet was scheduled */ -static void batadv_iv_ogm_aggregate_new(const unsigned char *packet_buff, +static bool batadv_iv_ogm_aggregate_new(const unsigned char *packet_buff, int packet_len, unsigned long send_time, bool direct_link, struct batadv_hard_iface *if_incoming, @@ -561,13 +565,13 @@ static void batadv_iv_ogm_aggregate_new(const unsigned char *packet_buff, skb = netdev_alloc_skb_ip_align(NULL, skb_size); if (!skb) - return; + return false; forw_packet_aggr = batadv_forw_packet_alloc(if_incoming, if_outgoing, queue_left, bat_priv, skb); if (!forw_packet_aggr) { kfree_skb(skb); - return; + return false; } forw_packet_aggr->skb->priority = TC_PRIO_CONTROL; @@ -590,6 +594,8 @@ static void batadv_iv_ogm_aggregate_new(const unsigned char *packet_buff, batadv_iv_send_outstanding_bat_ogm_packet); batadv_forw_packet_ogmv1_queue(bat_priv, forw_packet_aggr, send_time); + + return true; } /* aggregate a new packet into the existing ogm packet */ @@ -617,8 +623,10 @@ static void batadv_iv_ogm_aggregate(struct batadv_forw_packet *forw_packet_aggr, * @if_outgoing: interface for which the retransmission should be considered * @own_packet: true if it is a self-generated ogm * @send_time: timestamp (jiffies) when the packet is to be sent + * + * Return: whether forward packet was scheduled */ -static void batadv_iv_ogm_queue_add(struct batadv_priv *bat_priv, +static bool batadv_iv_ogm_queue_add(struct batadv_priv *bat_priv, unsigned char *packet_buff, int packet_len, struct batadv_hard_iface *if_incoming, @@ -670,14 +678,16 @@ static void batadv_iv_ogm_queue_add(struct batadv_priv *bat_priv, if (!own_packet && atomic_read(&bat_priv->aggregated_ogms)) send_time += max_aggregation_jiffies; - batadv_iv_ogm_aggregate_new(packet_buff, packet_len, - send_time, direct_link, - if_incoming, if_outgoing, - own_packet); + return batadv_iv_ogm_aggregate_new(packet_buff, packet_len, + send_time, direct_link, + if_incoming, if_outgoing, + own_packet); } else { batadv_iv_ogm_aggregate(forw_packet_aggr, packet_buff, packet_len, direct_link); spin_unlock_bh(&bat_priv->forw_bat_list_lock); + + return true; } } @@ -790,6 +800,9 @@ static void batadv_iv_ogm_schedule_buff(struct batadv_hard_iface *hard_iface) u32 seqno; u16 tvlv_len = 0; unsigned long send_time; + bool reschedule = false; + bool scheduled; + int ret; lockdep_assert_held(&hard_iface->bat_iv.ogm_buff_mutex); @@ -813,9 +826,15 @@ static void batadv_iv_ogm_schedule_buff(struct batadv_hard_iface *hard_iface) * appended as it may alter the tt tvlv container */ batadv_tt_local_commit_changes(bat_priv); - tvlv_len = batadv_tvlv_container_ogm_append(bat_priv, ogm_buff, - ogm_buff_len, - BATADV_OGM_HLEN); + ret = batadv_tvlv_container_ogm_append(bat_priv, ogm_buff, + ogm_buff_len, + BATADV_OGM_HLEN); + if (ret < 0) { + reschedule = true; + goto out; + } + + tvlv_len = ret; } batadv_ogm_packet = (struct batadv_ogm_packet *)(*ogm_buff); @@ -834,8 +853,11 @@ static void batadv_iv_ogm_schedule_buff(struct batadv_hard_iface *hard_iface) /* OGMs from secondary interfaces are only scheduled on their * respective interfaces. */ - batadv_iv_ogm_queue_add(bat_priv, *ogm_buff, *ogm_buff_len, - hard_iface, hard_iface, 1, send_time); + scheduled = batadv_iv_ogm_queue_add(bat_priv, *ogm_buff, *ogm_buff_len, + hard_iface, hard_iface, 1, send_time); + if (!scheduled) + reschedule = true; + goto out; } @@ -847,15 +869,28 @@ static void batadv_iv_ogm_schedule_buff(struct batadv_hard_iface *hard_iface) if (!kref_get_unless_zero(&tmp_hard_iface->refcount)) continue; - batadv_iv_ogm_queue_add(bat_priv, *ogm_buff, - *ogm_buff_len, hard_iface, - tmp_hard_iface, 1, send_time); - + scheduled = batadv_iv_ogm_queue_add(bat_priv, *ogm_buff, + *ogm_buff_len, hard_iface, + tmp_hard_iface, 1, send_time); batadv_hardif_put(tmp_hard_iface); + + if (!scheduled && tmp_hard_iface == hard_iface) + reschedule = true; } rcu_read_unlock(); out: + if (reschedule) { + /* there was a failure scheduling the own forward packet. + * as result, the batadv_iv_send_outstanding_bat_ogm_packet() + * work item is no longer scheduled. it is therefore necessary + * to reschedule it manually + */ + queue_delayed_work(batadv_event_workqueue, + &hard_iface->bat_iv.reschedule_work, + msecs_to_jiffies(atomic_read(&bat_priv->orig_interval))); + } + batadv_hardif_put(primary_if); } @@ -870,6 +905,17 @@ static void batadv_iv_ogm_schedule(struct batadv_hard_iface *hard_iface) mutex_unlock(&hard_iface->bat_iv.ogm_buff_mutex); } +static void batadv_iv_ogm_reschedule(struct work_struct *work) +{ + struct delayed_work *delayed_work = to_delayed_work(work); + struct batadv_hard_iface *hard_iface; + + hard_iface = container_of(delayed_work, + struct batadv_hard_iface, + bat_iv.reschedule_work); + batadv_iv_ogm_schedule(hard_iface); +} + /** * batadv_iv_orig_ifinfo_sum() - Get bcast_own sum for originator over interface * @orig_node: originator which reproadcasted the OGMs directly @@ -2262,6 +2308,8 @@ batadv_iv_ogm_neigh_is_sob(struct batadv_neigh_node *neigh1, static void batadv_iv_iface_enabled(struct batadv_hard_iface *hard_iface) { + INIT_DELAYED_WORK(&hard_iface->bat_iv.reschedule_work, batadv_iv_ogm_reschedule); + /* begin scheduling originator messages on that interface */ batadv_iv_ogm_schedule(hard_iface); } diff --git a/net/batman-adv/bat_v_ogm.c b/net/batman-adv/bat_v_ogm.c index e3870492dab7..d66ca77b1aaa 100644 --- a/net/batman-adv/bat_v_ogm.c +++ b/net/batman-adv/bat_v_ogm.c @@ -113,14 +113,14 @@ static void batadv_v_ogm_start_timer(struct batadv_priv *bat_priv) /** * batadv_v_ogm_send_to_if() - send a batman ogm using a given interface + * @bat_priv: the bat priv with all the mesh interface information * @skb: the OGM to send * @hard_iface: the interface to use to send the OGM */ -static void batadv_v_ogm_send_to_if(struct sk_buff *skb, +static void batadv_v_ogm_send_to_if(struct batadv_priv *bat_priv, + struct sk_buff *skb, struct batadv_hard_iface *hard_iface) { - struct batadv_priv *bat_priv = netdev_priv(hard_iface->mesh_iface); - if (hard_iface->if_status != BATADV_IF_ACTIVE) { kfree_skb(skb); return; @@ -187,6 +187,7 @@ static void batadv_v_ogm_aggr_list_free(struct batadv_hard_iface *hard_iface) /** * batadv_v_ogm_aggr_send() - flush & send aggregation queue + * @bat_priv: the bat priv with all the mesh interface information * @hard_iface: the interface with the aggregation queue to flush * * Aggregates all OGMv2 packets currently in the aggregation queue into a @@ -196,7 +197,8 @@ static void batadv_v_ogm_aggr_list_free(struct batadv_hard_iface *hard_iface) * * Caller needs to hold the hard_iface->bat_v.aggr_list.lock. */ -static void batadv_v_ogm_aggr_send(struct batadv_hard_iface *hard_iface) +static void batadv_v_ogm_aggr_send(struct batadv_priv *bat_priv, + struct batadv_hard_iface *hard_iface) { unsigned int aggr_len = hard_iface->bat_v.aggr_len; struct sk_buff *skb_aggr; @@ -226,27 +228,32 @@ static void batadv_v_ogm_aggr_send(struct batadv_hard_iface *hard_iface) consume_skb(skb); } - batadv_v_ogm_send_to_if(skb_aggr, hard_iface); + batadv_v_ogm_send_to_if(bat_priv, skb_aggr, hard_iface); } /** * batadv_v_ogm_queue_on_if() - queue a batman ogm on a given interface + * @bat_priv: the bat priv with all the mesh interface information * @skb: the OGM to queue * @hard_iface: the interface to queue the OGM on */ -static void batadv_v_ogm_queue_on_if(struct sk_buff *skb, +static void batadv_v_ogm_queue_on_if(struct batadv_priv *bat_priv, + struct sk_buff *skb, struct batadv_hard_iface *hard_iface) { - struct batadv_priv *bat_priv = netdev_priv(hard_iface->mesh_iface); + if (hard_iface->mesh_iface != bat_priv->mesh_iface) { + kfree_skb(skb); + return; + } if (!atomic_read(&bat_priv->aggregated_ogms)) { - batadv_v_ogm_send_to_if(skb, hard_iface); + batadv_v_ogm_send_to_if(bat_priv, skb, hard_iface); return; } spin_lock_bh(&hard_iface->bat_v.aggr_list.lock); if (!batadv_v_ogm_queue_left(skb, hard_iface)) - batadv_v_ogm_aggr_send(hard_iface); + batadv_v_ogm_aggr_send(bat_priv, hard_iface); hard_iface->bat_v.aggr_len += batadv_v_ogm_len(skb); __skb_queue_tail(&hard_iface->bat_v.aggr_list, skb); @@ -262,10 +269,10 @@ static void batadv_v_ogm_send_meshif(struct batadv_priv *bat_priv) struct batadv_hard_iface *hard_iface; struct batadv_ogm2_packet *ogm_packet; struct sk_buff *skb, *skb_tmp; - unsigned char *ogm_buff; + unsigned char **ogm_buff; struct list_head *iter; - int ogm_buff_len; - u16 tvlv_len = 0; + int *ogm_buff_len; + u16 tvlv_len; int ret; lockdep_assert_held(&bat_priv->bat_v.ogm_buff_mutex); @@ -273,25 +280,27 @@ static void batadv_v_ogm_send_meshif(struct batadv_priv *bat_priv) if (atomic_read(&bat_priv->mesh_state) == BATADV_MESH_DEACTIVATING) goto out; - ogm_buff = bat_priv->bat_v.ogm_buff; - ogm_buff_len = bat_priv->bat_v.ogm_buff_len; + ogm_buff = &bat_priv->bat_v.ogm_buff; + ogm_buff_len = &bat_priv->bat_v.ogm_buff_len; + /* tt changes have to be committed before the tvlv data is * appended as it may alter the tt tvlv container */ batadv_tt_local_commit_changes(bat_priv); - tvlv_len = batadv_tvlv_container_ogm_append(bat_priv, &ogm_buff, - &ogm_buff_len, - BATADV_OGM2_HLEN); + ret = batadv_tvlv_container_ogm_append(bat_priv, ogm_buff, + ogm_buff_len, + BATADV_OGM2_HLEN); + if (ret < 0) + goto reschedule; - bat_priv->bat_v.ogm_buff = ogm_buff; - bat_priv->bat_v.ogm_buff_len = ogm_buff_len; + tvlv_len = ret; - skb = netdev_alloc_skb_ip_align(NULL, ETH_HLEN + ogm_buff_len); + skb = netdev_alloc_skb_ip_align(NULL, ETH_HLEN + *ogm_buff_len); if (!skb) goto reschedule; skb_reserve(skb, ETH_HLEN); - skb_put_data(skb, ogm_buff, ogm_buff_len); + skb_put_data(skb, *ogm_buff, *ogm_buff_len); ogm_packet = (struct batadv_ogm2_packet *)skb->data; ogm_packet->seqno = htonl(atomic_read(&bat_priv->bat_v.ogm_seqno)); @@ -343,7 +352,7 @@ static void batadv_v_ogm_send_meshif(struct batadv_priv *bat_priv) break; } - batadv_v_ogm_queue_on_if(skb_tmp, hard_iface); + batadv_v_ogm_queue_on_if(bat_priv, skb_tmp, hard_iface); batadv_hardif_put(hard_iface); } rcu_read_unlock(); @@ -383,12 +392,14 @@ void batadv_v_ogm_aggr_work(struct work_struct *work) { struct batadv_hard_iface_bat_v *batv; struct batadv_hard_iface *hard_iface; + struct batadv_priv *bat_priv; batv = container_of(work, struct batadv_hard_iface_bat_v, aggr_wq.work); hard_iface = container_of(batv, struct batadv_hard_iface, bat_v); + bat_priv = netdev_priv(hard_iface->mesh_iface); spin_lock_bh(&hard_iface->bat_v.aggr_list.lock); - batadv_v_ogm_aggr_send(hard_iface); + batadv_v_ogm_aggr_send(bat_priv, hard_iface); spin_unlock_bh(&hard_iface->bat_v.aggr_list.lock); batadv_v_ogm_start_queue_timer(hard_iface); @@ -578,7 +589,7 @@ static void batadv_v_ogm_forward(struct batadv_priv *bat_priv, if_outgoing->net_dev->name, ntohl(ogm_forward->throughput), ogm_forward->ttl, if_incoming->net_dev->name); - batadv_v_ogm_queue_on_if(skb, if_outgoing); + batadv_v_ogm_queue_on_if(bat_priv, skb, if_outgoing); out: batadv_orig_ifinfo_put(orig_ifinfo); diff --git a/net/batman-adv/bridge_loop_avoidance.c b/net/batman-adv/bridge_loop_avoidance.c index cec11f1251d6..ffe854018bd3 100644 --- a/net/batman-adv/bridge_loop_avoidance.c +++ b/net/batman-adv/bridge_loop_avoidance.c @@ -356,12 +356,14 @@ static void batadv_bla_send_claim(struct batadv_priv *bat_priv, const u8 *mac, sizeof(local_claim_dest)); local_claim_dest.type = claimtype; - mesh_iface = primary_if->mesh_iface; + mesh_iface = READ_ONCE(primary_if->mesh_iface); + if (!mesh_iface) + goto out; skb = arp_create(ARPOP_REPLY, ETH_P_ARP, /* IP DST: 0.0.0.0 */ zeroip, - primary_if->mesh_iface, + mesh_iface, /* IP SRC: 0.0.0.0 */ zeroip, /* Ethernet DST: Broadcast */ @@ -514,8 +516,8 @@ batadv_bla_get_backbone_gw(struct batadv_priv *bat_priv, const u8 *orig, entry->crc = BATADV_BLA_CRC_INIT; entry->bat_priv = bat_priv; spin_lock_init(&entry->crc_lock); - atomic_set(&entry->request_sent, 0); - atomic_set(&entry->wait_periods, 0); + entry->state = BATADV_BLA_BACKBONE_GW_SYNCED; + entry->wait_periods = 0; ether_addr_copy(entry->orig, orig); INIT_WORK(&entry->report_work, batadv_bla_loopdetect_report); kref_init(&entry->refcount); @@ -544,9 +546,13 @@ batadv_bla_get_backbone_gw(struct batadv_priv *bat_priv, const u8 *orig, batadv_bla_send_announce(bat_priv, entry); /* this will be decreased in the worker thread */ - atomic_inc(&entry->request_sent); - atomic_set(&entry->wait_periods, BATADV_BLA_WAIT_PERIODS); - atomic_inc(&bat_priv->bla.num_requests); + spin_lock_bh(&bat_priv->bla.num_requests_lock); + if (entry->state == BATADV_BLA_BACKBONE_GW_SYNCED) { + entry->state = BATADV_BLA_BACKBONE_GW_UNSYNCED; + entry->wait_periods = BATADV_BLA_WAIT_PERIODS; + atomic_inc(&bat_priv->bla.num_requests); + } + spin_unlock_bh(&bat_priv->bla.num_requests_lock); } return entry; @@ -649,10 +655,12 @@ static void batadv_bla_send_request(struct batadv_bla_backbone_gw *backbone_gw) backbone_gw->vid, BATADV_CLAIM_TYPE_REQUEST); /* no local broadcasts should be sent or received, for now. */ - if (!atomic_read(&backbone_gw->request_sent)) { + spin_lock_bh(&backbone_gw->bat_priv->bla.num_requests_lock); + if (backbone_gw->state == BATADV_BLA_BACKBONE_GW_SYNCED) { + backbone_gw->state = BATADV_BLA_BACKBONE_GW_UNSYNCED; atomic_inc(&backbone_gw->bat_priv->bla.num_requests); - atomic_set(&backbone_gw->request_sent, 1); } + spin_unlock_bh(&backbone_gw->bat_priv->bla.num_requests_lock); } /** @@ -873,10 +881,12 @@ static bool batadv_handle_announce(struct batadv_priv *bat_priv, u8 *an_addr, /* if we have sent a request and the crc was OK, * we can allow traffic again. */ - if (atomic_read(&backbone_gw->request_sent)) { + spin_lock_bh(&bat_priv->bla.num_requests_lock); + if (backbone_gw->state == BATADV_BLA_BACKBONE_GW_UNSYNCED) { + backbone_gw->state = BATADV_BLA_BACKBONE_GW_SYNCED; atomic_dec(&backbone_gw->bat_priv->bla.num_requests); - atomic_set(&backbone_gw->request_sent, 0); } + spin_unlock_bh(&bat_priv->bla.num_requests_lock); } batadv_backbone_gw_put(backbone_gw); @@ -1224,6 +1234,7 @@ static void batadv_bla_purge_backbone_gw(struct batadv_priv *bat_priv, int now) struct hlist_head *head; struct batadv_hashtable *hash; spinlock_t *list_lock; /* protects write access to the hash lists */ + bool purged; int i; hash = bat_priv->bla.backbone_hash; @@ -1234,30 +1245,49 @@ static void batadv_bla_purge_backbone_gw(struct batadv_priv *bat_priv, int now) head = &hash->table[i]; list_lock = &hash->list_locks[i]; - spin_lock_bh(list_lock); - hlist_for_each_entry_safe(backbone_gw, node_tmp, - head, hash_entry) { - if (now) - goto purge_now; - if (!batadv_has_timed_out(backbone_gw->lasttime, - BATADV_BLA_BACKBONE_TIMEOUT)) - continue; + do { + purged = false; + + spin_lock_bh(list_lock); + hlist_for_each_entry_safe(backbone_gw, node_tmp, + head, hash_entry) { + if (now) + goto purge_now; + if (!batadv_has_timed_out(backbone_gw->lasttime, + BATADV_BLA_BACKBONE_TIMEOUT)) + continue; - batadv_dbg(BATADV_DBG_BLA, backbone_gw->bat_priv, - "%s(): backbone gw %pM timed out\n", - __func__, backbone_gw->orig); + batadv_dbg(BATADV_DBG_BLA, backbone_gw->bat_priv, + "%s(): backbone gw %pM timed out\n", + __func__, backbone_gw->orig); purge_now: - /* don't wait for the pending request anymore */ - if (atomic_read(&backbone_gw->request_sent)) - atomic_dec(&bat_priv->bla.num_requests); + purged = true; - batadv_bla_del_backbone_claims(backbone_gw); + /* don't wait for the pending request anymore */ + spin_lock_bh(&bat_priv->bla.num_requests_lock); + if (backbone_gw->state == BATADV_BLA_BACKBONE_GW_UNSYNCED) + atomic_dec(&bat_priv->bla.num_requests); - hlist_del_rcu(&backbone_gw->hash_entry); - batadv_backbone_gw_put(backbone_gw); - } - spin_unlock_bh(list_lock); + backbone_gw->state = BATADV_BLA_BACKBONE_GW_STOPPED; + spin_unlock_bh(&bat_priv->bla.num_requests_lock); + + batadv_bla_del_backbone_claims(backbone_gw); + + hlist_del_rcu(&backbone_gw->hash_entry); + break; + } + spin_unlock_bh(list_lock); + + if (purged) { + /* reference for pending report_work */ + if (cancel_work_sync(&backbone_gw->report_work)) + batadv_backbone_gw_put(backbone_gw); + + /* reference for hash_entry */ + batadv_backbone_gw_put(backbone_gw); + } + } while (purged); } } @@ -1492,7 +1522,7 @@ static void batadv_bla_periodic_work(struct work_struct *work) batadv_bla_send_loopdetect(bat_priv, backbone_gw); - /* request_sent is only set after creation to avoid + /* state is only set to unsynced after creation to avoid * problems when we are not yet known as backbone gw * in the backbone. * @@ -1501,14 +1531,21 @@ static void batadv_bla_periodic_work(struct work_struct *work) * some grace time. */ - if (atomic_read(&backbone_gw->request_sent) == 0) - continue; + spin_lock_bh(&bat_priv->bla.num_requests_lock); + if (backbone_gw->state != BATADV_BLA_BACKBONE_GW_UNSYNCED) + goto unlock_next; - if (!atomic_dec_and_test(&backbone_gw->wait_periods)) - continue; + if (backbone_gw->wait_periods > 0) + backbone_gw->wait_periods--; + if (backbone_gw->wait_periods > 0) + goto unlock_next; + + backbone_gw->state = BATADV_BLA_BACKBONE_GW_SYNCED; atomic_dec(&backbone_gw->bat_priv->bla.num_requests); - atomic_set(&backbone_gw->request_sent, 0); + +unlock_next: + spin_unlock_bh(&bat_priv->bla.num_requests_lock); } rcu_read_unlock(); } diff --git a/net/batman-adv/distributed-arp-table.c b/net/batman-adv/distributed-arp-table.c index 3efc4cf50b46..0a8bd95e2f99 100644 --- a/net/batman-adv/distributed-arp-table.c +++ b/net/batman-adv/distributed-arp-table.c @@ -696,6 +696,9 @@ static bool batadv_dat_forward_data(struct batadv_priv *bat_priv, goto free_orig; tmp_skb = pskb_copy_for_clone(skb, GFP_ATOMIC); + if (!tmp_skb) + goto free_neigh; + if (!batadv_send_skb_prepare_unicast_4addr(bat_priv, tmp_skb, cand[i].orig_node, packet_subtype)) { diff --git a/net/batman-adv/fragmentation.c b/net/batman-adv/fragmentation.c index f4e45cc25816..4a594aa2ebf6 100644 --- a/net/batman-adv/fragmentation.c +++ b/net/batman-adv/fragmentation.c @@ -17,6 +17,7 @@ #include <linux/lockdep.h> #include <linux/minmax.h> #include <linux/netdevice.h> +#include <linux/overflow.h> #include <linux/skbuff.h> #include <linux/slab.h> #include <linux/spinlock.h> @@ -80,9 +81,9 @@ void batadv_frag_purge_orig(struct batadv_orig_node *orig_node, * * Return: the maximum size of payload that can be fragmented. */ -static int batadv_frag_size_limit(void) +static size_t batadv_frag_size_limit(void) { - int limit = BATADV_FRAG_MAX_FRAG_SIZE; + size_t limit = BATADV_FRAG_MAX_FRAG_SIZE; limit -= sizeof(struct batadv_frag_packet); limit *= BATADV_FRAG_MAX_FRAGMENTS; @@ -143,7 +144,9 @@ static bool batadv_frag_insert_packet(struct batadv_orig_node *orig_node, struct batadv_frag_packet *frag_packet; u8 bucket; u16 seqno, hdr_size = sizeof(struct batadv_frag_packet); + bool overflow = false; bool ret = false; + size_t data_len; /* Linearize packet to avoid linearizing 16 packets in a row when doing * the later merge. Non-linear merge should be added to remove this @@ -153,6 +156,7 @@ static bool batadv_frag_insert_packet(struct batadv_orig_node *orig_node, goto err; frag_packet = (struct batadv_frag_packet *)skb->data; + data_len = skb->len - hdr_size; seqno = ntohs(frag_packet->seqno); bucket = seqno % BATADV_FRAG_BUFFER_COUNT; @@ -171,7 +175,7 @@ static bool batadv_frag_insert_packet(struct batadv_orig_node *orig_node, spin_lock_bh(&chain->lock); if (batadv_frag_init_chain(chain, seqno)) { hlist_add_head(&frag_entry_new->list, &chain->fragment_list); - chain->size = skb->len - hdr_size; + chain->size = data_len; chain->timestamp = jiffies; chain->total_size = ntohs(frag_packet->total_size); ret = true; @@ -188,7 +192,11 @@ static bool batadv_frag_insert_packet(struct batadv_orig_node *orig_node, if (frag_entry_curr->no < frag_entry_new->no) { hlist_add_before(&frag_entry_new->list, &frag_entry_curr->list); - chain->size += skb->len - hdr_size; + + if (check_add_overflow(chain->size, data_len, + &chain->size)) + overflow = true; + chain->timestamp = jiffies; ret = true; goto out; @@ -201,13 +209,16 @@ static bool batadv_frag_insert_packet(struct batadv_orig_node *orig_node, /* Reached the end of the list, so insert after 'frag_entry_last'. */ if (likely(frag_entry_last)) { hlist_add_behind(&frag_entry_new->list, &frag_entry_last->list); - chain->size += skb->len - hdr_size; + + if (check_add_overflow(chain->size, data_len, &chain->size)) + overflow = true; + chain->timestamp = jiffies; ret = true; } out: - if (chain->size > batadv_frag_size_limit() || + if (overflow || chain->size > batadv_frag_size_limit() || chain->total_size != ntohs(frag_packet->total_size) || chain->total_size > batadv_frag_size_limit()) { /* Clear chain if total size of either the list or the packet @@ -294,6 +305,31 @@ free: } /** + * batadv_skb_is_frag() - check if newly merged skb is gain a unicast packet + * @skb: newly merged skb + * + * Return: if newly skb is of type BATADV_UNICAST_FRAG + */ +static bool batadv_skb_is_frag(struct sk_buff *skb) +{ + struct batadv_ogm_packet *batadv_ogm_packet; + + /* packet should hold at least type and version */ + if (unlikely(!pskb_may_pull(skb, 2))) + return false; + + batadv_ogm_packet = (struct batadv_ogm_packet *)skb->data; + + if (batadv_ogm_packet->version != BATADV_COMPAT_VERSION) + return false; + + if (batadv_ogm_packet->packet_type != BATADV_UNICAST_FRAG) + return false; + + return true; +} + +/** * batadv_frag_skb_buffer() - buffer fragment for later merge * @skb: skb to buffer * @orig_node_src: originator that the skb is received from @@ -326,6 +362,16 @@ bool batadv_frag_skb_buffer(struct sk_buff **skb, if (!skb_out) goto out_err; + /* fragment in fragment is not allowed. otherwise it is possible + * to exhaust the stack when receiving a matryoshka-style + * "fragments in a fragment packet" + */ + if (batadv_skb_is_frag(skb_out)) { + kfree_skb(skb_out); + skb_out = NULL; + goto out_err; + } + out: ret = true; out_err: diff --git a/net/batman-adv/gateway_client.c b/net/batman-adv/gateway_client.c index 51e9c081a2a4..a9d0346e8332 100644 --- a/net/batman-adv/gateway_client.c +++ b/net/batman-adv/gateway_client.c @@ -478,10 +478,14 @@ void batadv_gw_node_delete(struct batadv_priv *bat_priv, */ void batadv_gw_node_free(struct batadv_priv *bat_priv) { + struct batadv_gw_node *curr_gw; struct batadv_gw_node *gw_node; struct hlist_node *node_tmp; spin_lock_bh(&bat_priv->gw.list_lock); + curr_gw = rcu_replace_pointer(bat_priv->gw.curr_gw, NULL, true); + batadv_gw_node_put(curr_gw); + hlist_for_each_entry_safe(gw_node, node_tmp, &bat_priv->gw.gateway_list, list) { hlist_del_init_rcu(&gw_node->list); diff --git a/net/batman-adv/mesh-interface.c b/net/batman-adv/mesh-interface.c index 56ca1c1b83f2..e7aa45bc6b7a 100644 --- a/net/batman-adv/mesh-interface.c +++ b/net/batman-adv/mesh-interface.c @@ -787,6 +787,7 @@ static int batadv_meshif_init_late(struct net_device *dev) atomic_set(&bat_priv->tt.ogm_append_cnt, 0); #ifdef CONFIG_BATMAN_ADV_BLA atomic_set(&bat_priv->bla.num_requests, 0); + spin_lock_init(&bat_priv->bla.num_requests_lock); #endif atomic_set(&bat_priv->tp_num, 0); diff --git a/net/batman-adv/originator.c b/net/batman-adv/originator.c index b3468ccab535..ad4921b659d9 100644 --- a/net/batman-adv/originator.c +++ b/net/batman-adv/originator.c @@ -835,8 +835,6 @@ static void batadv_orig_node_free_rcu(struct rcu_head *rcu) orig_node = container_of(rcu, struct batadv_orig_node, rcu); - batadv_mcast_purge_orig(orig_node); - batadv_frag_purge_orig(orig_node, NULL); kfree(orig_node->tt_buff); @@ -887,6 +885,8 @@ void batadv_orig_node_release(struct kref *ref) } spin_unlock_bh(&orig_node->vlan_list_lock); + batadv_mcast_purge_orig(orig_node); + call_rcu(&orig_node->rcu, batadv_orig_node_free_rcu); } diff --git a/net/batman-adv/tp_meter.c b/net/batman-adv/tp_meter.c index 066c76113fc4..0fc4ca78e84e 100644 --- a/net/batman-adv/tp_meter.c +++ b/net/batman-adv/tp_meter.c @@ -8,6 +8,7 @@ #include "main.h" #include <linux/atomic.h> +#include <linux/bug.h> #include <linux/build_bug.h> #include <linux/byteorder/generic.h> #include <linux/cache.h> @@ -254,6 +255,7 @@ static void batadv_tp_batctl_error_notify(enum batadv_tp_meter_reason reason, * batadv_tp_list_find() - find a tp_vars object in the global list * @bat_priv: the bat priv with all the mesh interface information * @dst: the other endpoint MAC address to look for + * @role: role of the session * * Look for a tp_vars object matching dst as end_point and return it after * having increment the refcounter. Return NULL is not found @@ -261,7 +263,8 @@ static void batadv_tp_batctl_error_notify(enum batadv_tp_meter_reason reason, * Return: matching tp_vars or NULL when no tp_vars with @dst was found */ static struct batadv_tp_vars *batadv_tp_list_find(struct batadv_priv *bat_priv, - const u8 *dst) + const u8 *dst, + enum batadv_tp_meter_role role) { struct batadv_tp_vars *pos, *tp_vars = NULL; @@ -270,6 +273,9 @@ static struct batadv_tp_vars *batadv_tp_list_find(struct batadv_priv *bat_priv, if (!batadv_compare_eth(pos->other_end, dst)) continue; + if (pos->role != role) + continue; + /* most of the time this function is invoked during the normal * process..it makes sens to pay more when the session is * finished and to speed the process up during the measurement @@ -286,11 +292,32 @@ static struct batadv_tp_vars *batadv_tp_list_find(struct batadv_priv *bat_priv, } /** + * batadv_tp_list_active() - check if session from/to destination is ongoing + * @bat_priv: the bat priv with all the mesh interface information + * @dst: the other endpoint MAC address to look for + * + * Return: if matching session with @dst was found + */ +static bool batadv_tp_list_active(struct batadv_priv *bat_priv, const u8 *dst) + __must_hold(&bat_priv->tp_list_lock) +{ + struct batadv_tp_vars *tp_vars; + + hlist_for_each_entry_rcu(tp_vars, &bat_priv->tp_list, list) { + if (batadv_compare_eth(tp_vars->other_end, dst)) + return true; + } + + return false; +} + +/** * batadv_tp_list_find_session() - find tp_vars session object in the global * list * @bat_priv: the bat priv with all the mesh interface information * @dst: the other endpoint MAC address to look for * @session: session identifier + * @role: role of the session * * Look for a tp_vars object matching dst as end_point, session as tp meter * session and return it after having increment the refcounter. Return NULL @@ -300,7 +327,7 @@ static struct batadv_tp_vars *batadv_tp_list_find(struct batadv_priv *bat_priv, */ static struct batadv_tp_vars * batadv_tp_list_find_session(struct batadv_priv *bat_priv, const u8 *dst, - const u8 *session) + const u8 *session, enum batadv_tp_meter_role role) { struct batadv_tp_vars *pos, *tp_vars = NULL; @@ -312,6 +339,9 @@ batadv_tp_list_find_session(struct batadv_priv *bat_priv, const u8 *dst, if (memcmp(pos->session, session, sizeof(pos->session)) != 0) continue; + if (pos->role != role) + continue; + /* most of the time this function is invoked during the normal * process..it makes sense to pay more when the session is * finished and to speed the process up during the measurement @@ -400,13 +430,7 @@ static void batadv_tp_sender_cleanup(struct batadv_tp_vars *tp_vars) batadv_tp_list_detach(tp_vars); /* kill the timer and remove its reference */ - timer_delete_sync(&tp_vars->timer); - /* the worker might have rearmed itself therefore we kill it again. Note - * that if the worker should run again before invoking the following - * timer_delete(), it would not re-arm itself once again because the status - * is OFF now - */ - timer_delete(&tp_vars->timer); + timer_shutdown_sync(&tp_vars->timer); batadv_tp_vars_put(tp_vars); } @@ -418,11 +442,14 @@ static void batadv_tp_sender_cleanup(struct batadv_tp_vars *tp_vars) static void batadv_tp_sender_end(struct batadv_priv *bat_priv, struct batadv_tp_vars *tp_vars) { + enum batadv_tp_meter_reason reason; u32 session_cookie; + reason = atomic_read(&tp_vars->send_result); + batadv_dbg(BATADV_DBG_TP_METER, bat_priv, "Test towards %pM finished..shutting down (reason=%d)\n", - tp_vars->other_end, tp_vars->reason); + tp_vars->other_end, reason); batadv_dbg(BATADV_DBG_TP_METER, bat_priv, "Last timing stats: SRTT=%ums RTTVAR=%ums RTO=%ums\n", @@ -435,7 +462,7 @@ static void batadv_tp_sender_end(struct batadv_priv *bat_priv, session_cookie = batadv_tp_session_cookie(tp_vars->session, tp_vars->icmp_uid); - batadv_tp_batctl_notify(tp_vars->reason, + batadv_tp_batctl_notify(reason, tp_vars->other_end, bat_priv, tp_vars->start_time, @@ -451,10 +478,18 @@ static void batadv_tp_sender_end(struct batadv_priv *bat_priv, static void batadv_tp_sender_shutdown(struct batadv_tp_vars *tp_vars, enum batadv_tp_meter_reason reason) { - if (!atomic_dec_and_test(&tp_vars->sending)) - return; + atomic_cmpxchg(&tp_vars->send_result, 0, reason); +} - tp_vars->reason = reason; +/** + * batadv_tp_sender_stopped() - check if tp session was stopped with reason + * @tp_vars: the private data of the current TP meter session + * + * Return: whether stop reason was found + */ +static bool batadv_tp_sender_stopped(struct batadv_tp_vars *tp_vars) +{ + return atomic_read(&tp_vars->send_result) != 0; } /** @@ -484,7 +519,7 @@ static void batadv_tp_reset_sender_timer(struct batadv_tp_vars *tp_vars) /* most of the time this function is invoked while normal packet * reception... */ - if (unlikely(atomic_read(&tp_vars->sending) == 0)) + if (unlikely(batadv_tp_sender_stopped(tp_vars))) /* timer ref will be dropped in batadv_tp_sender_cleanup */ return; @@ -504,7 +539,7 @@ static void batadv_tp_sender_timeout(struct timer_list *t) struct batadv_tp_vars *tp_vars = timer_container_of(tp_vars, t, timer); struct batadv_priv *bat_priv = tp_vars->bat_priv; - if (atomic_read(&tp_vars->sending) == 0) + if (batadv_tp_sender_stopped(tp_vars)) return; /* if the user waited long enough...shutdown the test */ @@ -659,11 +694,11 @@ static void batadv_tp_recv_ack(struct batadv_priv *bat_priv, /* find the tp_vars */ tp_vars = batadv_tp_list_find_session(bat_priv, icmp->orig, - icmp->session); + icmp->session, BATADV_TP_SENDER); if (unlikely(!tp_vars)) return; - if (unlikely(atomic_read(&tp_vars->sending) == 0)) + if (unlikely(batadv_tp_sender_stopped(tp_vars))) goto out; /* old ACK? silently drop it.. */ @@ -829,21 +864,21 @@ static int batadv_tp_send(void *arg) if (unlikely(tp_vars->role != BATADV_TP_SENDER)) { err = BATADV_TP_REASON_DST_UNREACHABLE; - tp_vars->reason = err; + batadv_tp_sender_shutdown(tp_vars, err); goto out; } orig_node = batadv_orig_hash_find(bat_priv, tp_vars->other_end); if (unlikely(!orig_node)) { err = BATADV_TP_REASON_DST_UNREACHABLE; - tp_vars->reason = err; + batadv_tp_sender_shutdown(tp_vars, err); goto out; } primary_if = batadv_primary_if_get_selected(bat_priv); if (unlikely(!primary_if)) { err = BATADV_TP_REASON_DST_UNREACHABLE; - tp_vars->reason = err; + batadv_tp_sender_shutdown(tp_vars, err); goto out; } @@ -862,7 +897,7 @@ static int batadv_tp_send(void *arg) queue_delayed_work(batadv_event_workqueue, &tp_vars->finish_work, msecs_to_jiffies(tp_vars->test_length)); - while (atomic_read(&tp_vars->sending) != 0) { + while (!batadv_tp_sender_stopped(tp_vars)) { if (unlikely(!batadv_tp_avail(tp_vars, payload_len))) { batadv_tp_wait_available(tp_vars, payload_len); continue; @@ -885,8 +920,7 @@ static int batadv_tp_send(void *arg) "Meter: %s() cannot send packets (%d)\n", __func__, err); /* ensure nobody else tries to stop the thread now */ - if (atomic_dec_and_test(&tp_vars->sending)) - tp_vars->reason = err; + batadv_tp_sender_shutdown(tp_vars, err); break; } @@ -972,10 +1006,8 @@ void batadv_tp_start(struct batadv_priv *bat_priv, const u8 *dst, return; } - tp_vars = batadv_tp_list_find(bat_priv, dst); - if (tp_vars) { + if (batadv_tp_list_active(bat_priv, dst)) { spin_unlock_bh(&bat_priv->tp_list_lock); - batadv_tp_vars_put(tp_vars); batadv_dbg(BATADV_DBG_TP_METER, bat_priv, "Meter: test to or from the same node already ongoing, aborting\n"); batadv_tp_batctl_error_notify(BATADV_TP_REASON_ALREADY_ONGOING, @@ -1008,7 +1040,7 @@ void batadv_tp_start(struct batadv_priv *bat_priv, const u8 *dst, ether_addr_copy(tp_vars->other_end, dst); kref_init(&tp_vars->refcount); tp_vars->role = BATADV_TP_SENDER; - atomic_set(&tp_vars->sending, 1); + atomic_set(&tp_vars->send_result, 0); memcpy(tp_vars->session, session_id, sizeof(session_id)); tp_vars->icmp_uid = icmp_uid; @@ -1096,16 +1128,16 @@ void batadv_tp_stop(struct batadv_priv *bat_priv, const u8 *dst, if (!orig_node) return; - tp_vars = batadv_tp_list_find(bat_priv, orig_node->orig); + tp_vars = batadv_tp_list_find(bat_priv, orig_node->orig, BATADV_TP_SENDER); if (!tp_vars) { batadv_dbg(BATADV_DBG_TP_METER, bat_priv, "Meter: trying to interrupt an already over connection\n"); - goto out; + goto out_put_orig_node; } batadv_tp_sender_shutdown(tp_vars, return_value); batadv_tp_vars_put(tp_vars); -out: +out_put_orig_node: batadv_orig_node_put(orig_node); } @@ -1156,6 +1188,9 @@ static void batadv_tp_receiver_shutdown(struct timer_list *t) spin_unlock_bh(&tp_vars->unacked_lock); /* drop reference of timer */ + if (WARN_ON(atomic_xchg(&tp_vars->receiving, 0) != 1)) + return; + batadv_tp_vars_put(tp_vars); } @@ -1356,7 +1391,7 @@ batadv_tp_init_recv(struct batadv_priv *bat_priv, goto out_unlock; tp_vars = batadv_tp_list_find_session(bat_priv, icmp->orig, - icmp->session); + icmp->session, BATADV_TP_RECEIVER); if (tp_vars) goto out_unlock; @@ -1374,6 +1409,7 @@ batadv_tp_init_recv(struct batadv_priv *bat_priv, ether_addr_copy(tp_vars->other_end, icmp->orig); tp_vars->role = BATADV_TP_RECEIVER; + atomic_set(&tp_vars->receiving, 1); memcpy(tp_vars->session, icmp->session, sizeof(tp_vars->session)); tp_vars->last_recv = BATADV_TP_FIRST_SEQ; tp_vars->bat_priv = bat_priv; @@ -1426,7 +1462,7 @@ static void batadv_tp_recv_msg(struct batadv_priv *bat_priv, } } else { tp_vars = batadv_tp_list_find_session(bat_priv, icmp->orig, - icmp->session); + icmp->session, BATADV_TP_RECEIVER); if (!tp_vars) { batadv_dbg(BATADV_DBG_TP_METER, bat_priv, "Unexpected packet from %pM!\n", @@ -1435,13 +1471,6 @@ static void batadv_tp_recv_msg(struct batadv_priv *bat_priv, } } - if (unlikely(tp_vars->role != BATADV_TP_RECEIVER)) { - batadv_dbg(BATADV_DBG_TP_METER, bat_priv, - "Meter: dropping packet: not expected (role=%u)\n", - tp_vars->role); - goto out; - } - tp_vars->last_recv_time = jiffies; /* if the packet is a duplicate, it may be the case that an ACK has been @@ -1546,8 +1575,12 @@ void batadv_tp_stop_all(struct batadv_priv *bat_priv) break; case BATADV_TP_RECEIVER: batadv_tp_list_detach(tp_var); - if (timer_shutdown_sync(&tp_var->timer)) - batadv_tp_vars_put(tp_var); + timer_shutdown_sync(&tp_var->timer); + + if (atomic_xchg(&tp_var->receiving, 0) != 1) + break; + + batadv_tp_vars_put(tp_var); break; } diff --git a/net/batman-adv/translation-table.c b/net/batman-adv/translation-table.c index 05cddcf994f6..9f6e67771ffa 100644 --- a/net/batman-adv/translation-table.c +++ b/net/batman-adv/translation-table.c @@ -797,24 +797,33 @@ batadv_tt_prepare_tvlv_global_data(struct batadv_orig_node *orig_node, s32 *tt_len) { u16 num_vlan = 0; - u16 num_entries = 0; u16 tvlv_len = 0; unsigned int change_offset; struct batadv_tvlv_tt_vlan_data *tt_vlan; struct batadv_orig_node_vlan *vlan; + u16 total_entries = 0; u8 *tt_change_ptr; + int vlan_entries; + u16 sum_entries; spin_lock_bh(&orig_node->vlan_list_lock); hlist_for_each_entry(vlan, &orig_node->vlan_list, list) { + vlan_entries = atomic_read(&vlan->tt.num_entries); + + if (check_add_overflow(vlan_entries, total_entries, &sum_entries)) { + *tt_len = 0; + goto out; + } + + total_entries = sum_entries; num_vlan++; - num_entries += atomic_read(&vlan->tt.num_entries); } change_offset = struct_size(*tt_data, vlan_data, num_vlan); /* if tt_len is negative, allocate the space needed by the full table */ if (*tt_len < 0) - *tt_len = batadv_tt_len(num_entries); + *tt_len = batadv_tt_len(total_entries); if (change_offset > U16_MAX || *tt_len > U16_MAX - change_offset) { *tt_len = 0; @@ -835,14 +844,26 @@ batadv_tt_prepare_tvlv_global_data(struct batadv_orig_node *orig_node, (*tt_data)->num_vlan = htons(num_vlan); tt_vlan = (*tt_data)->vlan_data; + num_vlan = 0; hlist_for_each_entry(vlan, &orig_node->vlan_list, list) { + vlan_entries = atomic_read(&vlan->tt.num_entries); + if (vlan_entries < 1) + continue; + tt_vlan->vid = htons(vlan->vid); tt_vlan->crc = htonl(vlan->tt.crc); tt_vlan->reserved = 0; tt_vlan++; + num_vlan++; } + /* recalculate in case number of VLANs reduced */ + change_offset = struct_size(*tt_data, vlan_data, num_vlan); + tvlv_len = *tt_len + change_offset; + + (*tt_data)->num_vlan = htons(num_vlan); + tt_change_ptr = (u8 *)*tt_data + change_offset; *tt_change = (struct batadv_tvlv_tt_change *)tt_change_ptr; @@ -877,21 +898,25 @@ batadv_tt_prepare_tvlv_local_data(struct batadv_priv *bat_priv, { struct batadv_tvlv_tt_vlan_data *tt_vlan; struct batadv_meshif_vlan *vlan; + size_t change_offset; u16 num_vlan = 0; - u16 vlan_entries = 0; u16 total_entries = 0; u16 tvlv_len; u8 *tt_change_ptr; - int change_offset; + int vlan_entries; + u16 sum_entries; spin_lock_bh(&bat_priv->meshif_vlan_list_lock); hlist_for_each_entry(vlan, &bat_priv->meshif_vlan_list, list) { vlan_entries = atomic_read(&vlan->tt.num_entries); - if (vlan_entries < 1) - continue; + if (check_add_overflow(vlan_entries, total_entries, &sum_entries)) { + tvlv_len = 0; + goto out; + } + + total_entries = sum_entries; num_vlan++; - total_entries += vlan_entries; } change_offset = struct_size(*tt_data, vlan_data, num_vlan); @@ -900,8 +925,10 @@ batadv_tt_prepare_tvlv_local_data(struct batadv_priv *bat_priv, if (*tt_len < 0) *tt_len = batadv_tt_len(total_entries); - tvlv_len = *tt_len; - tvlv_len += change_offset; + if (check_add_overflow(*tt_len, change_offset, &tvlv_len)) { + tvlv_len = 0; + goto out; + } *tt_data = kmalloc(tvlv_len, GFP_ATOMIC); if (!*tt_data) { @@ -914,6 +941,7 @@ batadv_tt_prepare_tvlv_local_data(struct batadv_priv *bat_priv, (*tt_data)->num_vlan = htons(num_vlan); tt_vlan = (*tt_data)->vlan_data; + num_vlan = 0; hlist_for_each_entry(vlan, &bat_priv->meshif_vlan_list, list) { vlan_entries = atomic_read(&vlan->tt.num_entries); if (vlan_entries < 1) @@ -924,8 +952,15 @@ batadv_tt_prepare_tvlv_local_data(struct batadv_priv *bat_priv, tt_vlan->reserved = 0; tt_vlan++; + num_vlan++; } + /* recalculate in case number of VLANs reduced */ + change_offset = struct_size(*tt_data, vlan_data, num_vlan); + tvlv_len = *tt_len + change_offset; + + (*tt_data)->num_vlan = htons(num_vlan); + tt_change_ptr = (u8 *)*tt_data + change_offset; *tt_change = (struct batadv_tvlv_tt_change *)tt_change_ptr; diff --git a/net/batman-adv/tvlv.c b/net/batman-adv/tvlv.c index 8129a3f9c44d..cc6ac580c620 100644 --- a/net/batman-adv/tvlv.c +++ b/net/batman-adv/tvlv.c @@ -8,10 +8,12 @@ #include <linux/byteorder/generic.h> #include <linux/container_of.h> +#include <linux/errno.h> #include <linux/etherdevice.h> #include <linux/gfp.h> #include <linux/if_ether.h> #include <linux/kref.h> +#include <linux/limits.h> #include <linux/list.h> #include <linux/lockdep.h> #include <linux/netdevice.h> @@ -159,10 +161,10 @@ batadv_tvlv_container_get(struct batadv_priv *bat_priv, u8 type, u8 version) * * Return: size of all currently registered tvlv containers in bytes. */ -static u16 batadv_tvlv_container_list_size(struct batadv_priv *bat_priv) +static size_t batadv_tvlv_container_list_size(struct batadv_priv *bat_priv) { struct batadv_tvlv_container *tvlv; - u16 tvlv_len = 0; + size_t tvlv_len = 0; lockdep_assert_held(&bat_priv->tvlv.container_list_lock); @@ -306,26 +308,35 @@ static bool batadv_tvlv_realloc_packet_buff(unsigned char **packet_buff, * The ogm packet might be enlarged or shrunk depending on the current size * and the size of the to-be-appended tvlv containers. * - * Return: size of all appended tvlv containers in bytes. + * Return: size of all appended tvlv containers in bytes (max U16_MAX), negative + * if operation failed */ -u16 batadv_tvlv_container_ogm_append(struct batadv_priv *bat_priv, +int batadv_tvlv_container_ogm_append(struct batadv_priv *bat_priv, unsigned char **packet_buff, int *packet_buff_len, int packet_min_len) { struct batadv_tvlv_container *tvlv; struct batadv_tvlv_hdr *tvlv_hdr; - u16 tvlv_value_len; + size_t tvlv_value_len; void *tvlv_value; + int tvlv_len_ret; bool ret; spin_lock_bh(&bat_priv->tvlv.container_list_lock); tvlv_value_len = batadv_tvlv_container_list_size(bat_priv); + if (tvlv_value_len > U16_MAX) { + tvlv_len_ret = -E2BIG; + goto end; + } ret = batadv_tvlv_realloc_packet_buff(packet_buff, packet_buff_len, packet_min_len, tvlv_value_len); - - if (!ret) + if (!ret) { + tvlv_len_ret = -ENOMEM; goto end; + } + + tvlv_len_ret = tvlv_value_len; if (!tvlv_value_len) goto end; @@ -344,7 +355,8 @@ u16 batadv_tvlv_container_ogm_append(struct batadv_priv *bat_priv, end: spin_unlock_bh(&bat_priv->tvlv.container_list_lock); - return tvlv_value_len; + + return tvlv_len_ret; } /** diff --git a/net/batman-adv/tvlv.h b/net/batman-adv/tvlv.h index e5697230d991..f96f6b3f44a0 100644 --- a/net/batman-adv/tvlv.h +++ b/net/batman-adv/tvlv.h @@ -16,7 +16,7 @@ void batadv_tvlv_container_register(struct batadv_priv *bat_priv, u8 type, u8 version, void *tvlv_value, u16 tvlv_value_len); -u16 batadv_tvlv_container_ogm_append(struct batadv_priv *bat_priv, +int batadv_tvlv_container_ogm_append(struct batadv_priv *bat_priv, unsigned char **packet_buff, int *packet_buff_len, int packet_min_len); void batadv_tvlv_ogm_receive(struct batadv_priv *bat_priv, diff --git a/net/batman-adv/types.h b/net/batman-adv/types.h index daa06f421154..a01ee46d97f3 100644 --- a/net/batman-adv/types.h +++ b/net/batman-adv/types.h @@ -83,6 +83,9 @@ struct batadv_hard_iface_bat_iv { /** @ogm_seqno: OGM sequence number - used to identify each OGM */ atomic_t ogm_seqno; + /** @reschedule_work: recover OGM schedule after schedule error */ + struct delayed_work reschedule_work; + /** @ogm_buff_mutex: lock protecting ogm_buff and ogm_buff_len */ struct mutex ogm_buff_mutex; }; @@ -301,7 +304,7 @@ struct batadv_frag_table_entry { u16 seqno; /** @size: accumulated size of packets in list */ - u16 size; + size_t size; /** @total_size: expected size of the assembled packet */ u16 total_size; @@ -452,7 +455,7 @@ struct batadv_orig_node { * @tt_buff_len: length of the last tt changeset this node received * from the orig node */ - s16 tt_buff_len; + u16 tt_buff_len; /** @tt_buff_lock: lock that protects tt_buff and tt_buff_len */ spinlock_t tt_buff_lock; @@ -993,7 +996,7 @@ struct batadv_priv_tt { * @last_changeset_len: length of last tt changeset this host has * generated */ - s16 last_changeset_len; + u16 last_changeset_len; /** * @last_changeset_lock: lock protecting last_changeset & @@ -1024,6 +1027,12 @@ struct batadv_priv_bla { atomic_t num_requests; /** + * @num_requests_lock: locks update num_requests + + * batadv_backbone_gw::state + batadv_backbone_gw::wait_periods update + */ + spinlock_t num_requests_lock; + + /** * @claim_hash: hash table containing mesh nodes this host has claimed */ struct batadv_hashtable *claim_hash; @@ -1320,11 +1329,14 @@ struct batadv_tp_vars { /** @role: receiver/sender modi */ enum batadv_tp_meter_role role; - /** @sending: sending binary semaphore: 1 if sending, 0 is not */ - atomic_t sending; + /** + * @send_result: 0 when sending is ongoing and otherwise + * enum batadv_tp_meter_reason + */ + atomic_t send_result; - /** @reason: reason for a stopped session */ - enum batadv_tp_meter_reason reason; + /** @receiving: receiving binary semaphore: 1 if receiving, 0 is not */ + atomic_t receiving; /** @finish_work: work item for the finishing procedure */ struct delayed_work finish_work; @@ -1666,6 +1678,27 @@ struct batadv_priv { #ifdef CONFIG_BATMAN_ADV_BLA +enum batadv_bla_backbone_gw_state { + /** + * @BATADV_BLA_BACKBONE_GW_STOPPED: backbone gw is being removed + * and it must not longer work on requests + */ + BATADV_BLA_BACKBONE_GW_STOPPED, + + /** + * @BATADV_BLA_BACKBONE_GW_UNSYNCED: backbone was detected out + * of sync and a request was send. No traffic is forwarded until the + * situation is resolved + */ + BATADV_BLA_BACKBONE_GW_UNSYNCED, + + /** + * @BATADV_BLA_BACKBONE_GW_SYNCED: backbone is consider to be in + * sync. traffic can be forwarded + */ + BATADV_BLA_BACKBONE_GW_SYNCED, +}; + /** * struct batadv_bla_backbone_gw - batman-adv gateway bridged into the LAN */ @@ -1691,16 +1724,12 @@ struct batadv_bla_backbone_gw { /** * @wait_periods: grace time for bridge forward delays and bla group * forming at bootup phase - no bcast traffic is formwared until it has - * elapsed + * elapsed. Must only be access with num_requests_lock. */ - atomic_t wait_periods; + u8 wait_periods; - /** - * @request_sent: if this bool is set to true we are out of sync with - * this backbone gateway - no bcast traffic is formwared until the - * situation was resolved - */ - atomic_t request_sent; + /** @state: sync state. Must only be access with num_requests_lock. */ + enum batadv_bla_backbone_gw_state state; /** @crc: crc16 checksum over all claims */ u16 crc; diff --git a/net/bluetooth/af_bluetooth.c b/net/bluetooth/af_bluetooth.c index 2b94e2077203..70e35e198075 100644 --- a/net/bluetooth/af_bluetooth.c +++ b/net/bluetooth/af_bluetooth.c @@ -154,6 +154,7 @@ struct sock *bt_sock_alloc(struct net *net, struct socket *sock, sock_init_data(sock, sk); INIT_LIST_HEAD(&bt_sk(sk)->accept_q); + spin_lock_init(&bt_sk(sk)->accept_q_lock); sock_reset_flag(sk, SOCK_ZAPPED); @@ -214,6 +215,7 @@ void bt_accept_enqueue(struct sock *parent, struct sock *sk, bool bh) { const struct cred *old_cred; struct pid *old_pid; + struct bt_sock *par = bt_sk(parent); BT_DBG("parent %p, sk %p", parent, sk); @@ -224,9 +226,13 @@ void bt_accept_enqueue(struct sock *parent, struct sock *sk, bool bh) else lock_sock_nested(sk, SINGLE_DEPTH_NESTING); - list_add_tail(&bt_sk(sk)->accept_q, &bt_sk(parent)->accept_q); bt_sk(sk)->parent = parent; + spin_lock_bh(&par->accept_q_lock); + list_add_tail(&bt_sk(sk)->accept_q, &par->accept_q); + sk_acceptq_added(parent); + spin_unlock_bh(&par->accept_q_lock); + /* Copy credentials from parent since for incoming connections the * socket is allocated by the kernel. */ @@ -244,8 +250,6 @@ void bt_accept_enqueue(struct sock *parent, struct sock *sk, bool bh) bh_unlock_sock(sk); else release_sock(sk); - - sk_acceptq_added(parent); } EXPORT_SYMBOL(bt_accept_enqueue); @@ -254,45 +258,72 @@ EXPORT_SYMBOL(bt_accept_enqueue); */ void bt_accept_unlink(struct sock *sk) { + struct sock *parent = bt_sk(sk)->parent; + BT_DBG("sk %p state %d", sk, sk->sk_state); + spin_lock_bh(&bt_sk(parent)->accept_q_lock); list_del_init(&bt_sk(sk)->accept_q); - sk_acceptq_removed(bt_sk(sk)->parent); + sk_acceptq_removed(parent); + spin_unlock_bh(&bt_sk(parent)->accept_q_lock); bt_sk(sk)->parent = NULL; sock_put(sk); } EXPORT_SYMBOL(bt_accept_unlink); +static struct sock *bt_accept_get(struct sock *parent, struct sock *sk) +{ + struct bt_sock *bt = bt_sk(parent); + struct sock *next = NULL; + + /* accept_q is modified from child teardown paths too, so take a + * temporary reference before dropping the queue lock. + */ + spin_lock_bh(&bt->accept_q_lock); + + if (sk) { + if (bt_sk(sk)->parent != parent) + goto out; + + if (!list_is_last(&bt_sk(sk)->accept_q, &bt->accept_q)) { + next = &list_next_entry(bt_sk(sk), accept_q)->sk; + sock_hold(next); + } + } else if (!list_empty(&bt->accept_q)) { + next = &list_first_entry(&bt->accept_q, + struct bt_sock, accept_q)->sk; + sock_hold(next); + } + +out: + spin_unlock_bh(&bt->accept_q_lock); + return next; +} + struct sock *bt_accept_dequeue(struct sock *parent, struct socket *newsock) { - struct bt_sock *s, *n; - struct sock *sk; + struct sock *sk, *next; BT_DBG("parent %p", parent); restart: - list_for_each_entry_safe(s, n, &bt_sk(parent)->accept_q, accept_q) { - sk = (struct sock *)s; - + for (sk = bt_accept_get(parent, NULL); sk; sk = next) { /* Prevent early freeing of sk due to unlink and sock_kill */ - sock_hold(sk); lock_sock(sk); /* Check sk has not already been unlinked via * bt_accept_unlink() due to serialisation caused by sk locking */ - if (!bt_sk(sk)->parent) { + if (bt_sk(sk)->parent != parent) { BT_DBG("sk %p, already unlinked", sk); release_sock(sk); sock_put(sk); - /* Restart the loop as sk is no longer in the list - * and also avoid a potential infinite loop because - * list_for_each_entry_safe() is not thread safe. - */ goto restart; } + next = bt_accept_get(parent, sk); + /* sk is safely in the parent list so reduce reference count */ sock_put(sk); @@ -309,7 +340,19 @@ restart: if (newsock) sock_graft(sk, newsock); + /* Hand the caller a reference taken while sk is + * still locked. bt_accept_unlink() just dropped + * the accept-queue reference; without this hold a + * concurrent teardown (e.g. l2cap_conn_del() -> + * l2cap_sock_kill()) could free sk between + * release_sock() and the caller using it. Every + * caller drops this with sock_put() when done. + */ + sock_hold(sk); + release_sock(sk); + if (next) + sock_put(next); return sk; } @@ -518,18 +561,28 @@ EXPORT_SYMBOL(bt_sock_stream_recvmsg); static inline __poll_t bt_accept_poll(struct sock *parent) { - struct bt_sock *s, *n; + struct bt_sock *bt = bt_sk(parent); + struct bt_sock *s; struct sock *sk; + __poll_t mask = 0; + + spin_lock_bh(&bt->accept_q_lock); + list_for_each_entry(s, &bt->accept_q, accept_q) { + int state; - list_for_each_entry_safe(s, n, &bt_sk(parent)->accept_q, accept_q) { sk = (struct sock *)s; - if (sk->sk_state == BT_CONNECTED || - (test_bit(BT_SK_DEFER_SETUP, &bt_sk(parent)->flags) && - sk->sk_state == BT_CONNECT2)) - return EPOLLIN | EPOLLRDNORM; + state = READ_ONCE(sk->sk_state); + + if (state == BT_CONNECTED || + (test_bit(BT_SK_DEFER_SETUP, &bt->flags) && + state == BT_CONNECT2)) { + mask = EPOLLIN | EPOLLRDNORM; + break; + } } + spin_unlock_bh(&bt->accept_q_lock); - return 0; + return mask; } __poll_t bt_sock_poll(struct file *file, struct socket *sock, diff --git a/net/bluetooth/bnep/core.c b/net/bluetooth/bnep/core.c index d44987d4515c..b3cef7a4db54 100644 --- a/net/bluetooth/bnep/core.c +++ b/net/bluetooth/bnep/core.c @@ -638,8 +638,8 @@ int bnep_add_connection(struct bnep_connadd_req *req, struct socket *sock) goto failed; } - up_write(&bnep_session_sem); strcpy(req->device, dev->name); + up_write(&bnep_session_sem); return 0; failed: diff --git a/net/bluetooth/hci_sync.c b/net/bluetooth/hci_sync.c index 919ec275dd23..426f465be355 100644 --- a/net/bluetooth/hci_sync.c +++ b/net/bluetooth/hci_sync.c @@ -4438,6 +4438,9 @@ static int hci_le_set_event_mask_sync(struct hci_dev *hdev) events[4] |= 0x02; /* LE BIG Info Advertising Report */ } + if (ll_ext_feature_capable(hdev)) + events[5] |= BIT(2); + if (le_cs_capable(hdev)) { /* Channel Sounding events */ events[5] |= 0x08; /* LE CS Read Remote Supported Cap Complete event */ @@ -7413,9 +7416,6 @@ static int hci_le_read_all_remote_features_sync(struct hci_dev *hdev, sizeof(cp), &cp, HCI_EVT_LE_ALL_REMOTE_FEATURES_COMPLETE, HCI_CMD_TIMEOUT, NULL); - - return __hci_cmd_sync_status(hdev, HCI_OP_LE_READ_ALL_REMOTE_FEATURES, - sizeof(cp), &cp, HCI_CMD_TIMEOUT); } static int hci_le_read_remote_features_sync(struct hci_dev *hdev, void *data) diff --git a/net/bluetooth/iso.c b/net/bluetooth/iso.c index be145e2736b7..c72830744d56 100644 --- a/net/bluetooth/iso.c +++ b/net/bluetooth/iso.c @@ -759,6 +759,8 @@ static void iso_sock_cleanup_listen(struct sock *parent) while ((sk = bt_accept_dequeue(parent, NULL))) { iso_sock_close(sk); iso_sock_kill(sk); + /* Drop the reference handed back by bt_accept_dequeue(). */ + sock_put(sk); } /* If listening socket has a hcon, properly disconnect it */ @@ -1364,8 +1366,13 @@ static int iso_sock_accept(struct socket *sock, struct socket *newsock, } ch = bt_accept_dequeue(sk, newsock); - if (ch) + if (ch) { + /* Drop the bridging ref from bt_accept_dequeue(); + * the grafted socket keeps ch alive from here. + */ + sock_put(ch); break; + } if (!timeo) { err = -EAGAIN; @@ -2587,6 +2594,11 @@ int iso_recv(struct hci_dev *hdev, u16 handle, struct sk_buff *skb, u16 flags) break; case ISO_END: + if (!conn->rx_len) { + BT_ERR("Unexpected end frame (len %d)", skb->len); + goto drop; + } + skb_copy_from_linear_data(skb, skb_put(conn->rx_skb, skb->len), skb->len); conn->rx_len -= skb->len; diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c index 0d8053a3fc0a..99297d8f2c1f 100644 --- a/net/bluetooth/l2cap_core.c +++ b/net/bluetooth/l2cap_core.c @@ -7275,7 +7275,7 @@ static void l2cap_ecred_reconfigure(struct l2cap_chan *chan) chan->ident = l2cap_get_ident(conn); l2cap_send_cmd(conn, chan->ident, L2CAP_ECRED_RECONF_REQ, - sizeof(pdu), &pdu); + struct_size(pdu, scid, 1), pdu); } int l2cap_chan_reconfigure(struct l2cap_chan *chan, __u16 mtu) diff --git a/net/bluetooth/l2cap_sock.c b/net/bluetooth/l2cap_sock.c index cf590a67d364..b34e7da8d906 100644 --- a/net/bluetooth/l2cap_sock.c +++ b/net/bluetooth/l2cap_sock.c @@ -349,8 +349,13 @@ static int l2cap_sock_accept(struct socket *sock, struct socket *newsock, } nsk = bt_accept_dequeue(sk, newsock); - if (nsk) + if (nsk) { + /* Drop the bridging ref from bt_accept_dequeue(); + * the grafted socket keeps nsk alive from here. + */ + sock_put(nsk); break; + } if (!timeo) { err = -EAGAIN; @@ -1475,22 +1480,54 @@ static void l2cap_sock_cleanup_listen(struct sock *parent) BT_DBG("parent %p state %s", parent, state_to_string(parent->sk_state)); - /* Close not yet accepted channels */ + /* Close not yet accepted channels. + * + * bt_accept_dequeue() now returns sk with an extra reference held + * (taken while sk was still locked) so a concurrent l2cap_conn_del() + * -> l2cap_sock_kill() cannot free sk under us. + * + * cleanup_listen() runs under the parent sk lock, so unlike + * l2cap_sock_shutdown() we must NOT take conn->lock here: that would + * establish sk_lock -> conn->lock and invert the established + * conn->lock -> chan->lock -> sk_lock order (lockdep deadlock). + * + * Instead, briefly take the child sk lock to fetch and pin its chan. + * l2cap_conn_del() reaches the chan free only via + * l2cap_chan_del() -> l2cap_sock_teardown_cb(), which itself takes + * the child sk lock; holding it across l2cap_chan_hold_unless_zero() + * therefore guarantees the chan cannot be freed while we read and + * pin it (hold_unless_zero() additionally skips a chan already past + * its last reference). We then drop the sk lock before taking + * chan->lock, so sk and chan locks are never held together. + */ while ((sk = bt_accept_dequeue(parent, NULL))) { - struct l2cap_chan *chan = l2cap_pi(sk)->chan; + struct l2cap_chan *chan; + + lock_sock_nested(sk, L2CAP_NESTING_NORMAL); + chan = l2cap_chan_hold_unless_zero(l2cap_pi(sk)->chan); + release_sock(sk); + if (!chan) { + /* l2cap_conn_del() already tearing this child down */ + sock_put(sk); + continue; + } BT_DBG("child chan %p state %s", chan, state_to_string(chan->state)); - l2cap_chan_hold(chan); l2cap_chan_lock(chan); - __clear_chan_timer(chan); l2cap_chan_close(chan, ECONNRESET); - l2cap_sock_kill(sk); - + /* l2cap_conn_del() may already have killed this socket + * (it sets SOCK_DEAD); skip the duplicate to avoid a + * double sock_put()/l2cap_chan_put(). + */ + if (!sock_flag(sk, SOCK_DEAD)) + l2cap_sock_kill(sk); l2cap_chan_unlock(chan); + l2cap_chan_put(chan); + sock_put(sk); } } diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c index b05bb380e5f8..de5bd6b637b2 100644 --- a/net/bluetooth/mgmt.c +++ b/net/bluetooth/mgmt.c @@ -9110,9 +9110,15 @@ static int add_ext_adv_data(struct sock *sk, struct hci_dev *hdev, void *data, struct adv_info *adv_instance; int err = 0; struct mgmt_pending_cmd *cmd; + u16 expected_len; BT_DBG("%s", hdev->name); + expected_len = struct_size(cp, data, cp->adv_data_len + cp->scan_rsp_len); + if (expected_len != data_len) + return mgmt_cmd_status(sk, hdev->id, MGMT_OP_ADD_EXT_ADV_DATA, + MGMT_STATUS_INVALID_PARAMS); + hci_dev_lock(hdev); adv_instance = hci_find_adv_instance(hdev, cp->instance); diff --git a/net/bluetooth/rfcomm/sock.c b/net/bluetooth/rfcomm/sock.c index be6639cd6f59..bd7d959c6e9e 100644 --- a/net/bluetooth/rfcomm/sock.c +++ b/net/bluetooth/rfcomm/sock.c @@ -180,6 +180,8 @@ static void rfcomm_sock_cleanup_listen(struct sock *parent) while ((sk = bt_accept_dequeue(parent, NULL))) { rfcomm_sock_close(sk); rfcomm_sock_kill(sk); + /* Drop the reference handed back by bt_accept_dequeue(). */ + sock_put(sk); } parent->sk_state = BT_CLOSED; @@ -497,8 +499,13 @@ static int rfcomm_sock_accept(struct socket *sock, struct socket *newsock, } nsk = bt_accept_dequeue(sk, newsock); - if (nsk) + if (nsk) { + /* Drop the bridging ref from bt_accept_dequeue(); + * the grafted socket keeps nsk alive from here. + */ + sock_put(nsk); break; + } if (!timeo) { err = -EAGAIN; diff --git a/net/bluetooth/sco.c b/net/bluetooth/sco.c index 18826d4b9c0b..770b9d6fad88 100644 --- a/net/bluetooth/sco.c +++ b/net/bluetooth/sco.c @@ -498,6 +498,8 @@ static void sco_sock_cleanup_listen(struct sock *parent) while ((sk = bt_accept_dequeue(parent, NULL))) { sco_sock_close(sk); sco_sock_kill(sk); + /* Drop the reference handed back by bt_accept_dequeue(). */ + sock_put(sk); } parent->sk_state = BT_CLOSED; @@ -759,8 +761,13 @@ static int sco_sock_accept(struct socket *sock, struct socket *newsock, } ch = bt_accept_dequeue(sk, newsock); - if (ch) + if (ch) { + /* Drop the bridging ref from bt_accept_dequeue(); + * the grafted socket keeps ch alive from here. + */ + sock_put(ch); break; + } if (!timeo) { err = -EAGAIN; diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c index 881d866d687a..2eef4f3345cd 100644 --- a/net/bridge/br_multicast.c +++ b/net/bridge/br_multicast.c @@ -4640,10 +4640,24 @@ static void br_multicast_start_querier(struct net_bridge_mcast *brmctx, rcu_read_unlock(); } -static void br_multicast_del_grps(struct net_bridge *br) +static void br_multicast_enable_all_ports(struct net_bridge *br) { struct net_bridge_port *port; + if (br_opt_get(br, BROPT_MCAST_VLAN_SNOOPING_ENABLED)) + return; + + list_for_each_entry(port, &br->port_list, list) + __br_multicast_enable_port_ctx(&port->multicast_ctx); +} + +static void br_multicast_disable_all_ports(struct net_bridge *br) +{ + struct net_bridge_port *port; + + if (br_opt_get(br, BROPT_MCAST_VLAN_SNOOPING_ENABLED)) + return; + list_for_each_entry(port, &br->port_list, list) __br_multicast_disable_port_ctx(&port->multicast_ctx); } @@ -4651,7 +4665,6 @@ static void br_multicast_del_grps(struct net_bridge *br) int br_multicast_toggle(struct net_bridge *br, unsigned long val, struct netlink_ext_ack *extack) { - struct net_bridge_port *port; bool change_snoopers = false; int err = 0; @@ -4668,7 +4681,7 @@ int br_multicast_toggle(struct net_bridge *br, unsigned long val, br_opt_toggle(br, BROPT_MULTICAST_ENABLED, !!val); if (!br_opt_get(br, BROPT_MULTICAST_ENABLED)) { change_snoopers = true; - br_multicast_del_grps(br); + br_multicast_disable_all_ports(br); goto unlock; } @@ -4676,8 +4689,7 @@ int br_multicast_toggle(struct net_bridge *br, unsigned long val, goto unlock; br_multicast_open(br); - list_for_each_entry(port, &br->port_list, list) - __br_multicast_enable_port_ctx(&port->multicast_ctx); + br_multicast_enable_all_ports(br); change_snoopers = true; diff --git a/net/bridge/netfilter/ebtable_broute.c b/net/bridge/netfilter/ebtable_broute.c index 741360219552..f05c79f215ea 100644 --- a/net/bridge/netfilter/ebtable_broute.c +++ b/net/bridge/netfilter/ebtable_broute.c @@ -112,24 +112,22 @@ static struct pernet_operations broute_net_ops = { static int __init ebtable_broute_init(void) { - int ret = ebt_register_template(&broute_table, broute_table_init); + int ret = register_pernet_subsys(&broute_net_ops); if (ret) return ret; - ret = register_pernet_subsys(&broute_net_ops); - if (ret) { - ebt_unregister_template(&broute_table); - return ret; - } + ret = ebt_register_template(&broute_table, broute_table_init); + if (ret) + unregister_pernet_subsys(&broute_net_ops); - return 0; + return ret; } static void __exit ebtable_broute_fini(void) { - unregister_pernet_subsys(&broute_net_ops); ebt_unregister_template(&broute_table); + unregister_pernet_subsys(&broute_net_ops); } module_init(ebtable_broute_init); diff --git a/net/bridge/netfilter/ebtable_filter.c b/net/bridge/netfilter/ebtable_filter.c index dacd81b12e62..0fc03b07e62a 100644 --- a/net/bridge/netfilter/ebtable_filter.c +++ b/net/bridge/netfilter/ebtable_filter.c @@ -93,24 +93,22 @@ static struct pernet_operations frame_filter_net_ops = { static int __init ebtable_filter_init(void) { - int ret = ebt_register_template(&frame_filter, frame_filter_table_init); + int ret = register_pernet_subsys(&frame_filter_net_ops); if (ret) return ret; - ret = register_pernet_subsys(&frame_filter_net_ops); - if (ret) { - ebt_unregister_template(&frame_filter); - return ret; - } + ret = ebt_register_template(&frame_filter, frame_filter_table_init); + if (ret) + unregister_pernet_subsys(&frame_filter_net_ops); - return 0; + return ret; } static void __exit ebtable_filter_fini(void) { - unregister_pernet_subsys(&frame_filter_net_ops); ebt_unregister_template(&frame_filter); + unregister_pernet_subsys(&frame_filter_net_ops); } module_init(ebtable_filter_init); diff --git a/net/bridge/netfilter/ebtable_nat.c b/net/bridge/netfilter/ebtable_nat.c index 0f2a8c6118d4..8a10375d8909 100644 --- a/net/bridge/netfilter/ebtable_nat.c +++ b/net/bridge/netfilter/ebtable_nat.c @@ -93,24 +93,22 @@ static struct pernet_operations frame_nat_net_ops = { static int __init ebtable_nat_init(void) { - int ret = ebt_register_template(&frame_nat, frame_nat_table_init); + int ret = register_pernet_subsys(&frame_nat_net_ops); if (ret) return ret; - ret = register_pernet_subsys(&frame_nat_net_ops); - if (ret) { - ebt_unregister_template(&frame_nat); - return ret; - } + ret = ebt_register_template(&frame_nat, frame_nat_table_init); + if (ret) + unregister_pernet_subsys(&frame_nat_net_ops); return ret; } static void __exit ebtable_nat_fini(void) { - unregister_pernet_subsys(&frame_nat_net_ops); ebt_unregister_template(&frame_nat); + unregister_pernet_subsys(&frame_nat_net_ops); } module_init(ebtable_nat_init); diff --git a/net/bridge/netfilter/ebtables.c b/net/bridge/netfilter/ebtables.c index aea3e19875c6..b9f4daac09af 100644 --- a/net/bridge/netfilter/ebtables.c +++ b/net/bridge/netfilter/ebtables.c @@ -42,6 +42,7 @@ struct ebt_pernet { struct list_head tables; + struct list_head dead_tables; }; struct ebt_template { @@ -1162,11 +1163,6 @@ free_newinfo: static void __ebt_unregister_table(struct net *net, struct ebt_table *table) { - mutex_lock(&ebt_mutex); - list_del(&table->list); - mutex_unlock(&ebt_mutex); - audit_log_nfcfg(table->name, AF_BRIDGE, table->private->nentries, - AUDIT_XT_OP_UNREGISTER, GFP_KERNEL); EBT_ENTRY_ITERATE(table->private->entries, table->private->entries_size, ebt_cleanup_entry, net, NULL); if (table->private->nentries) @@ -1267,13 +1263,15 @@ int ebt_register_table(struct net *net, const struct ebt_table *input_table, for (i = 0; i < num_ops; i++) ops[i].priv = table; - list_add(&table->list, &ebt_net->tables); - mutex_unlock(&ebt_mutex); - table->ops = ops; ret = nf_register_net_hooks(net, ops, num_ops); - if (ret) + if (ret) { + synchronize_rcu(); __ebt_unregister_table(net, table); + } else { + list_add(&table->list, &ebt_net->tables); + } + mutex_unlock(&ebt_mutex); audit_log_nfcfg(repl->name, AF_BRIDGE, repl->nentries, AUDIT_XT_OP_REGISTER, GFP_KERNEL); @@ -1339,7 +1337,7 @@ void ebt_unregister_template(const struct ebt_table *t) } EXPORT_SYMBOL(ebt_unregister_template); -static struct ebt_table *__ebt_find_table(struct net *net, const char *name) +void ebt_unregister_table_pre_exit(struct net *net, const char *name) { struct ebt_pernet *ebt_net = net_generic(net, ebt_pernet_id); struct ebt_table *t; @@ -1348,30 +1346,36 @@ static struct ebt_table *__ebt_find_table(struct net *net, const char *name) list_for_each_entry(t, &ebt_net->tables, list) { if (strcmp(t->name, name) == 0) { + list_move(&t->list, &ebt_net->dead_tables); mutex_unlock(&ebt_mutex); - return t; + nf_unregister_net_hooks(net, t->ops, hweight32(t->valid_hooks)); + return; } } mutex_unlock(&ebt_mutex); - return NULL; -} - -void ebt_unregister_table_pre_exit(struct net *net, const char *name) -{ - struct ebt_table *table = __ebt_find_table(net, name); - - if (table) - nf_unregister_net_hooks(net, table->ops, hweight32(table->valid_hooks)); } EXPORT_SYMBOL(ebt_unregister_table_pre_exit); void ebt_unregister_table(struct net *net, const char *name) { - struct ebt_table *table = __ebt_find_table(net, name); + struct ebt_pernet *ebt_net = net_generic(net, ebt_pernet_id); + struct ebt_table *t; - if (table) - __ebt_unregister_table(net, table); + mutex_lock(&ebt_mutex); + + list_for_each_entry(t, &ebt_net->dead_tables, list) { + if (strcmp(t->name, name) == 0) { + list_del(&t->list); + audit_log_nfcfg(t->name, AF_BRIDGE, t->private->nentries, + AUDIT_XT_OP_UNREGISTER, GFP_KERNEL); + __ebt_unregister_table(net, t); + mutex_unlock(&ebt_mutex); + return; + } + } + + mutex_unlock(&ebt_mutex); } /* userspace just supplied us with counters */ @@ -2556,11 +2560,21 @@ static int __net_init ebt_pernet_init(struct net *net) struct ebt_pernet *ebt_net = net_generic(net, ebt_pernet_id); INIT_LIST_HEAD(&ebt_net->tables); + INIT_LIST_HEAD(&ebt_net->dead_tables); return 0; } +static void __net_exit ebt_pernet_exit(struct net *net) +{ + struct ebt_pernet *ebt_net = net_generic(net, ebt_pernet_id); + + WARN_ON_ONCE(!list_empty(&ebt_net->tables)); + WARN_ON_ONCE(!list_empty(&ebt_net->dead_tables)); +} + static struct pernet_operations ebt_net_ops = { .init = ebt_pernet_init, + .exit = ebt_pernet_exit, .id = &ebt_pernet_id, .size = sizeof(struct ebt_pernet), }; @@ -2569,19 +2583,20 @@ static int __init ebtables_init(void) { int ret; - ret = xt_register_target(&ebt_standard_target); + ret = register_pernet_subsys(&ebt_net_ops); if (ret < 0) return ret; - ret = nf_register_sockopt(&ebt_sockopts); + + ret = xt_register_target(&ebt_standard_target); if (ret < 0) { - xt_unregister_target(&ebt_standard_target); + unregister_pernet_subsys(&ebt_net_ops); return ret; } - ret = register_pernet_subsys(&ebt_net_ops); + ret = nf_register_sockopt(&ebt_sockopts); if (ret < 0) { - nf_unregister_sockopt(&ebt_sockopts); xt_unregister_target(&ebt_standard_target); + unregister_pernet_subsys(&ebt_net_ops); return ret; } diff --git a/net/core/dev.c b/net/core/dev.c index e4fcf09ba2be..fab5a0bebd92 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -6855,9 +6855,9 @@ static void skb_defer_free_flush(void) #if defined(CONFIG_NET_RX_BUSY_POLL) -static void __busy_poll_stop(struct napi_struct *napi, bool skip_schedule) +static void __busy_poll_stop(struct napi_struct *napi, unsigned long timeout) { - if (!skip_schedule) { + if (!timeout) { gro_normal_list(&napi->gro); __napi_schedule(napi); return; @@ -6867,6 +6867,8 @@ static void __busy_poll_stop(struct napi_struct *napi, bool skip_schedule) gro_flush_normal(&napi->gro, HZ >= 1000); clear_bit(NAPI_STATE_SCHED, &napi->state); + hrtimer_start(&napi->timer, ns_to_ktime(timeout), + HRTIMER_MODE_REL_PINNED); } enum { @@ -6878,8 +6880,7 @@ static void busy_poll_stop(struct napi_struct *napi, void *have_poll_lock, unsigned flags, u16 budget) { struct bpf_net_context __bpf_net_ctx, *bpf_net_ctx; - bool skip_schedule = false; - unsigned long timeout; + unsigned long timeout = 0; int rc; /* Busy polling means there is a high chance device driver hard irq @@ -6899,10 +6900,12 @@ static void busy_poll_stop(struct napi_struct *napi, void *have_poll_lock, if (flags & NAPI_F_PREFER_BUSY_POLL) { napi->defer_hard_irqs_count = napi_get_defer_hard_irqs(napi); - timeout = napi_get_gro_flush_timeout(napi); - if (napi->defer_hard_irqs_count && timeout) { - hrtimer_start(&napi->timer, ns_to_ktime(timeout), HRTIMER_MODE_REL_PINNED); - skip_schedule = true; + if (napi->defer_hard_irqs_count) { + /* A short enough gro flush timeout and long enough + * poll can result in timer firing too early. + * Timer will be armed later if necessary. + */ + timeout = napi_get_gro_flush_timeout(napi); } } @@ -6917,7 +6920,7 @@ static void busy_poll_stop(struct napi_struct *napi, void *have_poll_lock, trace_napi_poll(napi, rc, budget); netpoll_poll_unlock(have_poll_lock); if (rc == budget) - __busy_poll_stop(napi, skip_schedule); + __busy_poll_stop(napi, timeout); bpf_net_ctx_clear(bpf_net_ctx); local_bh_enable(); } diff --git a/net/core/devmem.c b/net/core/devmem.c index 69d79aee07ef..c84eb683c025 100644 --- a/net/core/devmem.c +++ b/net/core/devmem.c @@ -241,6 +241,11 @@ net_devmem_bind_dmabuf(struct net_device *dev, } if (direction == DMA_TO_DEVICE) { + if (!IS_ALIGNED(dmabuf->size, PAGE_SIZE)) { + err = -EINVAL; + NL_SET_ERR_MSG(extack, "TX dma-buf size must be a multiple of PAGE_SIZE"); + goto err_unmap; + } binding->tx_vec = kvmalloc_objs(struct net_iov *, dmabuf->size / PAGE_SIZE); if (!binding->tx_vec) { @@ -267,6 +272,12 @@ net_devmem_bind_dmabuf(struct net_device *dev, size_t len = sg_dma_len(sg); struct net_iov *niov; + if (!IS_ALIGNED(len, PAGE_SIZE)) { + err = -EINVAL; + NL_SET_ERR_MSG(extack, "dma-buf SG length must be PAGE_SIZE aligned"); + goto err_free_chunks; + } + owner = kzalloc_node(sizeof(*owner), GFP_KERNEL, dev_to_node(&dev->dev)); if (!owner) { diff --git a/net/core/gro.c b/net/core/gro.c index 9f8960789b2c..a84753983467 100644 --- a/net/core/gro.c +++ b/net/core/gro.c @@ -109,6 +109,9 @@ int skb_gro_receive(struct sk_buff *p, struct sk_buff *skb) if (p->pp_recycle != skb->pp_recycle) return -ETOOMANYREFS; + if (skb_zcopy(p) || skb_zcopy(skb)) + return -ETOOMANYREFS; + if (unlikely(p->len + len >= netif_get_gro_max_size(p->dev, p) || NAPI_GRO_CB(skb)->flush)) return -E2BIG; diff --git a/net/core/skmsg.c b/net/core/skmsg.c index 6187a83bd741..e1850caf1a71 100644 --- a/net/core/skmsg.c +++ b/net/core/skmsg.c @@ -1268,12 +1268,19 @@ out: static void sk_psock_verdict_data_ready(struct sock *sk) { const struct proto_ops *ops = NULL; + struct sk_psock *psock; struct socket *sock; int copied; trace_sk_data_ready(sk); rcu_read_lock(); + psock = sk_psock(sk); + if (psock && tls_sw_has_ctx_rx(sk)) { + psock->saved_data_ready(sk); + rcu_read_unlock(); + return; + } sock = READ_ONCE(sk->sk_socket); if (likely(sock)) ops = READ_ONCE(sock->ops); @@ -1283,8 +1290,6 @@ static void sk_psock_verdict_data_ready(struct sock *sk) copied = ops->read_skb(sk, sk_psock_verdict_recv); if (copied >= 0) { - struct sk_psock *psock; - rcu_read_lock(); psock = sk_psock(sk); if (psock) diff --git a/net/ethtool/bitset.c b/net/ethtool/bitset.c index f0883357d12e..4691d6d0f2b7 100644 --- a/net/ethtool/bitset.c +++ b/net/ethtool/bitset.c @@ -91,7 +91,7 @@ static bool ethnl_bitmap32_not_zero(const u32 *map, unsigned int start, u32 mask; if (end <= start) - return true; + return false; if (start % 32) { mask = ethnl_upper_bits(start); @@ -104,11 +104,11 @@ static bool ethnl_bitmap32_not_zero(const u32 *map, unsigned int start, start_word++; } - if (!memchr_inv(map + start_word, '\0', - (end_word - start_word) * sizeof(u32))) + if (memchr_inv(map + start_word, '\0', + (end_word - start_word) * sizeof(u32))) return true; if (end % 32 == 0) - return true; + return false; return map[end_word] & ethnl_lower_bits(end); } diff --git a/net/ethtool/phy.c b/net/ethtool/phy.c index 68372bef4b2f..98392a3c34b5 100644 --- a/net/ethtool/phy.c +++ b/net/ethtool/phy.c @@ -76,6 +76,7 @@ static int phy_prepare_data(const struct ethnl_req_info *req_info, struct nlattr **tb = info->attrs; struct phy_device_node *pdn; struct phy_device *phydev; + int ret; /* RTNL is held by the caller */ phydev = ethnl_req_get_phydev(req_info, tb, ETHTOOL_A_PHY_HEADER, @@ -88,8 +89,19 @@ static int phy_prepare_data(const struct ethnl_req_info *req_info, return -EOPNOTSUPP; rep_data->phyindex = phydev->phyindex; + rep_data->name = kstrdup(dev_name(&phydev->mdio.dev), GFP_KERNEL); - rep_data->drvname = kstrdup(phydev->drv->name, GFP_KERNEL); + if (!rep_data->name) + return -ENOMEM; + + if (phydev->drv) { + rep_data->drvname = kstrdup(phydev->drv->name, GFP_KERNEL); + if (!rep_data->drvname) { + ret = -ENOMEM; + goto err_free_name; + } + } + rep_data->upstream_type = pdn->upstream_type; if (pdn->upstream_type == PHY_UPSTREAM_PHY) { @@ -97,15 +109,33 @@ static int phy_prepare_data(const struct ethnl_req_info *req_info, rep_data->upstream_index = upstream->phyindex; } - if (pdn->parent_sfp_bus) + if (pdn->parent_sfp_bus) { rep_data->upstream_sfp_name = kstrdup(sfp_get_name(pdn->parent_sfp_bus), GFP_KERNEL); + if (!rep_data->upstream_sfp_name) { + ret = -ENOMEM; + goto err_free_drvname; + } + } - if (phydev->sfp_bus) + if (phydev->sfp_bus) { rep_data->downstream_sfp_name = kstrdup(sfp_get_name(phydev->sfp_bus), GFP_KERNEL); + if (!rep_data->downstream_sfp_name) { + ret = -ENOMEM; + goto err_free_upstream_sfp; + } + } return 0; + +err_free_upstream_sfp: + kfree(rep_data->upstream_sfp_name); +err_free_drvname: + kfree(rep_data->drvname); +err_free_name: + kfree(rep_data->name); + return ret; } static int phy_fill_reply(struct sk_buff *skb, diff --git a/net/hsr/hsr_framereg.c b/net/hsr/hsr_framereg.c index d41863593674..f268e469af4f 100644 --- a/net/hsr/hsr_framereg.c +++ b/net/hsr/hsr_framereg.c @@ -163,8 +163,8 @@ void hsr_del_nodes(struct list_head *node_db) struct hsr_node *tmp; list_for_each_entry_safe(node, tmp, node_db, mac_list) { - list_del(&node->mac_list); - hsr_free_node(node); + list_del_rcu(&node->mac_list); + call_rcu(&node->rcu_head, hsr_free_node_rcu); } } diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c index bc987a59a095..f1988fd50354 100644 --- a/net/ipv4/inet_connection_sock.c +++ b/net/ipv4/inet_connection_sock.c @@ -1137,7 +1137,7 @@ no_ownership: } drop: - __inet_csk_reqsk_queue_drop(sk_listener, oreq, true); + __inet_csk_reqsk_queue_drop(oreq->rsk_listener, oreq, true); reqsk_put(oreq); } diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c index 97ead883e4a1..ad2259678c78 100644 --- a/net/ipv4/netfilter/arp_tables.c +++ b/net/ipv4/netfilter/arp_tables.c @@ -1501,13 +1501,11 @@ static int do_arpt_get_ctl(struct sock *sk, int cmd, void __user *user, int *len static void __arpt_unregister_table(struct net *net, struct xt_table *table) { - struct xt_table_info *private; - void *loc_cpu_entry; + struct xt_table_info *private = table->private; struct module *table_owner = table->me; + void *loc_cpu_entry; struct arpt_entry *iter; - private = xt_unregister_table(table); - /* Decrease module usage counts and free resources */ loc_cpu_entry = private->entries; xt_entry_foreach(iter, loc_cpu_entry, private->size) @@ -1515,6 +1513,7 @@ static void __arpt_unregister_table(struct net *net, struct xt_table *table) if (private->number > private->initial_entries) module_put(table_owner); xt_free_table_info(private); + kfree(table); } int arpt_register_table(struct net *net, @@ -1522,13 +1521,11 @@ int arpt_register_table(struct net *net, const struct arpt_replace *repl, const struct nf_hook_ops *template_ops) { - struct nf_hook_ops *ops; - unsigned int num_ops; - int ret, i; - struct xt_table_info *newinfo; struct xt_table_info bootstrap = {0}; - void *loc_cpu_entry; + struct xt_table_info *newinfo; struct xt_table *new_table; + void *loc_cpu_entry; + int ret; newinfo = xt_alloc_table_info(repl->size); if (!newinfo) @@ -1543,7 +1540,7 @@ int arpt_register_table(struct net *net, return ret; } - new_table = xt_register_table(net, table, &bootstrap, newinfo); + new_table = xt_register_table(net, table, template_ops, &bootstrap, newinfo); if (IS_ERR(new_table)) { struct arpt_entry *iter; @@ -1553,46 +1550,12 @@ int arpt_register_table(struct net *net, return PTR_ERR(new_table); } - num_ops = hweight32(table->valid_hooks); - if (num_ops == 0) { - ret = -EINVAL; - goto out_free; - } - - ops = kmemdup_array(template_ops, num_ops, sizeof(*ops), GFP_KERNEL); - if (!ops) { - ret = -ENOMEM; - goto out_free; - } - - for (i = 0; i < num_ops; i++) - ops[i].priv = new_table; - - new_table->ops = ops; - - ret = nf_register_net_hooks(net, ops, num_ops); - if (ret != 0) - goto out_free; - return ret; - -out_free: - __arpt_unregister_table(net, new_table); - return ret; -} - -void arpt_unregister_table_pre_exit(struct net *net, const char *name) -{ - struct xt_table *table = xt_find_table(net, NFPROTO_ARP, name); - - if (table) - nf_unregister_net_hooks(net, table->ops, hweight32(table->valid_hooks)); } -EXPORT_SYMBOL(arpt_unregister_table_pre_exit); void arpt_unregister_table(struct net *net, const char *name) { - struct xt_table *table = xt_find_table(net, NFPROTO_ARP, name); + struct xt_table *table = xt_unregister_table_exit(net, NFPROTO_ARP, name); if (table) __arpt_unregister_table(net, table); diff --git a/net/ipv4/netfilter/arptable_filter.c b/net/ipv4/netfilter/arptable_filter.c index 78cd5ee24448..370b635e3523 100644 --- a/net/ipv4/netfilter/arptable_filter.c +++ b/net/ipv4/netfilter/arptable_filter.c @@ -43,7 +43,7 @@ static int arptable_filter_table_init(struct net *net) static void __net_exit arptable_filter_net_pre_exit(struct net *net) { - arpt_unregister_table_pre_exit(net, "filter"); + xt_unregister_table_pre_exit(net, NFPROTO_ARP, "filter"); } static void __net_exit arptable_filter_net_exit(struct net *net) @@ -58,32 +58,33 @@ static struct pernet_operations arptable_filter_net_ops = { static int __init arptable_filter_init(void) { - int ret = xt_register_template(&packet_filter, - arptable_filter_table_init); - - if (ret < 0) - return ret; + int ret; arpfilter_ops = xt_hook_ops_alloc(&packet_filter, arpt_do_table); - if (IS_ERR(arpfilter_ops)) { - xt_unregister_template(&packet_filter); + if (IS_ERR(arpfilter_ops)) return PTR_ERR(arpfilter_ops); - } ret = register_pernet_subsys(&arptable_filter_net_ops); + if (ret < 0) + goto err_free; + + ret = xt_register_template(&packet_filter, + arptable_filter_table_init); if (ret < 0) { - xt_unregister_template(&packet_filter); - kfree(arpfilter_ops); - return ret; + unregister_pernet_subsys(&arptable_filter_net_ops); + goto err_free; } + return 0; +err_free: + kfree(arpfilter_ops); return ret; } static void __exit arptable_filter_fini(void) { - unregister_pernet_subsys(&arptable_filter_net_ops); xt_unregister_template(&packet_filter); + unregister_pernet_subsys(&arptable_filter_net_ops); kfree(arpfilter_ops); } diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c index 23c8deff8095..5cbdb0815857 100644 --- a/net/ipv4/netfilter/ip_tables.c +++ b/net/ipv4/netfilter/ip_tables.c @@ -1704,12 +1704,10 @@ do_ipt_get_ctl(struct sock *sk, int cmd, void __user *user, int *len) static void __ipt_unregister_table(struct net *net, struct xt_table *table) { - struct xt_table_info *private; - void *loc_cpu_entry; + struct xt_table_info *private = table->private; struct module *table_owner = table->me; struct ipt_entry *iter; - - private = xt_unregister_table(table); + void *loc_cpu_entry; /* Decrease module usage counts and free resources */ loc_cpu_entry = private->entries; @@ -1718,19 +1716,18 @@ static void __ipt_unregister_table(struct net *net, struct xt_table *table) if (private->number > private->initial_entries) module_put(table_owner); xt_free_table_info(private); + kfree(table); } int ipt_register_table(struct net *net, const struct xt_table *table, const struct ipt_replace *repl, const struct nf_hook_ops *template_ops) { - struct nf_hook_ops *ops; - unsigned int num_ops; - int ret, i; - struct xt_table_info *newinfo; struct xt_table_info bootstrap = {0}; - void *loc_cpu_entry; + struct xt_table_info *newinfo; struct xt_table *new_table; + void *loc_cpu_entry; + int ret; newinfo = xt_alloc_table_info(repl->size); if (!newinfo) @@ -1745,7 +1742,7 @@ int ipt_register_table(struct net *net, const struct xt_table *table, return ret; } - new_table = xt_register_table(net, table, &bootstrap, newinfo); + new_table = xt_register_table(net, table, template_ops, &bootstrap, newinfo); if (IS_ERR(new_table)) { struct ipt_entry *iter; @@ -1755,51 +1752,12 @@ int ipt_register_table(struct net *net, const struct xt_table *table, return PTR_ERR(new_table); } - /* No template? No need to do anything. This is used by 'nat' table, it registers - * with the nat core instead of the netfilter core. - */ - if (!template_ops) - return 0; - - num_ops = hweight32(table->valid_hooks); - if (num_ops == 0) { - ret = -EINVAL; - goto out_free; - } - - ops = kmemdup_array(template_ops, num_ops, sizeof(*ops), GFP_KERNEL); - if (!ops) { - ret = -ENOMEM; - goto out_free; - } - - for (i = 0; i < num_ops; i++) - ops[i].priv = new_table; - - new_table->ops = ops; - - ret = nf_register_net_hooks(net, ops, num_ops); - if (ret != 0) - goto out_free; - return ret; - -out_free: - __ipt_unregister_table(net, new_table); - return ret; -} - -void ipt_unregister_table_pre_exit(struct net *net, const char *name) -{ - struct xt_table *table = xt_find_table(net, NFPROTO_IPV4, name); - - if (table) - nf_unregister_net_hooks(net, table->ops, hweight32(table->valid_hooks)); } void ipt_unregister_table_exit(struct net *net, const char *name) { - struct xt_table *table = xt_find_table(net, NFPROTO_IPV4, name); + struct xt_table *table = xt_unregister_table_exit(net, NFPROTO_IPV4, name); if (table) __ipt_unregister_table(net, table); @@ -1887,7 +1845,6 @@ static void __exit ip_tables_fini(void) } EXPORT_SYMBOL(ipt_register_table); -EXPORT_SYMBOL(ipt_unregister_table_pre_exit); EXPORT_SYMBOL(ipt_unregister_table_exit); EXPORT_SYMBOL(ipt_do_table); module_init(ip_tables_init); diff --git a/net/ipv4/netfilter/iptable_filter.c b/net/ipv4/netfilter/iptable_filter.c index 3ab908b74795..672d7da1071d 100644 --- a/net/ipv4/netfilter/iptable_filter.c +++ b/net/ipv4/netfilter/iptable_filter.c @@ -61,7 +61,7 @@ static int __net_init iptable_filter_net_init(struct net *net) static void __net_exit iptable_filter_net_pre_exit(struct net *net) { - ipt_unregister_table_pre_exit(net, "filter"); + xt_unregister_table_pre_exit(net, NFPROTO_IPV4, "filter"); } static void __net_exit iptable_filter_net_exit(struct net *net) @@ -77,32 +77,33 @@ static struct pernet_operations iptable_filter_net_ops = { static int __init iptable_filter_init(void) { - int ret = xt_register_template(&packet_filter, - iptable_filter_table_init); - - if (ret < 0) - return ret; + int ret; filter_ops = xt_hook_ops_alloc(&packet_filter, ipt_do_table); - if (IS_ERR(filter_ops)) { - xt_unregister_template(&packet_filter); + if (IS_ERR(filter_ops)) return PTR_ERR(filter_ops); - } ret = register_pernet_subsys(&iptable_filter_net_ops); + if (ret < 0) + goto err_free; + + ret = xt_register_template(&packet_filter, + iptable_filter_table_init); if (ret < 0) { - xt_unregister_template(&packet_filter); - kfree(filter_ops); - return ret; + unregister_pernet_subsys(&iptable_filter_net_ops); + goto err_free; } return 0; +err_free: + kfree(filter_ops); + return ret; } static void __exit iptable_filter_fini(void) { - unregister_pernet_subsys(&iptable_filter_net_ops); xt_unregister_template(&packet_filter); + unregister_pernet_subsys(&iptable_filter_net_ops); kfree(filter_ops); } diff --git a/net/ipv4/netfilter/iptable_mangle.c b/net/ipv4/netfilter/iptable_mangle.c index 385d945d8ebe..13d25d9a4610 100644 --- a/net/ipv4/netfilter/iptable_mangle.c +++ b/net/ipv4/netfilter/iptable_mangle.c @@ -96,7 +96,7 @@ static int iptable_mangle_table_init(struct net *net) static void __net_exit iptable_mangle_net_pre_exit(struct net *net) { - ipt_unregister_table_pre_exit(net, "mangle"); + xt_unregister_table_pre_exit(net, NFPROTO_IPV4, "mangle"); } static void __net_exit iptable_mangle_net_exit(struct net *net) @@ -111,32 +111,33 @@ static struct pernet_operations iptable_mangle_net_ops = { static int __init iptable_mangle_init(void) { - int ret = xt_register_template(&packet_mangler, - iptable_mangle_table_init); - if (ret < 0) - return ret; + int ret; mangle_ops = xt_hook_ops_alloc(&packet_mangler, iptable_mangle_hook); - if (IS_ERR(mangle_ops)) { - xt_unregister_template(&packet_mangler); - ret = PTR_ERR(mangle_ops); - return ret; - } + if (IS_ERR(mangle_ops)) + return PTR_ERR(mangle_ops); ret = register_pernet_subsys(&iptable_mangle_net_ops); + if (ret < 0) + goto err_free; + + ret = xt_register_template(&packet_mangler, + iptable_mangle_table_init); if (ret < 0) { - xt_unregister_template(&packet_mangler); - kfree(mangle_ops); - return ret; + unregister_pernet_subsys(&iptable_mangle_net_ops); + goto err_free; } + return 0; +err_free: + kfree(mangle_ops); return ret; } static void __exit iptable_mangle_fini(void) { - unregister_pernet_subsys(&iptable_mangle_net_ops); xt_unregister_template(&packet_mangler); + unregister_pernet_subsys(&iptable_mangle_net_ops); kfree(mangle_ops); } diff --git a/net/ipv4/netfilter/iptable_nat.c b/net/ipv4/netfilter/iptable_nat.c index 625a1ca13b1b..a0df72554025 100644 --- a/net/ipv4/netfilter/iptable_nat.c +++ b/net/ipv4/netfilter/iptable_nat.c @@ -119,8 +119,11 @@ static int iptable_nat_table_init(struct net *net) } ret = ipt_nat_register_lookups(net); - if (ret < 0) + if (ret < 0) { + xt_unregister_table_pre_exit(net, NFPROTO_IPV4, "nat"); + synchronize_rcu(); ipt_unregister_table_exit(net, "nat"); + } kfree(repl); return ret; @@ -129,6 +132,7 @@ static int iptable_nat_table_init(struct net *net) static void __net_exit iptable_nat_net_pre_exit(struct net *net) { ipt_nat_unregister_lookups(net); + xt_unregister_table_pre_exit(net, NFPROTO_IPV4, "nat"); } static void __net_exit iptable_nat_net_exit(struct net *net) diff --git a/net/ipv4/netfilter/iptable_raw.c b/net/ipv4/netfilter/iptable_raw.c index 0e7f53964d0a..2745c22f4034 100644 --- a/net/ipv4/netfilter/iptable_raw.c +++ b/net/ipv4/netfilter/iptable_raw.c @@ -53,7 +53,7 @@ static int iptable_raw_table_init(struct net *net) static void __net_exit iptable_raw_net_pre_exit(struct net *net) { - ipt_unregister_table_pre_exit(net, "raw"); + xt_unregister_table_pre_exit(net, NFPROTO_IPV4, "raw"); } static void __net_exit iptable_raw_net_exit(struct net *net) @@ -77,32 +77,32 @@ static int __init iptable_raw_init(void) pr_info("Enabling raw table before defrag\n"); } - ret = xt_register_template(table, - iptable_raw_table_init); - if (ret < 0) - return ret; - rawtable_ops = xt_hook_ops_alloc(table, ipt_do_table); - if (IS_ERR(rawtable_ops)) { - xt_unregister_template(table); + if (IS_ERR(rawtable_ops)) return PTR_ERR(rawtable_ops); - } ret = register_pernet_subsys(&iptable_raw_net_ops); + if (ret < 0) + goto err_free; + + ret = xt_register_template(table, + iptable_raw_table_init); if (ret < 0) { - xt_unregister_template(table); - kfree(rawtable_ops); - return ret; + unregister_pernet_subsys(&iptable_raw_net_ops); + goto err_free; } + return 0; +err_free: + kfree(rawtable_ops); return ret; } static void __exit iptable_raw_fini(void) { + xt_unregister_template(&packet_raw); unregister_pernet_subsys(&iptable_raw_net_ops); kfree(rawtable_ops); - xt_unregister_template(&packet_raw); } module_init(iptable_raw_init); diff --git a/net/ipv4/netfilter/iptable_security.c b/net/ipv4/netfilter/iptable_security.c index d885443cb267..491894511c54 100644 --- a/net/ipv4/netfilter/iptable_security.c +++ b/net/ipv4/netfilter/iptable_security.c @@ -50,7 +50,7 @@ static int iptable_security_table_init(struct net *net) static void __net_exit iptable_security_net_pre_exit(struct net *net) { - ipt_unregister_table_pre_exit(net, "security"); + xt_unregister_table_pre_exit(net, NFPROTO_IPV4, "security"); } static void __net_exit iptable_security_net_exit(struct net *net) @@ -65,33 +65,34 @@ static struct pernet_operations iptable_security_net_ops = { static int __init iptable_security_init(void) { - int ret = xt_register_template(&security_table, - iptable_security_table_init); - - if (ret < 0) - return ret; + int ret; sectbl_ops = xt_hook_ops_alloc(&security_table, ipt_do_table); - if (IS_ERR(sectbl_ops)) { - xt_unregister_template(&security_table); + if (IS_ERR(sectbl_ops)) return PTR_ERR(sectbl_ops); - } ret = register_pernet_subsys(&iptable_security_net_ops); + if (ret < 0) + goto err_free; + + ret = xt_register_template(&security_table, + iptable_security_table_init); if (ret < 0) { - xt_unregister_template(&security_table); - kfree(sectbl_ops); - return ret; + unregister_pernet_subsys(&iptable_security_net_ops); + goto err_free; } + return 0; +err_free: + kfree(sectbl_ops); return ret; } static void __exit iptable_security_fini(void) { + xt_unregister_template(&security_table); unregister_pernet_subsys(&iptable_security_net_ops); kfree(sectbl_ops); - xt_unregister_template(&security_table); } module_init(iptable_security_init); diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c index e20c41206e29..3ea759b66600 100644 --- a/net/ipv4/raw.c +++ b/net/ipv4/raw.c @@ -390,7 +390,7 @@ static int raw_send_hdrinc(struct sock *sk, struct flowi4 *fl4, * in, reject the frame as invalid */ err = -EINVAL; - if (iphlen > length) + if (iphlen > length || iphlen < sizeof(*iph)) goto error_free; if (iphlen >= sizeof(*iph)) { diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index cee51749df16..f27f50111172 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -299,9 +299,6 @@ enum { DEFINE_PER_CPU(unsigned int, tcp_orphan_count); EXPORT_PER_CPU_SYMBOL_GPL(tcp_orphan_count); -DEFINE_PER_CPU(u32, tcp_tw_isn); -EXPORT_PER_CPU_SYMBOL_GPL(tcp_tw_isn); - long sysctl_tcp_mem[3] __read_mostly; EXPORT_IPV6_MOD(sysctl_tcp_mem); diff --git a/net/ipv4/tcp_ao.c b/net/ipv4/tcp_ao.c index a97cdf3e6af4..0a4b38b315fe 100644 --- a/net/ipv4/tcp_ao.c +++ b/net/ipv4/tcp_ao.c @@ -116,7 +116,8 @@ struct tcp_ao_key *tcp_ao_established_key(const struct sock *sk, { struct tcp_ao_key *key; - hlist_for_each_entry_rcu(key, &ao->head, node, lockdep_sock_is_held(sk)) { + hlist_for_each_entry_rcu(key, &ao->head, node, + sk_fullsock(sk) && lockdep_sock_is_held(sk)) { if ((sndid >= 0 && key->sndid != sndid) || (rcvid >= 0 && key->rcvid != rcvid)) continue; diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index cb4bcc5a8578..a8b626ac1ade 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -7648,6 +7648,7 @@ int tcp_conn_request(struct request_sock_ops *rsk_ops, struct sock *sk, struct sk_buff *skb) { struct tcp_fastopen_cookie foc = { .len = -1 }; + u32 isn = TCP_SKB_CB(skb)->tcp_tw_isn; struct tcp_options_received tmp_opt; const struct tcp_sock *tp = tcp_sk(sk); struct net *net = sock_net(sk); @@ -7658,20 +7659,16 @@ int tcp_conn_request(struct request_sock_ops *rsk_ops, struct dst_entry *dst; struct flowi fl; u8 syncookies; - u32 isn; #ifdef CONFIG_TCP_AO const struct tcp_ao_hdr *aoh; #endif - isn = __this_cpu_read(tcp_tw_isn); - if (isn) { - /* TW buckets are converted to open requests without - * limitations, they conserve resources and peer is - * evidently real one. - */ - __this_cpu_write(tcp_tw_isn, 0); - } else { + /* If isn is non-zero, this SYN originally matched a TIME_WAIT socket. + * TW sockets are converted to open requests without limitations, + * we skip the queue limits and syncookie checks in the block below. + */ + if (!isn) { syncookies = READ_ONCE(net->ipv4.sysctl_tcp_syncookies); if (syncookies == 2 || inet_csk_reqsk_queue_is_full(sk)) { diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index c7b2463c2e25..0bda739f3d68 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -2274,6 +2274,7 @@ lookup: } } + isn = 0; process: if (static_branch_unlikely(&ip4_min_ttl)) { /* min_ttl can be changed concurrently from do_ip_setsockopt() */ @@ -2302,6 +2303,7 @@ process: th = (const struct tcphdr *)skb->data; iph = ip_hdr(skb); tcp_v4_fill_cb(skb, iph, th); + TCP_SKB_CB(skb)->tcp_tw_isn = isn; skb->dev = NULL; @@ -2387,7 +2389,6 @@ do_time_wait: sk = sk2; tcp_v4_restore_cb(skb); refcounted = false; - __this_cpu_write(tcp_tw_isn, isn); goto process; } diff --git a/net/ipv4/udp_offload.c b/net/ipv4/udp_offload.c index 6b1654c1ad4a..9714d40c5b6e 100644 --- a/net/ipv4/udp_offload.c +++ b/net/ipv4/udp_offload.c @@ -483,11 +483,11 @@ struct sk_buff *__udp_gso_segment(struct sk_buff *gso_skb, struct sock *sk = gso_skb->sk; unsigned int sum_truesize = 0; struct sk_buff *segs, *seg; - __be16 newlen, msslen; struct udphdr *uh; unsigned int mss; bool copy_dtor; __sum16 check; + __be16 newlen; int ret = 0; mss = skb_shinfo(gso_skb)->gso_size; @@ -556,15 +556,6 @@ struct sk_buff *__udp_gso_segment(struct sk_buff *gso_skb, return segs; } - msslen = htons(sizeof(*uh) + mss); - - /* GSO partial and frag_list segmentation only requires splitting - * the frame into an MSS multiple and possibly a remainder, both - * cases return a GSO skb. So update the mss now. - */ - if (skb_is_gso(segs)) - mss *= skb_shinfo(segs)->gso_segs; - seg = segs; uh = udp_hdr(seg); @@ -587,7 +578,7 @@ struct sk_buff *__udp_gso_segment(struct sk_buff *gso_skb, if (!seg->next) break; - uh->len = msslen; + uh->len = newlen; uh->check = check; if (seg->ip_summed == CHECKSUM_PARTIAL) @@ -600,9 +591,12 @@ struct sk_buff *__udp_gso_segment(struct sk_buff *gso_skb, uh = udp_hdr(seg); } - /* last packet can be partial gso_size, account for that in checksum */ - newlen = htons(skb_tail_pointer(seg) - skb_transport_header(seg) + - seg->data_len); + /* Unless skb fits perfectly as GSO_PARTIAL, the trailing + * segment may not be full MSS, account for that in the checksum + */ + if (!skb_is_gso(seg)) + newlen = htons(skb_tail_pointer(seg) - + skb_transport_header(seg) + seg->data_len); check = csum16_add(csum16_sub(uh->check, uh->len), newlen); uh->len = newlen; diff --git a/net/ipv6/exthdrs.c b/net/ipv6/exthdrs.c index 03cbce842c1a..cf90f933ca1a 100644 --- a/net/ipv6/exthdrs.c +++ b/net/ipv6/exthdrs.c @@ -910,16 +910,27 @@ static bool ipv6_hop_ra(struct sk_buff *skb, int optoff) static bool ipv6_hop_ioam(struct sk_buff *skb, int optoff) { + enum skb_drop_reason drop_reason; struct ioam6_trace_hdr *trace; struct ioam6_namespace *ns; + struct inet6_dev *idev; struct ioam6_hdr *hdr; + drop_reason = SKB_DROP_REASON_IP_INHDR; + /* Bad alignment (must be 4n-aligned) */ if (optoff & 3) goto drop; + /* Does the device still have IPv6 configuration? */ + idev = __in6_dev_get(skb->dev); + if (!idev) { + drop_reason = SKB_DROP_REASON_IPV6DISABLED; + goto drop; + } + /* Ignore if IOAM is not enabled on ingress */ - if (!READ_ONCE(__in6_dev_get(skb->dev)->cnf.ioam6_enabled)) + if (!READ_ONCE(idev->cnf.ioam6_enabled)) goto ignore; /* Truncated Option header */ @@ -955,9 +966,9 @@ static bool ipv6_hop_ioam(struct sk_buff *skb, int optoff) if (skb_ensure_writable(skb, optoff + 2 + hdr->opt_len)) goto drop; - /* Trace pointer may have changed */ - trace = (struct ioam6_trace_hdr *)(skb_network_header(skb) - + optoff + sizeof(*hdr)); + /* Trace and hdr pointers may have changed */ + hdr = (struct ioam6_hdr *)(skb_network_header(skb) + optoff); + trace = (struct ioam6_trace_hdr *)((u8 *)hdr + sizeof(*hdr)); ioam6_fill_trace_data(skb, ns, trace, true); @@ -972,7 +983,7 @@ ignore: return true; drop: - kfree_skb_reason(skb, SKB_DROP_REASON_IP_INHDR); + kfree_skb_reason(skb, drop_reason); return false; } diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c index d585ac3c1113..9d9c3763f2f5 100644 --- a/net/ipv6/netfilter/ip6_tables.c +++ b/net/ipv6/netfilter/ip6_tables.c @@ -1713,12 +1713,10 @@ do_ip6t_get_ctl(struct sock *sk, int cmd, void __user *user, int *len) static void __ip6t_unregister_table(struct net *net, struct xt_table *table) { - struct xt_table_info *private; - void *loc_cpu_entry; + struct xt_table_info *private = table->private; struct module *table_owner = table->me; struct ip6t_entry *iter; - - private = xt_unregister_table(table); + void *loc_cpu_entry; /* Decrease module usage counts and free resources */ loc_cpu_entry = private->entries; @@ -1727,19 +1725,18 @@ static void __ip6t_unregister_table(struct net *net, struct xt_table *table) if (private->number > private->initial_entries) module_put(table_owner); xt_free_table_info(private); + kfree(table); } int ip6t_register_table(struct net *net, const struct xt_table *table, const struct ip6t_replace *repl, const struct nf_hook_ops *template_ops) { - struct nf_hook_ops *ops; - unsigned int num_ops; - int ret, i; - struct xt_table_info *newinfo; struct xt_table_info bootstrap = {0}; - void *loc_cpu_entry; + struct xt_table_info *newinfo; struct xt_table *new_table; + void *loc_cpu_entry; + int ret; newinfo = xt_alloc_table_info(repl->size); if (!newinfo) @@ -1754,7 +1751,7 @@ int ip6t_register_table(struct net *net, const struct xt_table *table, return ret; } - new_table = xt_register_table(net, table, &bootstrap, newinfo); + new_table = xt_register_table(net, table, template_ops, &bootstrap, newinfo); if (IS_ERR(new_table)) { struct ip6t_entry *iter; @@ -1764,48 +1761,12 @@ int ip6t_register_table(struct net *net, const struct xt_table *table, return PTR_ERR(new_table); } - if (!template_ops) - return 0; - - num_ops = hweight32(table->valid_hooks); - if (num_ops == 0) { - ret = -EINVAL; - goto out_free; - } - - ops = kmemdup_array(template_ops, num_ops, sizeof(*ops), GFP_KERNEL); - if (!ops) { - ret = -ENOMEM; - goto out_free; - } - - for (i = 0; i < num_ops; i++) - ops[i].priv = new_table; - - new_table->ops = ops; - - ret = nf_register_net_hooks(net, ops, num_ops); - if (ret != 0) - goto out_free; - return ret; - -out_free: - __ip6t_unregister_table(net, new_table); - return ret; -} - -void ip6t_unregister_table_pre_exit(struct net *net, const char *name) -{ - struct xt_table *table = xt_find_table(net, NFPROTO_IPV6, name); - - if (table) - nf_unregister_net_hooks(net, table->ops, hweight32(table->valid_hooks)); } void ip6t_unregister_table_exit(struct net *net, const char *name) { - struct xt_table *table = xt_find_table(net, NFPROTO_IPV6, name); + struct xt_table *table = xt_unregister_table_exit(net, NFPROTO_IPV6, name); if (table) __ip6t_unregister_table(net, table); @@ -1894,7 +1855,6 @@ static void __exit ip6_tables_fini(void) } EXPORT_SYMBOL(ip6t_register_table); -EXPORT_SYMBOL(ip6t_unregister_table_pre_exit); EXPORT_SYMBOL(ip6t_unregister_table_exit); EXPORT_SYMBOL(ip6t_do_table); diff --git a/net/ipv6/netfilter/ip6t_hbh.c b/net/ipv6/netfilter/ip6t_hbh.c index e7a3fb9355ee..450dd53846a2 100644 --- a/net/ipv6/netfilter/ip6t_hbh.c +++ b/net/ipv6/netfilter/ip6t_hbh.c @@ -168,6 +168,10 @@ static int hbh_mt6_check(const struct xt_mtchk_param *par) pr_debug("unknown flags %X\n", optsinfo->invflags); return -EINVAL; } + if (optsinfo->optsnr > IP6T_OPTS_OPTSNR) { + pr_debug("too many supported opts specified\n"); + return -EINVAL; + } if (optsinfo->flags & IP6T_OPTS_NSTRICT) { pr_debug("Not strict - not implemented"); diff --git a/net/ipv6/netfilter/ip6table_filter.c b/net/ipv6/netfilter/ip6table_filter.c index e8992693e14a..b074fc477676 100644 --- a/net/ipv6/netfilter/ip6table_filter.c +++ b/net/ipv6/netfilter/ip6table_filter.c @@ -60,7 +60,7 @@ static int __net_init ip6table_filter_net_init(struct net *net) static void __net_exit ip6table_filter_net_pre_exit(struct net *net) { - ip6t_unregister_table_pre_exit(net, "filter"); + xt_unregister_table_pre_exit(net, NFPROTO_IPV6, "filter"); } static void __net_exit ip6table_filter_net_exit(struct net *net) @@ -76,32 +76,32 @@ static struct pernet_operations ip6table_filter_net_ops = { static int __init ip6table_filter_init(void) { - int ret = xt_register_template(&packet_filter, - ip6table_filter_table_init); - - if (ret < 0) - return ret; + int ret; filter_ops = xt_hook_ops_alloc(&packet_filter, ip6t_do_table); - if (IS_ERR(filter_ops)) { - xt_unregister_template(&packet_filter); + if (IS_ERR(filter_ops)) return PTR_ERR(filter_ops); - } ret = register_pernet_subsys(&ip6table_filter_net_ops); + if (ret < 0) + goto err_free; + + ret = xt_register_template(&packet_filter, ip6table_filter_table_init); if (ret < 0) { - xt_unregister_template(&packet_filter); - kfree(filter_ops); - return ret; + unregister_pernet_subsys(&ip6table_filter_net_ops); + goto err_free; } + return 0; +err_free: + kfree(filter_ops); return ret; } static void __exit ip6table_filter_fini(void) { - unregister_pernet_subsys(&ip6table_filter_net_ops); xt_unregister_template(&packet_filter); + unregister_pernet_subsys(&ip6table_filter_net_ops); kfree(filter_ops); } diff --git a/net/ipv6/netfilter/ip6table_mangle.c b/net/ipv6/netfilter/ip6table_mangle.c index 8dd4cd0c47bd..e6ee036a9b2c 100644 --- a/net/ipv6/netfilter/ip6table_mangle.c +++ b/net/ipv6/netfilter/ip6table_mangle.c @@ -89,7 +89,7 @@ static int ip6table_mangle_table_init(struct net *net) static void __net_exit ip6table_mangle_net_pre_exit(struct net *net) { - ip6t_unregister_table_pre_exit(net, "mangle"); + xt_unregister_table_pre_exit(net, NFPROTO_IPV6, "mangle"); } static void __net_exit ip6table_mangle_net_exit(struct net *net) @@ -104,32 +104,33 @@ static struct pernet_operations ip6table_mangle_net_ops = { static int __init ip6table_mangle_init(void) { - int ret = xt_register_template(&packet_mangler, - ip6table_mangle_table_init); - - if (ret < 0) - return ret; + int ret; mangle_ops = xt_hook_ops_alloc(&packet_mangler, ip6table_mangle_hook); - if (IS_ERR(mangle_ops)) { - xt_unregister_template(&packet_mangler); + if (IS_ERR(mangle_ops)) return PTR_ERR(mangle_ops); - } ret = register_pernet_subsys(&ip6table_mangle_net_ops); + if (ret < 0) + goto err_free; + + ret = xt_register_template(&packet_mangler, + ip6table_mangle_table_init); if (ret < 0) { - xt_unregister_template(&packet_mangler); - kfree(mangle_ops); - return ret; + unregister_pernet_subsys(&ip6table_mangle_net_ops); + goto err_free; } + return 0; +err_free: + kfree(mangle_ops); return ret; } static void __exit ip6table_mangle_fini(void) { - unregister_pernet_subsys(&ip6table_mangle_net_ops); xt_unregister_template(&packet_mangler); + unregister_pernet_subsys(&ip6table_mangle_net_ops); kfree(mangle_ops); } diff --git a/net/ipv6/netfilter/ip6table_nat.c b/net/ipv6/netfilter/ip6table_nat.c index 5be723232df8..c2394e2c94b5 100644 --- a/net/ipv6/netfilter/ip6table_nat.c +++ b/net/ipv6/netfilter/ip6table_nat.c @@ -121,8 +121,11 @@ static int ip6table_nat_table_init(struct net *net) } ret = ip6t_nat_register_lookups(net); - if (ret < 0) + if (ret < 0) { + xt_unregister_table_pre_exit(net, NFPROTO_IPV6, "nat"); + synchronize_rcu(); ip6t_unregister_table_exit(net, "nat"); + } kfree(repl); return ret; @@ -131,6 +134,7 @@ static int ip6table_nat_table_init(struct net *net) static void __net_exit ip6table_nat_net_pre_exit(struct net *net) { ip6t_nat_unregister_lookups(net); + xt_unregister_table_pre_exit(net, NFPROTO_IPV6, "nat"); } static void __net_exit ip6table_nat_net_exit(struct net *net) diff --git a/net/ipv6/netfilter/ip6table_raw.c b/net/ipv6/netfilter/ip6table_raw.c index fc9f6754028f..3b161ee875bc 100644 --- a/net/ipv6/netfilter/ip6table_raw.c +++ b/net/ipv6/netfilter/ip6table_raw.c @@ -52,7 +52,7 @@ static int ip6table_raw_table_init(struct net *net) static void __net_exit ip6table_raw_net_pre_exit(struct net *net) { - ip6t_unregister_table_pre_exit(net, "raw"); + xt_unregister_table_pre_exit(net, NFPROTO_IPV6, "raw"); } static void __net_exit ip6table_raw_net_exit(struct net *net) @@ -75,31 +75,31 @@ static int __init ip6table_raw_init(void) pr_info("Enabling raw table before defrag\n"); } - ret = xt_register_template(table, ip6table_raw_table_init); - if (ret < 0) - return ret; - /* Register hooks */ rawtable_ops = xt_hook_ops_alloc(table, ip6t_do_table); - if (IS_ERR(rawtable_ops)) { - xt_unregister_template(table); + if (IS_ERR(rawtable_ops)) return PTR_ERR(rawtable_ops); - } ret = register_pernet_subsys(&ip6table_raw_net_ops); + if (ret < 0) + goto err_free; + + ret = xt_register_template(table, ip6table_raw_table_init); if (ret < 0) { - kfree(rawtable_ops); - xt_unregister_template(table); - return ret; + unregister_pernet_subsys(&ip6table_raw_net_ops); + goto err_free; } + return 0; +err_free: + kfree(rawtable_ops); return ret; } static void __exit ip6table_raw_fini(void) { - unregister_pernet_subsys(&ip6table_raw_net_ops); xt_unregister_template(&packet_raw); + unregister_pernet_subsys(&ip6table_raw_net_ops); kfree(rawtable_ops); } diff --git a/net/ipv6/netfilter/ip6table_security.c b/net/ipv6/netfilter/ip6table_security.c index 4df14a9bae78..4bd5d97b8ab6 100644 --- a/net/ipv6/netfilter/ip6table_security.c +++ b/net/ipv6/netfilter/ip6table_security.c @@ -49,7 +49,7 @@ static int ip6table_security_table_init(struct net *net) static void __net_exit ip6table_security_net_pre_exit(struct net *net) { - ip6t_unregister_table_pre_exit(net, "security"); + xt_unregister_table_pre_exit(net, NFPROTO_IPV6, "security"); } static void __net_exit ip6table_security_net_exit(struct net *net) @@ -64,32 +64,33 @@ static struct pernet_operations ip6table_security_net_ops = { static int __init ip6table_security_init(void) { - int ret = xt_register_template(&security_table, - ip6table_security_table_init); - - if (ret < 0) - return ret; + int ret; sectbl_ops = xt_hook_ops_alloc(&security_table, ip6t_do_table); - if (IS_ERR(sectbl_ops)) { - xt_unregister_template(&security_table); + if (IS_ERR(sectbl_ops)) return PTR_ERR(sectbl_ops); - } ret = register_pernet_subsys(&ip6table_security_net_ops); + if (ret < 0) + goto err_free; + + ret = xt_register_template(&security_table, + ip6table_security_table_init); if (ret < 0) { - kfree(sectbl_ops); - xt_unregister_template(&security_table); - return ret; + unregister_pernet_subsys(&ip6table_security_net_ops); + goto err_free; } + return 0; +err_free: + kfree(sectbl_ops); return ret; } static void __exit ip6table_security_fini(void) { - unregister_pernet_subsys(&ip6table_security_net_ops); xt_unregister_template(&security_table); + unregister_pernet_subsys(&ip6table_security_net_ops); kfree(sectbl_ops); } diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index bb09d5ccf599..a41ea2a866ee 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -1818,6 +1818,7 @@ lookup: } } + isn = 0; process: if (static_branch_unlikely(&ip6_min_hopcount)) { /* min_hopcount can be changed concurrently from do_ipv6_setsockopt() */ @@ -1846,6 +1847,7 @@ process: th = (const struct tcphdr *)skb->data; hdr = ipv6_hdr(skb); tcp_v6_fill_cb(skb, hdr, th); + TCP_SKB_CB(skb)->tcp_tw_isn = isn; skb->dev = NULL; @@ -1933,7 +1935,6 @@ do_time_wait: sk = sk2; tcp_v6_restore_cb(skb); refcounted = false; - __this_cpu_write(tcp_tw_isn, isn); goto process; } diff --git a/net/l2tp/l2tp_core.c b/net/l2tp/l2tp_core.c index 157fc23ce4e1..1455f67e01dd 100644 --- a/net/l2tp/l2tp_core.c +++ b/net/l2tp/l2tp_core.c @@ -1360,7 +1360,7 @@ static void l2tp_session_unhash(struct l2tp_session *session) spin_lock_bh(&pn->l2tp_session_idr_lock); /* Remove from the per-tunnel list */ - list_del_init(&session->list); + list_del_rcu(&session->list); /* Remove from per-net IDR */ if (tunnel->version == L2TP_HDR_VER_3) { diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index 53bd98646e33..eba890366e9f 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -8070,6 +8070,7 @@ ieee80211_parse_neg_ttlm(struct ieee80211_sub_if_data *sdata, "No active links for TID %d", tid); return -EINVAL; } + pos += map_size; } else { map = 0; } @@ -8088,7 +8089,6 @@ ieee80211_parse_neg_ttlm(struct ieee80211_sub_if_data *sdata, default: return -EINVAL; } - pos += map_size; } return 0; } @@ -11137,6 +11137,9 @@ static void ieee80211_ml_epcs(struct ieee80211_sub_if_data *sdata, control = get_unaligned_le16(pos); link_id = control & IEEE80211_MLE_STA_EPCS_CONTROL_LINK_ID; + if (link_id >= IEEE80211_MLD_MAX_NUM_LINKS) + continue; + link = sdata_dereference(sdata->link[link_id], sdata); if (!link) continue; diff --git a/net/mac80211/parse.c b/net/mac80211/parse.c index 2b3632c6008a..77894d997113 100644 --- a/net/mac80211/parse.c +++ b/net/mac80211/parse.c @@ -34,6 +34,22 @@ #include "led.h" #include "wep.h" +static const u8 empty_non_inheritance[] = { + WLAN_EID_EXTENSION, 1, WLAN_EID_EXT_NON_INHERITANCE, + /* + * cfg80211_is_element_inherited() hardcodes elements that + * cannot be inherited, so we just need an empty one to be + * calling it at all. + */ +}; + +struct ieee80211_elem_defrag { + const struct element *elem; + /* container start/len */ + const u8 *start; + size_t len; +}; + struct ieee80211_elems_parse { /* must be first for kfree to work */ struct ieee802_11_elems elems; @@ -41,11 +57,7 @@ struct ieee80211_elems_parse { /* The basic Multi-Link element in the original elements */ const struct element *ml_basic_elem; - /* The reconfiguration Multi-Link element in the original elements */ - const struct element *ml_reconf_elem; - - /* The EPCS Multi-Link element in the original elements */ - const struct element *ml_epcs_elem; + struct ieee80211_elem_defrag ml_reconf, ml_epcs; bool multi_link_inner; bool skip_vendor; @@ -162,10 +174,14 @@ ieee80211_parse_extension_element(u32 *crc, } break; case IEEE80211_ML_CONTROL_TYPE_RECONF: - elems_parse->ml_reconf_elem = elem; + elems_parse->ml_reconf.elem = elem; + elems_parse->ml_reconf.start = params->start; + elems_parse->ml_reconf.len = params->len; break; case IEEE80211_ML_CONTROL_TYPE_PRIO_ACCESS: - elems_parse->ml_epcs_elem = elem; + elems_parse->ml_epcs.elem = elem; + elems_parse->ml_epcs.start = params->start; + elems_parse->ml_epcs.len = params->len; break; default: break; @@ -916,7 +932,7 @@ ieee80211_prep_mle_link_parse(struct ieee80211_elems_parse *elems_parse, { struct ieee802_11_elems *elems = &elems_parse->elems; struct ieee80211_mle_per_sta_profile *prof; - const struct element *tmp; + const struct element *tmp, *ret; ssize_t ml_len; const u8 *end; @@ -986,50 +1002,40 @@ ieee80211_prep_mle_link_parse(struct ieee80211_elems_parse *elems_parse, sub->from_ap = params->from_ap; sub->link_id = -1; - return cfg80211_find_ext_elem(WLAN_EID_EXT_NON_INHERITANCE, - sub->start, sub->len); -} - -static void -ieee80211_mle_defrag_reconf(struct ieee80211_elems_parse *elems_parse) -{ - struct ieee802_11_elems *elems = &elems_parse->elems; - ssize_t ml_len; + ret = cfg80211_find_ext_elem(WLAN_EID_EXT_NON_INHERITANCE, + sub->start, sub->len); + if (ret) + return ret; - ml_len = cfg80211_defragment_element(elems_parse->ml_reconf_elem, - elems->ie_start, - elems->total_len, - elems_parse->scratch_pos, - elems_parse->scratch + - elems_parse->scratch_len - - elems_parse->scratch_pos, - WLAN_EID_FRAGMENT); - if (ml_len < 0) - return; - elems->ml_reconf = (void *)elems_parse->scratch_pos; - elems->ml_reconf_len = ml_len; - elems_parse->scratch_pos += ml_len; + /* + * Since we know we want and found a profile, apply an empty + * non-inheritance if the profile didn't have one, so that any + * element that shouldn't be inherited by spec isn't. + */ + return (const void *)empty_non_inheritance; } -static void -ieee80211_mle_defrag_epcs(struct ieee80211_elems_parse *elems_parse) +static const void * +ieee80211_mle_defrag(struct ieee80211_elems_parse *elems_parse, + struct ieee80211_elem_defrag *defrag, + size_t *out_len) { - struct ieee802_11_elems *elems = &elems_parse->elems; + const void *ret; ssize_t ml_len; - ml_len = cfg80211_defragment_element(elems_parse->ml_epcs_elem, - elems->ie_start, - elems->total_len, + ml_len = cfg80211_defragment_element(defrag->elem, + defrag->start, defrag->len, elems_parse->scratch_pos, elems_parse->scratch + elems_parse->scratch_len - elems_parse->scratch_pos, WLAN_EID_FRAGMENT); if (ml_len < 0) - return; - elems->ml_epcs = (void *)elems_parse->scratch_pos; - elems->ml_epcs_len = ml_len; + return NULL; + ret = elems_parse->scratch_pos; + *out_len = ml_len; elems_parse->scratch_pos += ml_len; + return ret; } struct ieee802_11_elems * @@ -1042,6 +1048,7 @@ ieee802_11_parse_elems_full(struct ieee80211_elems_parse_params *params) size_t scratch_len = 3 * params->len; bool multi_link_inner = false; + BUILD_BUG_ON(sizeof(empty_non_inheritance) != empty_non_inheritance[1] + 2); BUILD_BUG_ON(offsetof(typeof(*elems_parse), elems) != 0); /* cannot parse for both a specific link and non-transmitted BSS */ @@ -1089,6 +1096,17 @@ ieee802_11_parse_elems_full(struct ieee80211_elems_parse_params *params) non_inherit = cfg80211_find_ext_elem(WLAN_EID_EXT_NON_INHERITANCE, sub.start, nontx_len); + /* + * If it's a non-transmitted BSS, we shouldn't pick + * any elements in the outer parsing that shouldn't + * be inherited. If the profile has a non-inheritance + * element this automatically happens, but if not then + * provide an empty one so that the hard-coded elements + * in cfg80211_is_element_inherited() are ignored, but + * it must be called. + */ + if (params->bss->transmitted_bss && !non_inherit) + non_inherit = (const void *)empty_non_inheritance; } else { /* must always parse to get elems_parse->ml_basic_elem */ non_inherit = ieee80211_prep_mle_link_parse(elems_parse, params, @@ -1109,9 +1127,12 @@ ieee802_11_parse_elems_full(struct ieee80211_elems_parse_params *params) _ieee802_11_parse_elems_full(&sub, elems_parse, NULL); } - ieee80211_mle_defrag_reconf(elems_parse); - - ieee80211_mle_defrag_epcs(elems_parse); + elems->ml_reconf = ieee80211_mle_defrag(elems_parse, + &elems_parse->ml_reconf, + &elems->ml_reconf_len); + elems->ml_epcs = ieee80211_mle_defrag(elems_parse, + &elems_parse->ml_epcs, + &elems->ml_epcs_len); if (elems->tim && !elems->parse_error) { const struct ieee80211_tim_ie *tim_ie = elems->tim; diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c index 7a8c964b0ae6..e9570257151b 100644 --- a/net/mac80211/rx.c +++ b/net/mac80211/rx.c @@ -4941,6 +4941,7 @@ static bool ieee80211_invoke_fast_rx(struct ieee80211_rx_data *rx, u8 sa[ETH_ALEN]; } addrs __aligned(2); struct ieee80211_sta_rx_stats *stats; + u32 encoded_rate; /* for parallel-rx, we need to have DUP_VALIDATED, otherwise we write * to a common data structure; drivers can implement that per queue @@ -5048,11 +5049,14 @@ static bool ieee80211_invoke_fast_rx(struct ieee80211_rx_data *rx, /* push the addresses in front */ memcpy(skb_push(skb, sizeof(addrs)), &addrs, sizeof(addrs)); + /* capture before mesh forward may memset or free skb->cb */ + encoded_rate = sta_stats_encode_rate(status); + res = ieee80211_rx_mesh_data(rx->sdata, rx->sta, rx->skb); switch (res) { case RX_QUEUED: stats->last_rx = jiffies; - stats->last_rate = sta_stats_encode_rate(status); + stats->last_rate = encoded_rate; return true; case RX_CONTINUE: break; diff --git a/net/mptcp/pm.c b/net/mptcp/pm.c index 3c152bf66cd5..3e770c7407e1 100644 --- a/net/mptcp/pm.c +++ b/net/mptcp/pm.c @@ -364,7 +364,13 @@ static void mptcp_pm_add_timer(struct timer_list *timer) spin_lock_bh(&msk->pm.lock); - if (!mptcp_pm_should_add_signal_addr(msk)) { + /* The cancel path (mptcp_pm_del_add_timer()) can race with this + * callback. Once cancel updates retrans_times to MAX, suppress further + * retransmissions here. If this callback acquires pm.lock first, one + * final transmit attempt is still possible. + */ + if (entry->retrans_times < ADD_ADDR_RETRANS_MAX && + !mptcp_pm_should_add_signal_addr(msk)) { pr_debug("retransmit ADD_ADDR id=%d\n", entry->addr.id); mptcp_pm_announce_addr(msk, &entry->addr, false); mptcp_pm_add_addr_send_ack(msk); @@ -414,8 +420,12 @@ mptcp_pm_del_add_timer(struct mptcp_sock *msk, /* Note: entry might have been removed by another thread. * We hold rcu_read_lock() to ensure it is not freed under us. */ - if (stop_timer) - sk_stop_timer_sync(sk, &entry->add_timer); + if (stop_timer) { + if (check_id) + sk_stop_timer(sk, &entry->add_timer); + else + sk_stop_timer_sync(sk, &entry->add_timer); + } rcu_read_unlock(); return entry; @@ -882,6 +892,7 @@ bool mptcp_pm_add_addr_signal(struct mptcp_sock *msk, const struct sk_buff *skb, struct mptcp_addr_info *addr, bool *echo, bool *drop_other_suboptions) { + bool skip_add_addr = false; int ret = false; u8 add_addr; u8 family; @@ -903,24 +914,49 @@ bool mptcp_pm_add_addr_signal(struct mptcp_sock *msk, const struct sk_buff *skb, } *echo = mptcp_pm_should_add_signal_echo(msk); - port = !!(*echo ? msk->pm.remote.port : msk->pm.local.port); - - family = *echo ? msk->pm.remote.family : msk->pm.local.family; - if (remaining < mptcp_add_addr_len(family, *echo, port)) - goto out_unlock; - if (*echo) { *addr = msk->pm.remote; add_addr = msk->pm.addr_signal & ~BIT(MPTCP_ADD_ADDR_ECHO); + port = !!msk->pm.remote.port; + family = msk->pm.remote.family; } else { *addr = msk->pm.local; add_addr = msk->pm.addr_signal & ~BIT(MPTCP_ADD_ADDR_SIGNAL); + port = !!msk->pm.local.port; + family = msk->pm.local.family; } - WRITE_ONCE(msk->pm.addr_signal, add_addr); + + if (remaining < mptcp_add_addr_len(family, *echo, port)) { + struct net *net = sock_net((struct sock *)msk); + + if (!*drop_other_suboptions) + goto out_unlock; + + if (*echo) { + MPTCP_INC_STATS(net, MPTCP_MIB_ECHOADDTXDROP); + } else { + skip_add_addr = true; + MPTCP_INC_STATS(net, MPTCP_MIB_ADDADDRTXDROP); + } + goto drop_signal_mark; + } + ret = true; +drop_signal_mark: + WRITE_ONCE(msk->pm.addr_signal, add_addr); + out_unlock: spin_unlock_bh(&msk->pm.lock); + + /* On pure-ACK option-space exhaustion, stop retrying this ADD_ADDR: + * clear the signal bit, cancel the matching retransmission timer, and + * let the PM state machine progress. + */ + if (skip_add_addr) { + mptcp_pm_del_add_timer(msk, addr, true); + mptcp_pm_subflow_established(msk); + } return ret; } diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index 8ef967aa80a0..479fd51609f2 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -397,12 +397,26 @@ static bool __mptcp_move_skb(struct sock *sk, struct sk_buff *skb) return false; } - /* old data, keep it simple and drop the whole pkt, sender - * will retransmit as needed, if needed. + /* Completely old data? */ + if (!after64(MPTCP_SKB_CB(skb)->end_seq, msk->ack_seq)) { + MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_DUPDATA); + mptcp_drop(sk, skb); + return false; + } + + /* Partial packet: map_seq < ack_seq < end_seq. + * Skip the already-acked bytes and enqueue the new data. */ - MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_DUPDATA); - mptcp_drop(sk, skb); - return false; + copy_len = MPTCP_SKB_CB(skb)->end_seq - msk->ack_seq; + MPTCP_SKB_CB(skb)->offset += msk->ack_seq - MPTCP_SKB_CB(skb)->map_seq; + MPTCP_SKB_CB(skb)->map_seq += msk->ack_seq - + MPTCP_SKB_CB(skb)->map_seq; + msk->bytes_received += copy_len; + WRITE_ONCE(msk->ack_seq, msk->ack_seq + copy_len); + + skb_set_owner_r(skb, sk); + __skb_queue_tail(&sk->sk_receive_queue, skb); + return true; } static void mptcp_stop_rtx_timer(struct sock *sk) @@ -3456,6 +3470,7 @@ static int mptcp_disconnect(struct sock *sk, int flags) /* for fallback's sake */ WRITE_ONCE(msk->ack_seq, 0); + atomic64_set(&msk->rcv_wnd_sent, 0); WRITE_ONCE(sk->sk_shutdown, 0); sk_error_report(sk); diff --git a/net/netfilter/ipset/ip_set_hash_ipmark.c b/net/netfilter/ipset/ip_set_hash_ipmark.c index a22ec1a6f6ec..e26ca2a370e3 100644 --- a/net/netfilter/ipset/ip_set_hash_ipmark.c +++ b/net/netfilter/ipset/ip_set_hash_ipmark.c @@ -150,7 +150,7 @@ hash_ipmark4_uadt(struct ip_set *set, struct nlattr *tb[], if (retried) ip = ntohl(h->next.ip); - for (; ip <= ip_to; ip++, i++) { + for (; ip <= ip_to; i++) { e.ip = htonl(ip); if (i > IPSET_MAX_RANGE) { hash_ipmark4_data_next(&h->next, &e); @@ -162,6 +162,10 @@ hash_ipmark4_uadt(struct ip_set *set, struct nlattr *tb[], return ret; ret = 0; + + if (ip == ip_to) + break; + ip++; } return ret; } diff --git a/net/netfilter/ipset/ip_set_hash_ipport.c b/net/netfilter/ipset/ip_set_hash_ipport.c index e977b5a9c48d..41ca24a22a02 100644 --- a/net/netfilter/ipset/ip_set_hash_ipport.c +++ b/net/netfilter/ipset/ip_set_hash_ipport.c @@ -186,7 +186,7 @@ hash_ipport4_uadt(struct ip_set *set, struct nlattr *tb[], if (retried) ip = ntohl(h->next.ip); - for (; ip <= ip_to; ip++) { + for (; ip <= ip_to;) { p = retried && ip == ntohl(h->next.ip) ? ntohs(h->next.port) : port; for (; p <= port_to; p++, i++) { @@ -203,6 +203,9 @@ hash_ipport4_uadt(struct ip_set *set, struct nlattr *tb[], ret = 0; } + if (ip == ip_to) + break; + ip++; } return ret; } diff --git a/net/netfilter/ipset/ip_set_hash_ipportip.c b/net/netfilter/ipset/ip_set_hash_ipportip.c index 39a01934b153..b9ac2efaa15c 100644 --- a/net/netfilter/ipset/ip_set_hash_ipportip.c +++ b/net/netfilter/ipset/ip_set_hash_ipportip.c @@ -182,7 +182,7 @@ hash_ipportip4_uadt(struct ip_set *set, struct nlattr *tb[], if (retried) ip = ntohl(h->next.ip); - for (; ip <= ip_to; ip++) { + for (; ip <= ip_to;) { p = retried && ip == ntohl(h->next.ip) ? ntohs(h->next.port) : port; for (; p <= port_to; p++, i++) { @@ -199,6 +199,9 @@ hash_ipportip4_uadt(struct ip_set *set, struct nlattr *tb[], ret = 0; } + if (ip == ip_to) + break; + ip++; } return ret; } diff --git a/net/netfilter/ipset/ip_set_hash_ipportnet.c b/net/netfilter/ipset/ip_set_hash_ipportnet.c index 5c6de605a9fb..2d6652d43199 100644 --- a/net/netfilter/ipset/ip_set_hash_ipportnet.c +++ b/net/netfilter/ipset/ip_set_hash_ipportnet.c @@ -274,7 +274,7 @@ hash_ipportnet4_uadt(struct ip_set *set, struct nlattr *tb[], p = port; ip2 = ip2_from; } - for (; ip <= ip_to; ip++) { + for (; ip <= ip_to;) { e.ip = htonl(ip); for (; p <= port_to; p++) { e.port = htons(p); @@ -298,6 +298,9 @@ hash_ipportnet4_uadt(struct ip_set *set, struct nlattr *tb[], ip2 = ip2_from; } p = port; + if (ip == ip_to) + break; + ip++; } return ret; } diff --git a/net/netfilter/nf_conntrack_broadcast.c b/net/netfilter/nf_conntrack_broadcast.c index 4f39bf7c843f..75e53fde6b29 100644 --- a/net/netfilter/nf_conntrack_broadcast.c +++ b/net/netfilter/nf_conntrack_broadcast.c @@ -72,6 +72,7 @@ int nf_conntrack_broadcast_help(struct sk_buff *skb, exp->flags = NF_CT_EXPECT_PERMANENT; exp->class = NF_CT_EXPECT_CLASS_DEFAULT; rcu_assign_pointer(exp->helper, helper); + rcu_assign_pointer(exp->assign_helper, NULL); write_pnet(&exp->net, net); #ifdef CONFIG_NF_CONNTRACK_ZONES exp->zone = ct->zone; diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index 27ce5fda8993..b5ee274be773 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -1814,14 +1814,17 @@ init_conntrack(struct net *net, struct nf_conn *tmpl, spin_lock_bh(&nf_conntrack_expect_lock); exp = nf_ct_find_expectation(net, zone, tuple, !tmpl || nf_ct_is_confirmed(tmpl)); if (exp) { + struct nf_conntrack_helper *assign_helper; + /* Welcome, Mr. Bond. We've been expecting you... */ __set_bit(IPS_EXPECTED_BIT, &ct->status); /* exp->master safe, refcnt bumped in nf_ct_find_expectation */ ct->master = exp->master; - if (exp->helper) { + assign_helper = rcu_dereference(exp->assign_helper); + if (assign_helper) { help = nf_ct_helper_ext_add(ct, GFP_ATOMIC); if (help) - rcu_assign_pointer(help->helper, exp->helper); + rcu_assign_pointer(help->helper, assign_helper); } #ifdef CONFIG_NF_CONNTRACK_MARK diff --git a/net/netfilter/nf_conntrack_expect.c b/net/netfilter/nf_conntrack_expect.c index 24d0576d84b7..8e943efbdf0a 100644 --- a/net/netfilter/nf_conntrack_expect.c +++ b/net/netfilter/nf_conntrack_expect.c @@ -344,6 +344,7 @@ void nf_ct_expect_init(struct nf_conntrack_expect *exp, unsigned int class, helper = rcu_dereference(help->helper); rcu_assign_pointer(exp->helper, helper); + rcu_assign_pointer(exp->assign_helper, NULL); write_pnet(&exp->net, net); #ifdef CONFIG_NF_CONNTRACK_ZONES exp->zone = ct->zone; diff --git a/net/netfilter/nf_conntrack_h323_main.c b/net/netfilter/nf_conntrack_h323_main.c index 3f5c50455b71..b2fe6554b9cf 100644 --- a/net/netfilter/nf_conntrack_h323_main.c +++ b/net/netfilter/nf_conntrack_h323_main.c @@ -643,7 +643,7 @@ static int expect_h245(struct sk_buff *skb, struct nf_conn *ct, &ct->tuplehash[!dir].tuple.src.u3, &ct->tuplehash[!dir].tuple.dst.u3, IPPROTO_TCP, NULL, &port); - rcu_assign_pointer(exp->helper, &nf_conntrack_helper_h245); + rcu_assign_pointer(exp->assign_helper, &nf_conntrack_helper_h245); nathook = rcu_dereference(nfct_h323_nat_hook); if (memcmp(&ct->tuplehash[dir].tuple.src.u3, @@ -767,7 +767,7 @@ static int expect_callforwarding(struct sk_buff *skb, nf_ct_expect_init(exp, NF_CT_EXPECT_CLASS_DEFAULT, nf_ct_l3num(ct), &ct->tuplehash[!dir].tuple.src.u3, &addr, IPPROTO_TCP, NULL, &port); - rcu_assign_pointer(exp->helper, nf_conntrack_helper_q931); + rcu_assign_pointer(exp->assign_helper, nf_conntrack_helper_q931); nathook = rcu_dereference(nfct_h323_nat_hook); if (memcmp(&ct->tuplehash[dir].tuple.src.u3, @@ -1234,7 +1234,7 @@ static int expect_q931(struct sk_buff *skb, struct nf_conn *ct, &ct->tuplehash[!dir].tuple.src.u3 : NULL, &ct->tuplehash[!dir].tuple.dst.u3, IPPROTO_TCP, NULL, &port); - rcu_assign_pointer(exp->helper, nf_conntrack_helper_q931); + rcu_assign_pointer(exp->assign_helper, nf_conntrack_helper_q931); exp->flags = NF_CT_EXPECT_PERMANENT; /* Accept multiple calls */ nathook = rcu_dereference(nfct_h323_nat_hook); @@ -1306,7 +1306,7 @@ static int process_gcf(struct sk_buff *skb, struct nf_conn *ct, nf_ct_expect_init(exp, NF_CT_EXPECT_CLASS_DEFAULT, nf_ct_l3num(ct), &ct->tuplehash[!dir].tuple.src.u3, &addr, IPPROTO_UDP, NULL, &port); - rcu_assign_pointer(exp->helper, nf_conntrack_helper_ras); + rcu_assign_pointer(exp->assign_helper, nf_conntrack_helper_ras); if (nf_ct_expect_related(exp, 0) == 0) { pr_debug("nf_ct_ras: expect RAS "); @@ -1523,7 +1523,7 @@ static int process_acf(struct sk_buff *skb, struct nf_conn *ct, &ct->tuplehash[!dir].tuple.src.u3, &addr, IPPROTO_TCP, NULL, &port); exp->flags = NF_CT_EXPECT_PERMANENT; - rcu_assign_pointer(exp->helper, nf_conntrack_helper_q931); + rcu_assign_pointer(exp->assign_helper, nf_conntrack_helper_q931); if (nf_ct_expect_related(exp, 0) == 0) { pr_debug("nf_ct_ras: expect Q.931 "); @@ -1577,7 +1577,7 @@ static int process_lcf(struct sk_buff *skb, struct nf_conn *ct, &ct->tuplehash[!dir].tuple.src.u3, &addr, IPPROTO_TCP, NULL, &port); exp->flags = NF_CT_EXPECT_PERMANENT; - rcu_assign_pointer(exp->helper, nf_conntrack_helper_q931); + rcu_assign_pointer(exp->assign_helper, nf_conntrack_helper_q931); if (nf_ct_expect_related(exp, 0) == 0) { pr_debug("nf_ct_ras: expect Q.931 "); diff --git a/net/netfilter/nf_conntrack_helper.c b/net/netfilter/nf_conntrack_helper.c index a715304a53d8..b594cd244fe1 100644 --- a/net/netfilter/nf_conntrack_helper.c +++ b/net/netfilter/nf_conntrack_helper.c @@ -400,6 +400,11 @@ static bool expect_iter_me(struct nf_conntrack_expect *exp, void *data) this = rcu_dereference_protected(exp->helper, lockdep_is_held(&nf_conntrack_expect_lock)); + if (this == me) + return true; + + this = rcu_dereference_protected(exp->assign_helper, + lockdep_is_held(&nf_conntrack_expect_lock)); return this == me; } diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c index a20cd82446c5..e2f149aabbe8 100644 --- a/net/netfilter/nf_conntrack_netlink.c +++ b/net/netfilter/nf_conntrack_netlink.c @@ -2636,6 +2636,7 @@ static const struct nla_policy exp_nla_policy[CTA_EXPECT_MAX+1] = { static struct nf_conntrack_expect * ctnetlink_alloc_expect(const struct nlattr *const cda[], struct nf_conn *ct, + const struct nf_conntrack_helper *assign_helper, struct nf_conntrack_tuple *tuple, struct nf_conntrack_tuple *mask); @@ -2862,6 +2863,7 @@ static int ctnetlink_glue_attach_expect(const struct nlattr *attr, struct nf_conn *ct, u32 portid, u32 report) { + struct nf_conntrack_helper *assign_helper = NULL; struct nlattr *cda[CTA_EXPECT_MAX+1]; struct nf_conntrack_tuple tuple, mask; struct nf_conntrack_expect *exp; @@ -2877,8 +2879,18 @@ ctnetlink_glue_attach_expect(const struct nlattr *attr, struct nf_conn *ct, if (err < 0) return err; + if (cda[CTA_EXPECT_HELP_NAME]) { + const char *helpname = nla_data(cda[CTA_EXPECT_HELP_NAME]); + + assign_helper = __nf_conntrack_helper_find(helpname, + nf_ct_l3num(ct), + tuple.dst.protonum); + if (!assign_helper) + return -EOPNOTSUPP; + } + exp = ctnetlink_alloc_expect((const struct nlattr * const *)cda, ct, - &tuple, &mask); + assign_helper, &tuple, &mask); if (IS_ERR(exp)) return PTR_ERR(exp); @@ -3517,6 +3529,7 @@ ctnetlink_parse_expect_nat(const struct nlattr *attr, static struct nf_conntrack_expect * ctnetlink_alloc_expect(const struct nlattr * const cda[], struct nf_conn *ct, + const struct nf_conntrack_helper *assign_helper, struct nf_conntrack_tuple *tuple, struct nf_conntrack_tuple *mask) { @@ -3570,6 +3583,7 @@ ctnetlink_alloc_expect(const struct nlattr * const cda[], struct nf_conn *ct, exp->zone = ct->zone; #endif rcu_assign_pointer(exp->helper, helper); + rcu_assign_pointer(exp->assign_helper, assign_helper); exp->tuple = *tuple; exp->mask.src.u3 = mask->src.u3; exp->mask.src.u.all = mask->src.u.all; @@ -3625,7 +3639,7 @@ ctnetlink_create_expect(struct net *net, ct = nf_ct_tuplehash_to_ctrack(h); rcu_read_lock(); - exp = ctnetlink_alloc_expect(cda, ct, &tuple, &mask); + exp = ctnetlink_alloc_expect(cda, ct, NULL, &tuple, &mask); if (IS_ERR(exp)) { err = PTR_ERR(exp); goto err_rcu; diff --git a/net/netfilter/nf_conntrack_sip.c b/net/netfilter/nf_conntrack_sip.c index 81534213f00f..fd4326ee1aca 100644 --- a/net/netfilter/nf_conntrack_sip.c +++ b/net/netfilter/nf_conntrack_sip.c @@ -1384,7 +1384,7 @@ static int process_register_request(struct sk_buff *skb, unsigned int protoff, nf_ct_expect_init(exp, SIP_EXPECT_SIGNALLING, nf_ct_l3num(ct), saddr, &daddr, proto, NULL, &port); exp->timeout.expires = sip_timeout * HZ; - rcu_assign_pointer(exp->helper, helper); + rcu_assign_pointer(exp->assign_helper, helper); exp->flags = NF_CT_EXPECT_PERMANENT | NF_CT_EXPECT_INACTIVE; hooks = rcu_dereference(nf_nat_sip_hooks); diff --git a/net/netfilter/nf_queue.c b/net/netfilter/nf_queue.c index 7f12e56e6e52..dd416c8532c5 100644 --- a/net/netfilter/nf_queue.c +++ b/net/netfilter/nf_queue.c @@ -60,6 +60,7 @@ static void nf_queue_entry_release_refs(struct nf_queue_entry *entry) struct nf_hook_state *state = &entry->state; /* Release those devices we held, or Alexey will kill me. */ + dev_put(entry->skb_dev); dev_put(state->in); dev_put(state->out); if (state->sk) @@ -101,6 +102,7 @@ bool nf_queue_entry_get_refs(struct nf_queue_entry *entry) if (state->sk && !refcount_inc_not_zero(&state->sk->sk_refcnt)) return false; + dev_hold(entry->skb_dev); dev_hold(state->in); dev_hold(state->out); @@ -201,11 +203,11 @@ static int __nf_queue(struct sk_buff *skb, const struct nf_hook_state *state, *entry = (struct nf_queue_entry) { .skb = skb, + .skb_dev = skb->dev, .state = *state, .hook_index = index, .size = sizeof(*entry) + route_key_size, }; - __nf_queue_entry_init_physdevs(entry); if (!nf_queue_entry_get_refs(entry)) { diff --git a/net/netfilter/nfnetlink_queue.c b/net/netfilter/nfnetlink_queue.c index 8e02f84784da..0529a19ca9a8 100644 --- a/net/netfilter/nfnetlink_queue.c +++ b/net/netfilter/nfnetlink_queue.c @@ -1198,6 +1198,8 @@ dev_cmp(struct nf_queue_entry *entry, unsigned long ifindex) if (physinif == ifindex || physoutif == ifindex) return 1; #endif + if (entry->skb_dev && entry->skb_dev->ifindex == ifindex) + return 1; if (entry->state.in) if (entry->state.in->ifindex == ifindex) return 1; diff --git a/net/netfilter/nft_inner.c b/net/netfilter/nft_inner.c index c4569d4b9228..ad08a43535b5 100644 --- a/net/netfilter/nft_inner.c +++ b/net/netfilter/nft_inner.c @@ -163,7 +163,6 @@ static int nft_inner_parse_l2l3(const struct nft_inner *priv, return -1; if (fragoff == 0) { - thoff = nhoff + sizeof(_ip6h); ctx->flags |= NFT_PAYLOAD_CTX_INNER_TH; ctx->inner_thoff = thoff; ctx->l4proto = l4proto; @@ -247,8 +246,8 @@ static bool nft_inner_restore_tun_ctx(const struct nft_pktinfo *pkt, local_lock_nested_bh(&nft_pcpu_tun_ctx.bh_lock); this_cpu_tun_ctx = this_cpu_ptr(&nft_pcpu_tun_ctx.ctx); if (this_cpu_tun_ctx->cookie != (unsigned long)pkt->skb) { - local_bh_enable(); local_unlock_nested_bh(&nft_pcpu_tun_ctx.bh_lock); + local_bh_enable(); return false; } *tun_ctx = *this_cpu_tun_ctx; diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c index b39017c80548..8050cc06a9a3 100644 --- a/net/netfilter/x_tables.c +++ b/net/netfilter/x_tables.c @@ -55,6 +55,9 @@ static struct list_head xt_templates[NFPROTO_NUMPROTO]; struct xt_pernet { struct list_head tables[NFPROTO_NUMPROTO]; + + /* stash area used during netns exit */ + struct list_head dead_tables[NFPROTO_NUMPROTO]; }; struct compat_delta { @@ -1405,11 +1408,9 @@ struct xt_counters *xt_counters_alloc(unsigned int counters) } EXPORT_SYMBOL(xt_counters_alloc); -struct xt_table_info * -xt_replace_table(struct xt_table *table, - unsigned int num_counters, - struct xt_table_info *newinfo, - int *error) +static struct xt_table_info * +do_replace_table(struct xt_table *table, unsigned int num_counters, + struct xt_table_info *newinfo, int *error) { struct xt_table_info *private; unsigned int cpu; @@ -1464,30 +1465,54 @@ xt_replace_table(struct xt_table *table, } } - audit_log_nfcfg(table->name, table->af, private->number, - !private->number ? AUDIT_XT_OP_REGISTER : - AUDIT_XT_OP_REPLACE, - GFP_KERNEL); + return private; +} + +struct xt_table_info * +xt_replace_table(struct xt_table *table, unsigned int num_counters, + struct xt_table_info *newinfo, + int *error) +{ + struct xt_table_info *private; + + private = do_replace_table(table, num_counters, newinfo, error); + if (private) + audit_log_nfcfg(table->name, table->af, private->number, + AUDIT_XT_OP_REPLACE, + GFP_KERNEL); + return private; } EXPORT_SYMBOL_GPL(xt_replace_table); struct xt_table *xt_register_table(struct net *net, const struct xt_table *input_table, + const struct nf_hook_ops *template_ops, struct xt_table_info *bootstrap, struct xt_table_info *newinfo) { struct xt_pernet *xt_net = net_generic(net, xt_pernet_id); + struct xt_table *t, *table = NULL; + struct nf_hook_ops *ops = NULL; struct xt_table_info *private; - struct xt_table *t, *table; - int ret; + unsigned int num_ops; + int ret = -EINVAL; + + num_ops = hweight32(input_table->valid_hooks); + if (num_ops == 0) + goto out; + + ret = -ENOMEM; + if (template_ops) { + ops = kmemdup_array(template_ops, num_ops, sizeof(*ops), GFP_KERNEL); + if (!ops) + goto out; + } /* Don't add one object to multiple lists. */ table = kmemdup(input_table, sizeof(struct xt_table), GFP_KERNEL); - if (!table) { - ret = -ENOMEM; + if (!table) goto out; - } mutex_lock(&xt[table->af].mutex); /* Don't autoload: we'd eat our tail... */ @@ -1501,7 +1526,7 @@ struct xt_table *xt_register_table(struct net *net, /* Simplifies replace_table code. */ table->private = bootstrap; - if (!xt_replace_table(table, 0, newinfo, &ret)) + if (!do_replace_table(table, 0, newinfo, &ret)) goto unlock; private = table->private; @@ -1510,34 +1535,122 @@ struct xt_table *xt_register_table(struct net *net, /* save number of initial entries */ private->initial_entries = private->number; + if (ops) { + int i; + + for (i = 0; i < num_ops; i++) + ops[i].priv = table; + + ret = nf_register_net_hooks(net, ops, num_ops); + if (ret != 0) { + mutex_unlock(&xt[table->af].mutex); + /* nf_register_net_hooks() might have published a + * base chain before internal error unwind. + */ + synchronize_rcu(); + goto out; + } + + table->ops = ops; + } + + audit_log_nfcfg(table->name, table->af, private->number, + AUDIT_XT_OP_REGISTER, GFP_KERNEL); + list_add(&table->list, &xt_net->tables[table->af]); mutex_unlock(&xt[table->af].mutex); return table; unlock: mutex_unlock(&xt[table->af].mutex); - kfree(table); out: + kfree(table); + kfree(ops); return ERR_PTR(ret); } EXPORT_SYMBOL_GPL(xt_register_table); -void *xt_unregister_table(struct xt_table *table) +/** + * xt_unregister_table_pre_exit - pre-shutdown unregister of a table + * @net: network namespace + * @af: address family (e.g., NFPROTO_IPV4, NFPROTO_IPV6) + * @name: name of the table to unregister + * + * Unregisters the specified netfilter table from the given network namespace + * and also unregisters the hooks from netfilter core: no new packets will be + * processed. + * + * This must be called prior to xt_unregister_table_exit() from the pernet + * .pre_exit callback. After this call, the table is no longer visible to + * the get/setsockopt path. In case of rmmod, module exit path must have + * called xt_unregister_template() prior to unregistering pernet ops to + * prevent re-instantiation of the table. + * + * See also: xt_unregister_table_exit() + */ +void xt_unregister_table_pre_exit(struct net *net, u8 af, const char *name) { - struct xt_table_info *private; + struct xt_pernet *xt_net = net_generic(net, xt_pernet_id); + struct xt_table *t; - mutex_lock(&xt[table->af].mutex); - private = table->private; - list_del(&table->list); - mutex_unlock(&xt[table->af].mutex); - audit_log_nfcfg(table->name, table->af, private->number, - AUDIT_XT_OP_UNREGISTER, GFP_KERNEL); - kfree(table->ops); - kfree(table); + mutex_lock(&xt[af].mutex); + list_for_each_entry(t, &xt_net->tables[af], list) { + if (strcmp(t->name, name) == 0) { + list_move(&t->list, &xt_net->dead_tables[af]); + mutex_unlock(&xt[af].mutex); - return private; + if (t->ops) /* nat table registers with nat core, t->ops is NULL. */ + nf_unregister_net_hooks(net, t->ops, hweight32(t->valid_hooks)); + return; + } + } + mutex_unlock(&xt[af].mutex); +} +EXPORT_SYMBOL(xt_unregister_table_pre_exit); + +/** + * xt_unregister_table_exit - remove a table during namespace teardown + * @net: the network namespace from which to unregister the table + * @af: address family (e.g., NFPROTO_IPV4, NFPROTO_IPV6) + * @name: name of the table to unregister + * + * Completes the unregister process for a table. This must be called from + * the pernet ops .exit callback. This is the second stage after + * xt_unregister_table_pre_exit(). + * + * pair with xt_unregister_table_pre_exit() during namespace shutdown. + * + * Return: the unregistered table or NULL if the table was never + * instantiated. The caller needs to kfree() the table after it + * has removed the family specific matches/targets. + */ +struct xt_table *xt_unregister_table_exit(struct net *net, u8 af, const char *name) +{ + struct xt_pernet *xt_net = net_generic(net, xt_pernet_id); + struct xt_table *table; + + mutex_lock(&xt[af].mutex); + list_for_each_entry(table, &xt_net->dead_tables[af], list) { + struct nf_hook_ops *ops = NULL; + + if (strcmp(table->name, name) != 0) + continue; + + list_del(&table->list); + + audit_log_nfcfg(table->name, table->af, table->private->number, + AUDIT_XT_OP_UNREGISTER, GFP_KERNEL); + swap(table->ops, ops); + mutex_unlock(&xt[af].mutex); + + kfree(ops); + return table; + } + mutex_unlock(&xt[af].mutex); + + return NULL; } -EXPORT_SYMBOL_GPL(xt_unregister_table); +EXPORT_SYMBOL_GPL(xt_unregister_table_exit); #endif #ifdef CONFIG_PROC_FS @@ -1984,8 +2097,10 @@ static int __net_init xt_net_init(struct net *net) struct xt_pernet *xt_net = net_generic(net, xt_pernet_id); int i; - for (i = 0; i < NFPROTO_NUMPROTO; i++) + for (i = 0; i < NFPROTO_NUMPROTO; i++) { INIT_LIST_HEAD(&xt_net->tables[i]); + INIT_LIST_HEAD(&xt_net->dead_tables[i]); + } return 0; } @@ -1994,8 +2109,10 @@ static void __net_exit xt_net_exit(struct net *net) struct xt_pernet *xt_net = net_generic(net, xt_pernet_id); int i; - for (i = 0; i < NFPROTO_NUMPROTO; i++) + for (i = 0; i < NFPROTO_NUMPROTO; i++) { WARN_ON_ONCE(!list_empty(&xt_net->tables[i])); + WARN_ON_ONCE(!list_empty(&xt_net->dead_tables[i])); + } } static struct pernet_operations xt_net_ops = { diff --git a/net/phonet/pep.c b/net/phonet/pep.c index 120e711ea78c..2c7bad24d7c9 100644 --- a/net/phonet/pep.c +++ b/net/phonet/pep.c @@ -671,8 +671,23 @@ static int pep_do_rcv(struct sock *sk, struct sk_buff *skb) /* Look for an existing pipe handle */ sknode = pep_find_pipe(&pn->hlist, &dst, pipe_handle); - if (sknode) - return sk_receive_skb(sknode, skb, 1); + if (sknode) { + int rc; + + /* pep_do_rcv() runs from two contexts: from softirq via + * phonet_rcv() -> __sk_receive_skb() with BH disabled, + * and from process context via + * release_sock() -> __release_sock(), which drops + * the listener slock with spin_unlock_bh() before draining + * the backlog. The child pipe slock is taken below via + * bh_lock_sock_nested(), which does not itself disable BH, so + * disable BH here to keep both acquire contexts consistent. + */ + local_bh_disable(); + rc = sk_receive_skb(sknode, skb, 1); + local_bh_enable(); + return rc; + } switch (hdr->message_id) { case PNS_PEP_CONNECT_REQ: diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h index 27c2aa2dd023..783367eea798 100644 --- a/net/rxrpc/ar-internal.h +++ b/net/rxrpc/ar-internal.h @@ -213,8 +213,6 @@ struct rxrpc_skb_priv { struct { u16 offset; /* Offset of data */ u16 len; /* Length of data */ - u8 flags; -#define RXRPC_RX_VERIFIED 0x01 }; struct { rxrpc_seq_t first_ack; /* First packet in acks table */ @@ -774,6 +772,11 @@ struct rxrpc_call { struct sk_buff_head recvmsg_queue; /* Queue of packets ready for recvmsg() */ struct sk_buff_head rx_queue; /* Queue of packets for this call to receive */ struct sk_buff_head rx_oos_queue; /* Queue of out of sequence packets */ + void *rx_dec_buffer; /* Decryption buffer */ + unsigned short rx_dec_bsize; /* rx_dec_buffer size */ + unsigned short rx_dec_offset; /* Decrypted packet data offset */ + unsigned short rx_dec_len; /* Decrypted packet data len */ + rxrpc_seq_t rx_dec_seq; /* Packet in decryption buffer */ rxrpc_seq_t rx_highest_seq; /* Higest sequence number received */ rxrpc_seq_t rx_consumed; /* Highest packet consumed */ diff --git a/net/rxrpc/call_event.c b/net/rxrpc/call_event.c index 2b19b252225e..fec59d9338b9 100644 --- a/net/rxrpc/call_event.c +++ b/net/rxrpc/call_event.c @@ -332,27 +332,7 @@ bool rxrpc_input_call_event(struct rxrpc_call *call) saw_ack |= sp->hdr.type == RXRPC_PACKET_TYPE_ACK; - if (sp->hdr.type == RXRPC_PACKET_TYPE_DATA && - sp->hdr.securityIndex != 0 && - (skb_cloned(skb) || - skb_has_frag_list(skb) || - skb_has_shared_frag(skb))) { - /* Unshare the packet so that it can be - * modified by in-place decryption. - */ - struct sk_buff *nskb = skb_copy(skb, GFP_ATOMIC); - - if (nskb) { - rxrpc_new_skb(nskb, rxrpc_skb_new_unshared); - rxrpc_input_call_packet(call, nskb); - rxrpc_free_skb(nskb, rxrpc_skb_put_call_rx); - } else { - /* OOM - Drop the packet. */ - rxrpc_see_skb(skb, rxrpc_skb_see_unshare_nomem); - } - } else { - rxrpc_input_call_packet(call, skb); - } + rxrpc_input_call_packet(call, skb); rxrpc_free_skb(skb, rxrpc_skb_put_call_rx); did_receive = true; } diff --git a/net/rxrpc/call_object.c b/net/rxrpc/call_object.c index f035f486c139..fcb9d38bb521 100644 --- a/net/rxrpc/call_object.c +++ b/net/rxrpc/call_object.c @@ -152,6 +152,7 @@ struct rxrpc_call *rxrpc_alloc_call(struct rxrpc_sock *rx, gfp_t gfp, spin_lock_init(&call->notify_lock); refcount_set(&call->ref, 1); call->debug_id = debug_id; + call->rx_pkt_offset = USHRT_MAX; call->tx_total_len = -1; call->tx_jumbo_max = 1; call->next_rx_timo = 20 * HZ; @@ -553,6 +554,7 @@ static void rxrpc_cleanup_rx_buffers(struct rxrpc_call *call) rxrpc_purge_queue(&call->recvmsg_queue); rxrpc_purge_queue(&call->rx_queue); rxrpc_purge_queue(&call->rx_oos_queue); + kfree(call->rx_dec_buffer); } /* diff --git a/net/rxrpc/insecure.c b/net/rxrpc/insecure.c index 0a260df45d25..7a26c6097d03 100644 --- a/net/rxrpc/insecure.c +++ b/net/rxrpc/insecure.c @@ -32,9 +32,6 @@ static int none_secure_packet(struct rxrpc_call *call, struct rxrpc_txbuf *txb) static int none_verify_packet(struct rxrpc_call *call, struct sk_buff *skb) { - struct rxrpc_skb_priv *sp = rxrpc_skb(skb); - - sp->flags |= RXRPC_RX_VERIFIED; return 0; } diff --git a/net/rxrpc/recvmsg.c b/net/rxrpc/recvmsg.c index e1f7513a46db..c940600117a4 100644 --- a/net/rxrpc/recvmsg.c +++ b/net/rxrpc/recvmsg.c @@ -147,15 +147,52 @@ static void rxrpc_rotate_rx_window(struct rxrpc_call *call) } /* - * Decrypt and verify a DATA packet. + * Decrypt and verify a DATA packet. The content of the packet is pulled out + * into a flat buffer rather than decrypting in place in the skbuff. This also + * has the advantage of aligning the buffer correctly for the crypto routines. + * + * We keep track of the sequence number of the packet currently decrypted into + * the buffer in ->rx_dec_seq. If MSG_PEEK is used and steps onto a new + * packet, subsequent recvmsg() calls will have to go back and re-decrypt the + * current packet. */ static int rxrpc_verify_data(struct rxrpc_call *call, struct sk_buff *skb) { struct rxrpc_skb_priv *sp = rxrpc_skb(skb); + int ret; - if (sp->flags & RXRPC_RX_VERIFIED) - return 0; - return call->security->verify_packet(call, skb); + if (sp->len > call->rx_dec_bsize) { + /* Make sure we can hold a 1412-byte jumbo subpacket and make + * sure that the buffer size is aligned to a crypto blocksize. + */ + size_t size = clamp(round_up(sp->len, 32), 2048, 65535); + void *buffer = krealloc(call->rx_dec_buffer, size, GFP_NOFS); + + if (!buffer) + return -ENOMEM; + call->rx_dec_buffer = buffer; + call->rx_dec_bsize = size; + } + + ret = -EFAULT; + if (skb_copy_bits(skb, sp->offset, call->rx_dec_buffer, sp->len) < 0) + goto err; + + call->rx_dec_offset = 0; + call->rx_dec_len = sp->len; + call->rx_dec_seq = sp->hdr.seq; + ret = call->security->verify_packet(call, skb); + if (ret < 0) + goto err; + return 0; + +err: + kfree(call->rx_dec_buffer); + call->rx_dec_buffer = NULL; + call->rx_dec_bsize = 0; + call->rx_dec_offset = 0; + call->rx_dec_len = 0; + return ret; } /* @@ -283,16 +320,21 @@ static int rxrpc_recvmsg_data(struct socket *sock, struct rxrpc_call *call, if (msg) sock_recv_timestamp(msg, sock->sk, skb); - if (rx_pkt_offset == 0) { + if (call->rx_dec_seq != sp->hdr.seq || + !call->rx_dec_buffer) { ret2 = rxrpc_verify_data(call, skb); trace_rxrpc_recvdata(call, rxrpc_recvmsg_next, seq, - sp->offset, sp->len, ret2); + call->rx_dec_offset, + call->rx_dec_len, ret2); if (ret2 < 0) { ret = ret2; goto out; } - rx_pkt_offset = sp->offset; - rx_pkt_len = sp->len; + } + + if (rx_pkt_offset == USHRT_MAX) { + rx_pkt_offset = call->rx_dec_offset; + rx_pkt_len = call->rx_dec_len; } else { trace_rxrpc_recvdata(call, rxrpc_recvmsg_cont, seq, rx_pkt_offset, rx_pkt_len, 0); @@ -304,10 +346,10 @@ static int rxrpc_recvmsg_data(struct socket *sock, struct rxrpc_call *call, if (copy > remain) copy = remain; if (copy > 0) { - ret2 = skb_copy_datagram_iter(skb, rx_pkt_offset, iter, - copy); - if (ret2 < 0) { - ret = ret2; + ret2 = copy_to_iter(call->rx_dec_buffer + rx_pkt_offset, + copy, iter); + if (ret2 != copy) { + ret = -EFAULT; goto out; } @@ -328,7 +370,7 @@ static int rxrpc_recvmsg_data(struct socket *sock, struct rxrpc_call *call, /* The whole packet has been transferred. */ if (sp->hdr.flags & RXRPC_LAST_PACKET) ret = 1; - rx_pkt_offset = 0; + rx_pkt_offset = USHRT_MAX; rx_pkt_len = 0; skb = skb_peek_next(skb, &call->recvmsg_queue); diff --git a/net/rxrpc/rxgk.c b/net/rxrpc/rxgk.c index 0d5e654da918..f81703ee7ac3 100644 --- a/net/rxrpc/rxgk.c +++ b/net/rxrpc/rxgk.c @@ -473,15 +473,20 @@ static int rxgk_verify_packet_integrity(struct rxrpc_call *call, struct rxrpc_skb_priv *sp = rxrpc_skb(skb); struct rxgk_header *hdr; struct krb5_buffer metadata; - unsigned int offset = sp->offset, len = sp->len; + unsigned int len = call->rx_dec_len; size_t data_offset = 0, data_len = len; + void *data = call->rx_dec_buffer, *p = data; u32 ac = 0; int ret = -ENOMEM; _enter(""); - crypto_krb5_where_is_the_data(gk->krb5, KRB5_CHECKSUM_MODE, - &data_offset, &data_len); + if (crypto_krb5_where_is_the_data(gk->krb5, KRB5_CHECKSUM_MODE, + &data_offset, &data_len) < 0) { + ret = rxrpc_abort_eproto(call, skb, RXGK_PACKETSHORT, + rxgk_abort_1_short_header); + goto put_gk; + } hdr = kzalloc_obj(*hdr, GFP_NOFS); if (!hdr) @@ -496,16 +501,15 @@ static int rxgk_verify_packet_integrity(struct rxrpc_call *call, metadata.len = sizeof(*hdr); metadata.data = hdr; - ret = rxgk_verify_mic_skb(gk->krb5, gk->rx_Kc, &metadata, - skb, &offset, &len, &ac); + ret = rxgk_verify_mic(gk->krb5, gk->rx_Kc, &metadata, &p, &len, &ac); kfree(hdr); if (ret < 0) { if (ret != -ENOMEM) rxrpc_abort_eproto(call, skb, ac, rxgk_abort_1_verify_mic_eproto); } else { - sp->offset = offset; - sp->len = len; + call->rx_dec_offset = p - data; + call->rx_dec_len = len; } put_gk: @@ -522,49 +526,53 @@ static int rxgk_verify_packet_encrypted(struct rxrpc_call *call, struct sk_buff *skb) { struct rxrpc_skb_priv *sp = rxrpc_skb(skb); - struct rxgk_header hdr; - unsigned int offset = sp->offset, len = sp->len; + struct rxgk_header *hdr; + unsigned int offset = 0, len = call->rx_dec_len; + void *data = call->rx_dec_buffer, *p = data; int ret; u32 ac = 0; _enter(""); - ret = rxgk_decrypt_skb(gk->krb5, gk->rx_enc, skb, &offset, &len, &ac); + if (crypto_krb5_check_data_len(gk->krb5, KRB5_ENCRYPT_MODE, + len, sizeof(*hdr)) < 0) { + ret = rxrpc_abort_eproto(call, skb, RXGK_PACKETSHORT, + rxgk_abort_2_short_header); + goto error; + } + + ret = rxgk_decrypt(gk->krb5, gk->rx_enc, &p, &len, &ac); if (ret < 0) { if (ret != -ENOMEM) rxrpc_abort_eproto(call, skb, ac, rxgk_abort_2_decrypt_eproto); goto error; } + offset = p - data; - if (len < sizeof(hdr)) { + if (len < sizeof(*hdr)) { ret = rxrpc_abort_eproto(call, skb, RXGK_PACKETSHORT, rxgk_abort_2_short_header); goto error; } /* Extract the header from the skb */ - ret = skb_copy_bits(skb, offset, &hdr, sizeof(hdr)); - if (ret < 0) { - ret = rxrpc_abort_eproto(call, skb, RXGK_PACKETSHORT, - rxgk_abort_2_short_encdata); - goto error; - } - offset += sizeof(hdr); - len -= sizeof(hdr); - - if (ntohl(hdr.epoch) != call->conn->proto.epoch || - ntohl(hdr.cid) != call->cid || - ntohl(hdr.call_number) != call->call_id || - ntohl(hdr.seq) != sp->hdr.seq || - ntohl(hdr.sec_index) != call->security_ix || - ntohl(hdr.data_len) > len) { + hdr = data + offset; + offset += sizeof(*hdr); + len -= sizeof(*hdr); + + if (ntohl(hdr->epoch) != call->conn->proto.epoch || + ntohl(hdr->cid) != call->cid || + ntohl(hdr->call_number) != call->call_id || + ntohl(hdr->seq) != sp->hdr.seq || + ntohl(hdr->sec_index) != call->security_ix || + ntohl(hdr->data_len) > len) { ret = rxrpc_abort_eproto(call, skb, RXGK_SEALEDINCON, rxgk_abort_2_short_data); goto error; } - sp->offset = offset; - sp->len = ntohl(hdr.data_len); + call->rx_dec_offset = offset; + call->rx_dec_len = ntohl(hdr->data_len); ret = 0; error: rxgk_put(gk); diff --git a/net/rxrpc/rxgk_common.h b/net/rxrpc/rxgk_common.h index 1e257d7ab8ec..112b5366ce11 100644 --- a/net/rxrpc/rxgk_common.h +++ b/net/rxrpc/rxgk_common.h @@ -106,6 +106,49 @@ int rxgk_decrypt_skb(const struct krb5_enctype *krb5, } /* + * Apply decryption and checksumming functions a flat data buffer. The data + * point and length are updated to reflect the actual content of the encrypted + * region. + */ +static inline int rxgk_decrypt(const struct krb5_enctype *krb5, + struct crypto_aead *aead, + void **_data, unsigned int *_len, + int *_error_code) +{ + struct scatterlist sg[1]; + size_t offset = 0, len = *_len; + int ret; + + sg_init_one(sg, *_data, len); + + ret = crypto_krb5_decrypt(krb5, aead, sg, 1, &offset, &len); + switch (ret) { + case 0: + if (offset & 3) { + *_error_code = RXGK_INCONSISTENCY; + ret = -EPROTO; + break; + } + *_data += offset; + *_len = len; + break; + case -EBADMSG: /* Checksum mismatch. */ + case -EPROTO: + *_error_code = RXGK_SEALEDINCON; + break; + case -EMSGSIZE: + *_error_code = RXGK_PACKETSHORT; + break; + case -ENOPKG: /* Would prefer RXGK_BADETYPE, but not available for YFS. */ + default: + *_error_code = RXGK_INCONSISTENCY; + break; + } + + return ret; +} + +/* * Check the MIC on a region of an skbuff. The offset and length are updated * to reflect the actual content of the secure region. */ @@ -148,3 +191,42 @@ int rxgk_verify_mic_skb(const struct krb5_enctype *krb5, return ret; } + +/* + * Check the MIC on a flat buffer. The data pointer and length are updated to + * reflect the actual content of the secure region. + */ +static inline +int rxgk_verify_mic(const struct krb5_enctype *krb5, + struct crypto_shash *shash, + const struct krb5_buffer *metadata, + void **_data, unsigned int *_len, + u32 *_error_code) +{ + struct scatterlist sg[1]; + size_t offset = 0, len = *_len; + int ret; + + sg_init_one(sg, *_data, len); + + ret = crypto_krb5_verify_mic(krb5, shash, metadata, sg, 1, &offset, &len); + switch (ret) { + case 0: + *_data += offset; + *_len = len; + break; + case -EBADMSG: /* Checksum mismatch */ + case -EPROTO: + *_error_code = RXGK_SEALEDINCON; + break; + case -EMSGSIZE: + *_error_code = RXGK_PACKETSHORT; + break; + case -ENOPKG: /* Would prefer RXGK_BADETYPE, but not available for YFS. */ + default: + *_error_code = RXGK_INCONSISTENCY; + break; + } + + return ret; +} diff --git a/net/rxrpc/rxkad.c b/net/rxrpc/rxkad.c index cba7935977f0..075936337836 100644 --- a/net/rxrpc/rxkad.c +++ b/net/rxrpc/rxkad.c @@ -430,27 +430,25 @@ static int rxkad_verify_packet_1(struct rxrpc_call *call, struct sk_buff *skb, rxrpc_seq_t seq, struct skcipher_request *req) { - struct rxkad_level1_hdr sechdr; + struct rxkad_level1_hdr *sechdr; struct rxrpc_skb_priv *sp = rxrpc_skb(skb); struct rxrpc_crypt iv; - struct scatterlist sg[16]; - u32 data_size, buf; + struct scatterlist sg[1]; + void *data = call->rx_dec_buffer; + u32 len = sp->len, data_size, buf; u16 check; int ret; _enter(""); - if (sp->len < 8) + if (len < 8) return rxrpc_abort_eproto(call, skb, RXKADSEALEDINCON, rxkad_abort_1_short_header); /* Decrypt the skbuff in-place. TODO: We really want to decrypt * directly into the target buffer. */ - sg_init_table(sg, ARRAY_SIZE(sg)); - ret = skb_to_sgvec(skb, sg, sp->offset, 8); - if (unlikely(ret < 0)) - return ret; + sg_init_one(sg, data, len); /* start the decryption afresh */ memset(&iv, 0, sizeof(iv)); @@ -464,13 +462,11 @@ static int rxkad_verify_packet_1(struct rxrpc_call *call, struct sk_buff *skb, return ret; /* Extract the decrypted packet length */ - if (skb_copy_bits(skb, sp->offset, &sechdr, sizeof(sechdr)) < 0) - return rxrpc_abort_eproto(call, skb, RXKADDATALEN, - rxkad_abort_1_short_encdata); - sp->offset += sizeof(sechdr); - sp->len -= sizeof(sechdr); + sechdr = data; + call->rx_dec_offset = sizeof(*sechdr); + len -= sizeof(*sechdr); - buf = ntohl(sechdr.data_size); + buf = ntohl(sechdr->data_size); data_size = buf & 0xffff; check = buf >> 16; @@ -479,10 +475,10 @@ static int rxkad_verify_packet_1(struct rxrpc_call *call, struct sk_buff *skb, if (check != 0) return rxrpc_abort_eproto(call, skb, RXKADSEALEDINCON, rxkad_abort_1_short_check); - if (data_size > sp->len) + if (data_size > len) return rxrpc_abort_eproto(call, skb, RXKADDATALEN, rxkad_abort_1_short_data); - sp->len = data_size; + call->rx_dec_len = data_size; _leave(" = 0 [dlen=%x]", data_size); return 0; @@ -496,43 +492,28 @@ static int rxkad_verify_packet_2(struct rxrpc_call *call, struct sk_buff *skb, struct skcipher_request *req) { const struct rxrpc_key_token *token; - struct rxkad_level2_hdr sechdr; + struct rxkad_level2_hdr *sechdr; struct rxrpc_skb_priv *sp = rxrpc_skb(skb); struct rxrpc_crypt iv; - struct scatterlist _sg[4], *sg; - u32 data_size, buf; + struct scatterlist sg[1]; + void *data = call->rx_dec_buffer; + u32 len = sp->len, data_size, buf; u16 check; - int nsg, ret; + int ret; - _enter(",{%d}", sp->len); + _enter(",{%d}", len); - if (sp->len < 8) + if (len < 8) return rxrpc_abort_eproto(call, skb, RXKADSEALEDINCON, rxkad_abort_2_short_header); /* Don't let the crypto algo see a misaligned length. */ - sp->len = round_down(sp->len, 8); + len = round_down(len, 8); - /* Decrypt the skbuff in-place. TODO: We really want to decrypt - * directly into the target buffer. + /* Decrypt in place in the call's decryption buffer. TODO: We really + * want to decrypt directly into the target buffer. */ - sg = _sg; - nsg = skb_shinfo(skb)->nr_frags + 1; - if (nsg <= 4) { - nsg = 4; - } else { - sg = kmalloc_objs(*sg, nsg, GFP_NOIO); - if (!sg) - return -ENOMEM; - } - - sg_init_table(sg, nsg); - ret = skb_to_sgvec(skb, sg, sp->offset, sp->len); - if (unlikely(ret < 0)) { - if (sg != _sg) - kfree(sg); - return ret; - } + sg_init_one(sg, data, len); /* decrypt from the session key */ token = call->conn->key->payload.data[0]; @@ -540,11 +521,9 @@ static int rxkad_verify_packet_2(struct rxrpc_call *call, struct sk_buff *skb, skcipher_request_set_sync_tfm(req, call->conn->rxkad.cipher); skcipher_request_set_callback(req, 0, NULL, NULL); - skcipher_request_set_crypt(req, sg, sg, sp->len, iv.x); + skcipher_request_set_crypt(req, sg, sg, len, iv.x); ret = crypto_skcipher_decrypt(req); skcipher_request_zero(req); - if (sg != _sg) - kfree(sg); if (ret < 0) { if (ret == -ENOMEM) return ret; @@ -553,13 +532,11 @@ static int rxkad_verify_packet_2(struct rxrpc_call *call, struct sk_buff *skb, } /* Extract the decrypted packet length */ - if (skb_copy_bits(skb, sp->offset, &sechdr, sizeof(sechdr)) < 0) - return rxrpc_abort_eproto(call, skb, RXKADDATALEN, - rxkad_abort_2_short_len); - sp->offset += sizeof(sechdr); - sp->len -= sizeof(sechdr); + sechdr = data; + call->rx_dec_offset = sizeof(*sechdr); + len -= sizeof(*sechdr); - buf = ntohl(sechdr.data_size); + buf = ntohl(sechdr->data_size); data_size = buf & 0xffff; check = buf >> 16; @@ -569,17 +546,18 @@ static int rxkad_verify_packet_2(struct rxrpc_call *call, struct sk_buff *skb, return rxrpc_abort_eproto(call, skb, RXKADSEALEDINCON, rxkad_abort_2_short_check); - if (data_size > sp->len) + if (data_size > len) return rxrpc_abort_eproto(call, skb, RXKADDATALEN, rxkad_abort_2_short_data); - sp->len = data_size; + call->rx_dec_len = data_size; _leave(" = 0 [dlen=%x]", data_size); return 0; } /* - * Verify the security on a received packet and the subpackets therein. + * Verify the security on a received (sub)packet. If the packet needs + * modifying (e.g. decrypting), it must be copied. */ static int rxkad_verify_packet(struct rxrpc_call *call, struct sk_buff *skb) { diff --git a/net/shaper/shaper.c b/net/shaper/shaper.c index 94bc9c7382ea..526fbde2e37f 100644 --- a/net/shaper/shaper.c +++ b/net/shaper/shaper.c @@ -90,6 +90,12 @@ static int net_shaper_handle_size(void) nla_total_size(sizeof(u32))); } +static int net_shaper_group_reply_size(void) +{ + return nla_total_size(sizeof(u32)) + /* NET_SHAPER_A_IFINDEX */ + net_shaper_handle_size(); /* NET_SHAPER_A_HANDLE */ +} + static int net_shaper_fill_binding(struct sk_buff *msg, const struct net_shaper_binding *binding, u32 type) @@ -130,35 +136,58 @@ handle_nest_cancel: return -EMSGSIZE; } +static void net_shaper_copy(struct net_shaper *dst, + const struct net_shaper *src) +{ + WRITE_ONCE(dst->parent.scope, READ_ONCE(src->parent.scope)); + WRITE_ONCE(dst->parent.id, READ_ONCE(src->parent.id)); + WRITE_ONCE(dst->handle.scope, READ_ONCE(src->handle.scope)); + WRITE_ONCE(dst->handle.id, READ_ONCE(src->handle.id)); + + WRITE_ONCE(dst->metric, READ_ONCE(src->metric)); + WRITE_ONCE(dst->bw_min, READ_ONCE(src->bw_min)); + WRITE_ONCE(dst->bw_max, READ_ONCE(src->bw_max)); + WRITE_ONCE(dst->burst, READ_ONCE(src->burst)); + WRITE_ONCE(dst->priority, READ_ONCE(src->priority)); + WRITE_ONCE(dst->weight, READ_ONCE(src->weight)); + + /* private fields are only used on the write path under the lock */ + data_race(dst->leaves = src->leaves); +} + static int net_shaper_fill_one(struct sk_buff *msg, const struct net_shaper_binding *binding, const struct net_shaper *shaper, const struct genl_info *info) { + struct net_shaper cur; void *hdr; hdr = genlmsg_iput(msg, info); if (!hdr) return -EMSGSIZE; + /* Make a copy to avoid data races */ + net_shaper_copy(&cur, shaper); + if (net_shaper_fill_binding(msg, binding, NET_SHAPER_A_IFINDEX) || - net_shaper_fill_handle(msg, &shaper->parent, + net_shaper_fill_handle(msg, &cur.parent, NET_SHAPER_A_PARENT) || - net_shaper_fill_handle(msg, &shaper->handle, + net_shaper_fill_handle(msg, &cur.handle, NET_SHAPER_A_HANDLE) || - ((shaper->bw_min || shaper->bw_max || shaper->burst) && - nla_put_u32(msg, NET_SHAPER_A_METRIC, shaper->metric)) || - (shaper->bw_min && - nla_put_uint(msg, NET_SHAPER_A_BW_MIN, shaper->bw_min)) || - (shaper->bw_max && - nla_put_uint(msg, NET_SHAPER_A_BW_MAX, shaper->bw_max)) || - (shaper->burst && - nla_put_uint(msg, NET_SHAPER_A_BURST, shaper->burst)) || - (shaper->priority && - nla_put_u32(msg, NET_SHAPER_A_PRIORITY, shaper->priority)) || - (shaper->weight && - nla_put_u32(msg, NET_SHAPER_A_WEIGHT, shaper->weight))) + ((cur.bw_min || cur.bw_max || cur.burst) && + nla_put_u32(msg, NET_SHAPER_A_METRIC, cur.metric)) || + (cur.bw_min && + nla_put_uint(msg, NET_SHAPER_A_BW_MIN, cur.bw_min)) || + (cur.bw_max && + nla_put_uint(msg, NET_SHAPER_A_BW_MAX, cur.bw_max)) || + (cur.burst && + nla_put_uint(msg, NET_SHAPER_A_BURST, cur.burst)) || + (cur.priority && + nla_put_u32(msg, NET_SHAPER_A_PRIORITY, cur.priority)) || + (cur.weight && + nla_put_u32(msg, NET_SHAPER_A_WEIGHT, cur.weight))) goto nla_put_failure; genlmsg_end(msg, hdr); @@ -275,25 +304,24 @@ static void net_shaper_default_parent(const struct net_shaper_handle *handle, parent->id = 0; } -/* - * MARK_0 is already in use due to XA_FLAGS_ALLOC, can't reuse such flag as - * it's cleared by xa_store(). - */ -#define NET_SHAPER_NOT_VALID XA_MARK_1 - static struct net_shaper * net_shaper_lookup(struct net_shaper_binding *binding, const struct net_shaper_handle *handle) { u32 index = net_shaper_handle_to_index(handle); struct net_shaper_hierarchy *hierarchy; + struct net_shaper *cur; hierarchy = net_shaper_hierarchy_rcu(binding); - if (!hierarchy || xa_get_mark(&hierarchy->shapers, index, - NET_SHAPER_NOT_VALID)) + if (!hierarchy) return NULL; - return xa_load(&hierarchy->shapers, index); + cur = xa_load(&hierarchy->shapers, index); + /* Check valid before reading fields */ + if (!cur || !smp_load_acquire(&cur->valid)) + return NULL; + + return cur; } /* Allocate on demand the per device shaper's hierarchy container. @@ -370,13 +398,10 @@ static int net_shaper_pre_insert(struct net_shaper_binding *binding, goto free_id; } - /* Mark 'tentative' shaper inside the hierarchy container. - * xa_set_mark is a no-op if the previous store fails. + /* Insert as 'tentative' (no VALID mark). The mark will be set by + * net_shaper_commit() once the driver-side configuration succeeds. */ - xa_lock(&hierarchy->shapers); - prev = __xa_store(&hierarchy->shapers, index, cur, GFP_KERNEL); - __xa_set_mark(&hierarchy->shapers, index, NET_SHAPER_NOT_VALID); - xa_unlock(&hierarchy->shapers); + prev = xa_store(&hierarchy->shapers, index, cur, GFP_KERNEL); if (xa_err(prev)) { NL_SET_ERR_MSG(extack, "Can't insert shaper into device store"); kfree_rcu(cur, rcu); @@ -410,12 +435,10 @@ static void net_shaper_commit(struct net_shaper_binding *binding, if (WARN_ON_ONCE(!cur)) continue; - /* Successful update: drop the tentative mark - * and update the hierarchy container. - */ - __xa_clear_mark(&hierarchy->shapers, index, - NET_SHAPER_NOT_VALID); - *cur = shapers[i]; + /* Successful update: update the hierarchy container... */ + net_shaper_copy(cur, &shapers[i]); + /* ... publish to lockless readers. */ + smp_store_release(&cur->valid, true); } xa_unlock(&hierarchy->shapers); } @@ -431,10 +454,11 @@ static void net_shaper_rollback(struct net_shaper_binding *binding) return; xa_lock(&hierarchy->shapers); - xa_for_each_marked(&hierarchy->shapers, index, cur, - NET_SHAPER_NOT_VALID) { + xa_for_each(&hierarchy->shapers, index, cur) { + if (cur->valid) + continue; __xa_erase(&hierarchy->shapers, index); - kfree(cur); + kfree_rcu(cur, rcu); } xa_unlock(&hierarchy->shapers); } @@ -465,10 +489,21 @@ static int net_shaper_parse_handle(const struct nlattr *attr, * shaper (any other value). */ id_attr = tb[NET_SHAPER_A_HANDLE_ID]; - if (id_attr) + if (id_attr) { id = nla_get_u32(id_attr); - else if (handle->scope == NET_SHAPER_SCOPE_NODE) + } else if (handle->scope == NET_SHAPER_SCOPE_NODE) { id = NET_SHAPER_ID_UNSPEC; + } else if (handle->scope == NET_SHAPER_SCOPE_QUEUE) { + NL_SET_ERR_ATTR_MISS(info->extack, attr, + NET_SHAPER_A_HANDLE_ID); + return -EINVAL; + } + + if (id && handle->scope == NET_SHAPER_SCOPE_NETDEV) { + NL_SET_ERR_MSG_ATTR(info->extack, id_attr, + "Netdev scope is a singleton, must use ID 0"); + return -EINVAL; + } handle->id = id; return 0; @@ -836,7 +871,12 @@ int net_shaper_nl_get_dumpit(struct sk_buff *skb, goto out_unlock; for (; (shaper = xa_find(&hierarchy->shapers, &ctx->start_index, - U32_MAX, XA_PRESENT)); ctx->start_index++) { + U32_MAX, XA_PRESENT)); + ctx->start_index++) { + /* Check valid before reading fields */ + if (!smp_load_acquire(&shaper->valid)) + continue; + ret = net_shaper_fill_one(skb, binding, shaper, info); if (ret) break; @@ -932,6 +972,46 @@ static int net_shaper_handle_cmp(const struct net_shaper_handle *a, return memcmp(a, b, sizeof(*a)); } +static int net_shaper_parse_leaves(struct net_shaper_binding *binding, + struct genl_info *info, + const struct net_shaper *node, + struct net_shaper *leaves, + int leaves_count) +{ + struct nlattr *attr; + int i, j, ret, rem; + + i = 0; + nla_for_each_attr_type(attr, NET_SHAPER_A_LEAVES, + genlmsg_data(info->genlhdr), + genlmsg_len(info->genlhdr), rem) { + if (WARN_ON_ONCE(i >= leaves_count)) + return -EINVAL; + + ret = net_shaper_parse_leaf(binding, attr, info, + node, &leaves[i]); + if (ret) + return ret; + + /* Reject duplicates */ + for (j = 0; j < i; j++) { + if (net_shaper_handle_cmp(&leaves[i].handle, + &leaves[j].handle)) + continue; + + NL_SET_ERR_MSG_ATTR_FMT(info->extack, attr, + "Duplicate leaf shaper %d:%d", + leaves[i].handle.scope, + leaves[i].handle.id); + return -EINVAL; + } + + i++; + } + + return 0; +} + static int net_shaper_parent_from_leaves(int leaves_count, const struct net_shaper *leaves, struct net_shaper *node, @@ -964,15 +1044,22 @@ static int __net_shaper_group(struct net_shaper_binding *binding, int i, ret; if (node->handle.scope == NET_SHAPER_SCOPE_NODE) { + struct net_shaper *cur = NULL; + new_node = node->handle.id == NET_SHAPER_ID_UNSPEC; - if (!new_node && !net_shaper_lookup(binding, &node->handle)) { - /* The related attribute is not available when - * reaching here from the delete() op. - */ - NL_SET_ERR_MSG_FMT(extack, "Node shaper %d:%d does not exists", - node->handle.scope, node->handle.id); - return -ENOENT; + if (!new_node) { + cur = net_shaper_lookup(binding, &node->handle); + if (!cur) { + /* The related attribute is not available + * when reaching here from the delete() op. + */ + NL_SET_ERR_MSG_FMT(extack, + "Node shaper %d:%d does not exist", + node->handle.scope, + node->handle.id); + return -ENOENT; + } } /* When unspecified, the node parent scope is inherited from @@ -986,6 +1073,15 @@ static int __net_shaper_group(struct net_shaper_binding *binding, return ret; } + if (cur && net_shaper_handle_cmp(&cur->parent, + &node->parent)) { + NL_SET_ERR_MSG_FMT(extack, + "Cannot reparent node shaper %d:%d", + node->handle.scope, + node->handle.id); + return -EOPNOTSUPP; + } + } else { net_shaper_default_parent(&node->handle, &node->parent); } @@ -1162,7 +1258,7 @@ static int net_shaper_group_send_reply(struct net_shaper_binding *binding, free_msg: /* Should never happen as msg is pre-allocated with enough space. */ WARN_ONCE(true, "calculated message payload length (%d)", - net_shaper_handle_size()); + net_shaper_group_reply_size()); nlmsg_free(msg); return -EMSGSIZE; } @@ -1172,10 +1268,9 @@ int net_shaper_nl_group_doit(struct sk_buff *skb, struct genl_info *info) struct net_shaper **old_nodes, *leaves, node = {}; struct net_shaper_hierarchy *hierarchy; struct net_shaper_binding *binding; - int i, ret, rem, leaves_count; + int i, ret, leaves_count; int old_nodes_count = 0; struct sk_buff *msg; - struct nlattr *attr; if (GENL_REQ_ATTR_CHECK(info, NET_SHAPER_A_LEAVES)) return -EINVAL; @@ -1203,26 +1298,19 @@ int net_shaper_nl_group_doit(struct sk_buff *skb, struct genl_info *info) if (ret) goto free_leaves; - i = 0; - nla_for_each_attr_type(attr, NET_SHAPER_A_LEAVES, - genlmsg_data(info->genlhdr), - genlmsg_len(info->genlhdr), rem) { - if (WARN_ON_ONCE(i >= leaves_count)) - goto free_leaves; - - ret = net_shaper_parse_leaf(binding, attr, info, - &node, &leaves[i]); - if (ret) - goto free_leaves; - i++; - } + ret = net_shaper_parse_leaves(binding, info, &node, + leaves, leaves_count); + if (ret) + goto free_leaves; /* Prepare the msg reply in advance, to avoid device operation * rollback on allocation failure. */ - msg = genlmsg_new(net_shaper_handle_size(), GFP_KERNEL); - if (!msg) + msg = genlmsg_new(net_shaper_group_reply_size(), GFP_KERNEL); + if (!msg) { + ret = -ENOMEM; goto free_leaves; + } hierarchy = net_shaper_hierarchy_setup(binding); if (!hierarchy) { diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c index 1a565095376a..f744f7911217 100644 --- a/net/smc/af_smc.c +++ b/net/smc/af_smc.c @@ -1400,7 +1400,8 @@ smc_v2_determine_accepted_chid(struct smc_clc_msg_accept_confirm *aclc, int i; for (i = 0; i < ini->ism_offered_cnt + 1; i++) { - if (ini->ism_chid[i] == ntohs(aclc->d1.chid)) { + if (ini->ism_dev[i] && + ini->ism_chid[i] == ntohs(aclc->d1.chid)) { ini->ism_selected = i; return 0; } diff --git a/net/smc/smc_tracepoint.h b/net/smc/smc_tracepoint.h index a9a6e3c1113a..53da84f57fd6 100644 --- a/net/smc/smc_tracepoint.h +++ b/net/smc/smc_tracepoint.h @@ -51,7 +51,7 @@ DECLARE_EVENT_CLASS(smc_msg_event, __field(const void *, smc) __field(u64, net_cookie) __field(size_t, len) - __string(name, smc->conn.lnk->ibname) + __string(name, smc->conn.lnk ? smc->conn.lnk->ibname : "") ), TP_fast_assign( diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c index 23a31646d038..11a70c10770b 100644 --- a/net/tls/tls_sw.c +++ b/net/tls/tls_sw.c @@ -789,23 +789,33 @@ static int tls_push_record(struct sock *sk, int flags, i = msg_pl->sg.end; sk_msg_iter_var_prev(i); + /* msg_pl->sg.data is a ring; data[MAX+1] is reserved for the wrap + * link (frags won't use it). 'i' is now the last filled entry: + * + * i end start + * v v v [ rsv ] + * [ d ][ d ][ ][ ]...[ ][ d ][ d ][ d ][chain] + * ^ END v + * `-----------------------------------------' + * + * Note that SGL does not allow chain-after-chain, so for TLS 1.3, + * we must make sure we don't create the wrap entry and then chain + * link to content_type immediately at index 0. + */ + if (i < msg_pl->sg.start) + sg_chain(msg_pl->sg.data, ARRAY_SIZE(msg_pl->sg.data), + msg_pl->sg.data); + rec->content_type = record_type; if (prot->version == TLS_1_3_VERSION) { /* Add content type to end of message. No padding added */ sg_set_buf(&rec->sg_content_type, &rec->content_type, 1); sg_mark_end(&rec->sg_content_type); - sg_chain(msg_pl->sg.data, msg_pl->sg.end + 1, - &rec->sg_content_type); + sg_chain(msg_pl->sg.data, i + 2, &rec->sg_content_type); } else { sg_mark_end(sk_msg_elem(msg_pl, i)); } - if (msg_pl->sg.end < msg_pl->sg.start) { - sg_chain(&msg_pl->sg.data[msg_pl->sg.start], - MAX_SKB_FRAGS - msg_pl->sg.start + 1, - msg_pl->sg.data); - } - i = msg_pl->sg.start; sg_chain(rec->sg_aead_in, 2, &msg_pl->sg.data[i]); @@ -1356,9 +1366,14 @@ unlock: mutex_unlock(&tls_ctx->tx_lock); } +/* When has_copied is true the caller has already moved bytes to + * userspace. Report sk_err but leave it set so the next read + * surfaces it instead of a spurious EOF, otherwise sk_err is + * consumed via sock_error(). + */ static int tls_rx_rec_wait(struct sock *sk, struct sk_psock *psock, bool nonblock, - bool released) + bool released, bool has_copied) { struct tls_context *tls_ctx = tls_get_ctx(sk); struct tls_sw_context_rx *ctx = tls_sw_ctx_rx(tls_ctx); @@ -1376,8 +1391,11 @@ tls_rx_rec_wait(struct sock *sk, struct sk_psock *psock, bool nonblock, if (!sk_psock_queue_empty(psock)) return 0; - if (sk->sk_err) + if (sk->sk_err) { + if (has_copied) + return -READ_ONCE(sk->sk_err); return sock_error(sk); + } if (ret < 0) return ret; @@ -1413,7 +1431,7 @@ tls_rx_rec_wait(struct sock *sk, struct sk_psock *psock, bool nonblock, } if (unlikely(!tls_strp_msg_load(&ctx->strp, released))) - return tls_rx_rec_wait(sk, psock, nonblock, false); + return tls_rx_rec_wait(sk, psock, nonblock, false, has_copied); return 1; } @@ -2101,7 +2119,7 @@ int tls_sw_recvmsg(struct sock *sk, int to_decrypt, chunk; err = tls_rx_rec_wait(sk, psock, flags & MSG_DONTWAIT, - released); + released, !!(decrypted + copied)); if (err <= 0) { if (psock) { chunk = sk_msg_recvmsg(sk, psock, msg, len, @@ -2288,7 +2306,7 @@ ssize_t tls_sw_splice_read(struct socket *sock, loff_t *ppos, struct tls_decrypt_arg darg; err = tls_rx_rec_wait(sk, NULL, flags & SPLICE_F_NONBLOCK, - true); + true, false); if (err <= 0) goto splice_read_end; @@ -2374,7 +2392,7 @@ int tls_sw_read_sock(struct sock *sk, read_descriptor_t *desc, } else { struct tls_decrypt_arg darg; - err = tls_rx_rec_wait(sk, NULL, true, released); + err = tls_rx_rec_wait(sk, NULL, true, released, !!copied); if (err <= 0) goto read_sock_end; diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index 001f6602a665..c3d68bf26ce1 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -2707,8 +2707,7 @@ static int unix_read_skb(struct sock *sk, skb_read_actor_t recv_actor) * Sleep until more data has arrived. But check for races.. */ static long unix_stream_data_wait(struct sock *sk, long timeo, - struct sk_buff *last, unsigned int last_len, - bool freezable) + struct sk_buff *last, bool freezable) { unsigned int state = TASK_INTERRUPTIBLE | freezable * TASK_FREEZABLE; struct sk_buff *tail; @@ -2721,7 +2720,6 @@ static long unix_stream_data_wait(struct sock *sk, long timeo, tail = skb_peek_tail(&sk->sk_receive_queue); if (tail != last || - (tail && tail->len != last_len) || sk->sk_err || (sk->sk_shutdown & RCV_SHUTDOWN) || signal_pending(current) || @@ -2917,7 +2915,6 @@ static int unix_stream_read_generic(struct unix_stream_read_state *state, int flags = state->flags; bool check_creds = false; struct scm_cookie scm; - unsigned int last_len; struct unix_sock *u; int copied = 0; int err = 0; @@ -2963,7 +2960,6 @@ redo: goto unlock; } last = skb = skb_peek(&sk->sk_receive_queue); - last_len = last ? last->len : 0; again: #if IS_ENABLED(CONFIG_AF_UNIX_OOB) @@ -2997,8 +2993,7 @@ again: mutex_unlock(&u->iolock); - timeo = unix_stream_data_wait(sk, timeo, last, - last_len, freezable); + timeo = unix_stream_data_wait(sk, timeo, last, freezable); if (signal_pending(current)) { err = sock_intr_errno(timeo); @@ -3015,7 +3010,6 @@ unlock: while (skip >= unix_skb_len(skb)) { skip -= unix_skb_len(skb); last = skb; - last_len = skb->len; skb = skb_peek_next(skb, &sk->sk_receive_queue); if (!skb) goto again; @@ -3090,7 +3084,6 @@ unlock: skip = 0; last = skb; - last_len = skb->len; unix_state_lock(sk); skb = skb_peek_next(skb, &sk->sk_receive_queue); if (skb) diff --git a/net/vmw_vsock/virtio_transport_common.c b/net/vmw_vsock/virtio_transport_common.c index 0d0265f770ad..e8fb2e20db0f 100644 --- a/net/vmw_vsock/virtio_transport_common.c +++ b/net/vmw_vsock/virtio_transport_common.c @@ -72,34 +72,6 @@ static bool virtio_transport_can_zcopy(const struct virtio_transport *t_ops, return true; } -static int virtio_transport_init_zcopy_skb(struct vsock_sock *vsk, - struct sk_buff *skb, - struct msghdr *msg, - size_t pkt_len, - bool zerocopy) -{ - struct ubuf_info *uarg; - - if (msg->msg_ubuf) { - uarg = msg->msg_ubuf; - net_zcopy_get(uarg); - } else { - struct ubuf_info_msgzc *uarg_zc; - - uarg = msg_zerocopy_realloc(sk_vsock(vsk), - pkt_len, NULL, false); - if (!uarg) - return -1; - - uarg_zc = uarg_to_msgzc(uarg); - uarg_zc->zerocopy = zerocopy ? 1 : 0; - } - - skb_zcopy_init(skb, uarg); - - return 0; -} - static int virtio_transport_fill_skb(struct sk_buff *skb, struct virtio_vsock_pkt_info *info, size_t len, @@ -319,8 +291,10 @@ static int virtio_transport_send_pkt_info(struct vsock_sock *vsk, u32 src_cid, src_port, dst_cid, dst_port; const struct virtio_transport *t_ops; struct virtio_vsock_sock *vvs; + struct ubuf_info *uarg = NULL; u32 pkt_len = info->pkt_len; bool can_zcopy = false; + bool have_uref = false; u32 rest_len; int ret; @@ -362,6 +336,25 @@ static int virtio_transport_send_pkt_info(struct vsock_sock *vsk, if (can_zcopy) max_skb_len = min_t(u32, VIRTIO_VSOCK_MAX_PKT_BUF_SIZE, (MAX_SKB_FRAGS * PAGE_SIZE)); + + if (info->msg->msg_flags & MSG_ZEROCOPY && + info->op == VIRTIO_VSOCK_OP_RW) { + uarg = info->msg->msg_ubuf; + + if (!uarg) { + uarg = msg_zerocopy_realloc(sk_vsock(vsk), + pkt_len, NULL, false); + if (!uarg) { + virtio_transport_put_credit(vvs, pkt_len); + return -ENOMEM; + } + + if (!can_zcopy) + uarg_to_msgzc(uarg)->zerocopy = 0; + + have_uref = true; + } + } } rest_len = pkt_len; @@ -380,27 +373,7 @@ static int virtio_transport_send_pkt_info(struct vsock_sock *vsk, break; } - /* We process buffer part by part, allocating skb on - * each iteration. If this is last skb for this buffer - * and MSG_ZEROCOPY mode is in use - we must allocate - * completion for the current syscall. - * - * Pass pkt_len because msg iter is already consumed - * by virtio_transport_fill_skb(), so iter->count - * can not be used for RLIMIT_MEMLOCK pinned-pages - * accounting done by msg_zerocopy_realloc(). - */ - if (info->msg && info->msg->msg_flags & MSG_ZEROCOPY && - skb_len == rest_len && info->op == VIRTIO_VSOCK_OP_RW) { - if (virtio_transport_init_zcopy_skb(vsk, skb, - info->msg, - pkt_len, - can_zcopy)) { - kfree_skb(skb); - ret = -ENOMEM; - break; - } - } + skb_zcopy_set(skb, uarg, NULL); virtio_transport_inc_tx_pkt(vvs, skb); @@ -424,6 +397,18 @@ static int virtio_transport_send_pkt_info(struct vsock_sock *vsk, virtio_transport_put_credit(vvs, rest_len); + /* msg_zerocopy_realloc() initializes the ubuf_info refcnt to 1. + * skb_zcopy_set() increases it for each skb, so we can drop that + * initial reference to keep it balanced. + */ + if (have_uref) { + if (rest_len == pkt_len) + /* No data sent, abort the notification. */ + net_zcopy_put_abort(uarg, true); + else + net_zcopy_put(uarg); + } + /* Return number of bytes, if any data has been sent. */ if (rest_len != pkt_len) ret = pkt_len - rest_len; @@ -1350,7 +1335,7 @@ destroy: return err; } -static void +static bool virtio_transport_recv_enqueue(struct vsock_sock *vsk, struct sk_buff *skb) { @@ -1365,10 +1350,8 @@ virtio_transport_recv_enqueue(struct vsock_sock *vsk, spin_lock_bh(&vvs->rx_lock); can_enqueue = virtio_transport_inc_rx_pkt(vvs, len); - if (!can_enqueue) { - free_pkt = true; + if (!can_enqueue) goto out; - } if (le32_to_cpu(hdr->flags) & VIRTIO_VSOCK_SEQ_EOM) vvs->msg_count++; @@ -1408,6 +1391,8 @@ out: spin_unlock_bh(&vvs->rx_lock); if (free_pkt) kfree_skb(skb); + + return can_enqueue; } static int @@ -1420,7 +1405,17 @@ virtio_transport_recv_connected(struct sock *sk, switch (le16_to_cpu(hdr->op)) { case VIRTIO_VSOCK_OP_RW: - virtio_transport_recv_enqueue(vsk, skb); + if (!virtio_transport_recv_enqueue(vsk, skb)) { + /* There is no more space to queue the packet, so let's + * close the connection; otherwise, we'll lose data. + */ + (void)virtio_transport_reset(vsk, skb); + virtio_transport_do_close(vsk, true); + sk->sk_err = ENOBUFS; + sk_error_report(sk); + vsock_remove_sock(vsk); + break; + } vsock_data_ready(sk); return err; case VIRTIO_VSOCK_OP_CREDIT_REQUEST: diff --git a/net/vmw_vsock/vmci_transport.c b/net/vmw_vsock/vmci_transport.c index 4296ca1183f1..d2579380f51e 100644 --- a/net/vmw_vsock/vmci_transport.c +++ b/net/vmw_vsock/vmci_transport.c @@ -1164,7 +1164,7 @@ vmci_transport_recv_connecting_server(struct sock *listener, /* Close and cleanup the connection. */ vmci_transport_send_reset(pending, pkt); skerr = EPROTO; - err = pkt->type == VMCI_TRANSPORT_PACKET_TYPE_RST ? 0 : -EINVAL; + err = -EINVAL; goto destroy; } diff --git a/net/wireless/scan.c b/net/wireless/scan.c index 328af43ef832..358cbc9e43d8 100644 --- a/net/wireless/scan.c +++ b/net/wireless/scan.c @@ -2462,6 +2462,9 @@ size_t cfg80211_merge_profile(const u8 *ie, size_t ielen, memcpy(merged_ie + copied_len, next_sub->data, next_sub->datalen); copied_len += next_sub->datalen; + + mbssid_elem = next_mbssid; + sub_elem = next_sub; } return copied_len; |
