diff options
| author | Linus Torvalds <torvalds@linux-foundation.org> | 2026-06-11 10:17:49 -0700 |
|---|---|---|
| committer | Linus Torvalds <torvalds@linux-foundation.org> | 2026-06-11 10:17:49 -0700 |
| commit | 22e2036479cb77df6281ebbd376ae6c330774790 (patch) | |
| tree | d872b0d89bf987fe6abf5897d6034baea0b5608f | |
| parent | 79f2670da86722d075633d20fa57418994ee6940 (diff) | |
| parent | 7360b96099806396f4ce15233f6dddcb69248d34 (diff) | |
Merge tag 'net-7.1-rc8' of git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net
Pull networking fixes from Paolo Abeni:
"Including fixes from IPsec and netfilter.
This is relatively small, mostly because we are a bit behind our PW
queue. I'm not aware of any pending regression.
Current release - regressions:
- netfilter: nf_tables_offload: drop device refcount on error
Previous releases - regressions:
- core: add pskb_may_pull() to skb_gro_receive_list()
- xfrm: iptfs: preserve shared-frag marker in iptfs_consume_frags()
- ipv6: fix a potential NPD in cleanup_prefix_route()
- ipv4: fix use-after-free caused by the fqdir_pre_exit() flush
- eth:
- bnxt_en: fix NULL pointer dereference
- emac: fix use-after-free during device removal
- octeontx2-af: fix memory leak in rvu_setup_hw_resources()
- tun: zero the whole vnet header in tun_put_user()
- sit: reload inner IPv6 header after GSO offloads
Previous releases - always broken:
- core: fix double-free in netdev_nl_bind_rx_doit()
- netfilter: nf_log: validate MAC header was set before dumping it
- xfrm: iptfs: fix ABBA deadlock in iptfs_destroy_state()
- tcp: restrict SO_ATTACH_FILTER to priv users
- mctp: usb: fix race between urb completion and rx_retry
cancellation
- eth:
- mlx5: fix slab-out-of-bounds in mlx5_query_nic_vport_mac_list
- mvpp2: sync RX data at the hardware packet offset"
* tag 'net-7.1-rc8' of git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net: (64 commits)
octeontx2-af: fix IP fragment flag corruption on custom KPU profile load
ipv6: Fix a potential NPD in cleanup_prefix_route()
net: txgbe: initialize PHY interface to 0
net: txgbe: distinguish module types by checking identifier
net: txgbe: initialize module info buffer
net: mvpp2: build skb from XDP-adjusted data on XDP_PASS
net: mvpp2: refill RX buffers before XDP or skb use
net: mvpp2: limit XDP frame size to the RX buffer
net: mvpp2: sync RX data at the hardware packet offset
netfilter: nft_meta_bridge: fix stale stack leak via IIFHWADDR register
netfilter: nft_fib: fix stale stack leak via the OIFNAME register
netfilter: nft_exthdr: fix register tracking for F_PRESENT flag
netfilter: nf_log: validate MAC header was set before dumping it
netfilter: x_tables: avoid leaking percpu counter pointers
netfilter: nf_conntrack: destroy stale expectfn expectations on unregister
netfilter: nf_tables_offload: drop device refcount on error
netfilter: revalidate bridge ports
rds: mark snapshot pages dirty in rds_info_getsockopt()
ip6_vti: fix incorrect tunnel matching in vti6_tnl_lookup()
ptp: ocp: fix resource freeing order
...
71 files changed, 562 insertions, 274 deletions
diff --git a/drivers/net/ethernet/airoha/airoha_eth.c b/drivers/net/ethernet/airoha/airoha_eth.c index eab6a98d62b9..31cdb11cd78d 100644 --- a/drivers/net/ethernet/airoha/airoha_eth.c +++ b/drivers/net/ethernet/airoha/airoha_eth.c @@ -1153,6 +1153,9 @@ static int airoha_qdma_init_hfwd_queues(struct airoha_qdma *qdma) rmem = of_reserved_mem_lookup(np); of_node_put(np); + if (!rmem) + return -ENODEV; + dma_addr = rmem->base; /* Compute the number of hw descriptors according to the * reserved memory size and the payload buffer size diff --git a/drivers/net/ethernet/amazon/ena/ena_com.c b/drivers/net/ethernet/amazon/ena/ena_com.c index 8c86789d867a..297fb36ab8c1 100644 --- a/drivers/net/ethernet/amazon/ena/ena_com.c +++ b/drivers/net/ethernet/amazon/ena/ena_com.c @@ -1880,6 +1880,11 @@ int ena_com_phc_get_timestamp(struct ena_com_dev *ena_dev, u64 *timestamp) continue; } + /* Ensure PHC payload (timestamp, error_flags) is read + * after req_id update is observed + */ + dma_rmb(); + /* req_id was updated by the device which indicates that * PHC timestamp and error_flags are updated too, * checking errors before retrieving timestamp diff --git a/drivers/net/ethernet/broadcom/bnge/bnge_hwrm_lib.c b/drivers/net/ethernet/broadcom/bnge/bnge_hwrm_lib.c index eb11800f5573..1c9cfec1b633 100644 --- a/drivers/net/ethernet/broadcom/bnge/bnge_hwrm_lib.c +++ b/drivers/net/ethernet/broadcom/bnge/bnge_hwrm_lib.c @@ -277,7 +277,7 @@ int bnge_hwrm_func_backing_store_qcaps(struct bnge_dev *bd) struct hwrm_func_backing_store_qcaps_v2_output *resp; struct hwrm_func_backing_store_qcaps_v2_input *req; struct bnge_ctx_mem_info *ctx; - u16 type; + u16 type, next_type; int rc; if (bd->ctx) @@ -294,8 +294,8 @@ int bnge_hwrm_func_backing_store_qcaps(struct bnge_dev *bd) resp = bnge_hwrm_req_hold(bd, req); - for (type = 0; type < BNGE_CTX_V2_MAX; ) { - struct bnge_ctx_mem_type *ctxm = &ctx->ctx_arr[type]; + for (type = 0; type < BNGE_CTX_INV; type = next_type) { + struct bnge_ctx_mem_type *ctxm; u8 init_val, init_off, i; __le32 *p; u32 flags; @@ -304,8 +304,14 @@ int bnge_hwrm_func_backing_store_qcaps(struct bnge_dev *bd) rc = bnge_hwrm_req_send(bd, req); if (rc) goto ctx_done; + + next_type = le16_to_cpu(resp->next_valid_type); + if (type >= BNGE_CTX_V2_MAX) + continue; + + ctxm = &ctx->ctx_arr[type]; flags = le32_to_cpu(resp->flags); - type = le16_to_cpu(resp->next_valid_type); + if (!(flags & FUNC_BACKING_STORE_QCAPS_V2_RESP_FLAGS_TYPE_VALID)) continue; diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index 35e1f8f663c7..c999f9733326 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -5748,7 +5748,7 @@ static void bnxt_disable_int_sync(struct bnxt *bp) { int i; - if (!bp->irq_tbl) + if (!bp->irq_tbl || !bp->bnapi) return; atomic_inc(&bp->intr_sem); diff --git a/drivers/net/ethernet/ibm/emac/core.c b/drivers/net/ethernet/ibm/emac/core.c index 417dfa18daae..4e503b3d0d2d 100644 --- a/drivers/net/ethernet/ibm/emac/core.c +++ b/drivers/net/ethernet/ibm/emac/core.c @@ -3144,7 +3144,7 @@ static int emac_probe(struct platform_device *ofdev) netif_carrier_off(ndev); - err = devm_register_netdev(&ofdev->dev, ndev); + err = register_netdev(ndev); if (err) { printk(KERN_ERR "%pOF: failed to register net device (%d)!\n", np, err); @@ -3197,6 +3197,13 @@ static void emac_remove(struct platform_device *ofdev) DBG(dev, "remove" NL); + /* Unregister network device before tearing down hardware + * to prevent use-after-free during deferred cleanup. This ensures + * the network stack stops all operations before hardware resources + * are released. + */ + unregister_netdev(dev->ndev); + cancel_work_sync(&dev->reset_work); if (emac_has_feature(dev, EMAC_FTR_HAS_TAH)) diff --git a/drivers/net/ethernet/intel/ice/ice_dpll.c b/drivers/net/ethernet/intel/ice/ice_dpll.c index 892bc7c2e28b..0704e92ab043 100644 --- a/drivers/net/ethernet/intel/ice/ice_dpll.c +++ b/drivers/net/ethernet/intel/ice/ice_dpll.c @@ -2633,6 +2633,8 @@ static const struct dpll_pin_ops ice_dpll_pin_ufl_ops = { .state_on_dpll_set = ice_dpll_ufl_pin_state_set, .state_on_dpll_get = ice_dpll_sw_pin_state_get, .direction_get = ice_dpll_pin_sw_direction_get, + .prio_get = ice_dpll_sw_input_prio_get, + .prio_set = ice_dpll_sw_input_prio_set, .frequency_get = ice_dpll_sw_pin_frequency_get, .frequency_set = ice_dpll_sw_pin_frequency_set, .esync_set = ice_dpll_sw_esync_set, diff --git a/drivers/net/ethernet/intel/idpf/idpf_ptp.c b/drivers/net/ethernet/intel/idpf/idpf_ptp.c index 4a51d2727547..71fe8b2a8b4e 100644 --- a/drivers/net/ethernet/intel/idpf/idpf_ptp.c +++ b/drivers/net/ethernet/intel/idpf/idpf_ptp.c @@ -51,7 +51,7 @@ void idpf_ptp_get_features_access(const struct idpf_adapter *adapter) /* Set the device clock time */ direct = VIRTCHNL2_CAP_PTP_SET_DEVICE_CLK_TIME; - mailbox = VIRTCHNL2_CAP_PTP_SET_DEVICE_CLK_TIME; + mailbox = VIRTCHNL2_CAP_PTP_SET_DEVICE_CLK_TIME_MB; ptp->set_dev_clk_time_access = idpf_ptp_get_access(adapter, direct, mailbox); diff --git a/drivers/net/ethernet/marvell/mv643xx_eth.c b/drivers/net/ethernet/marvell/mv643xx_eth.c index f9055b3d6fb1..1881583be5ce 100644 --- a/drivers/net/ethernet/marvell/mv643xx_eth.c +++ b/drivers/net/ethernet/marvell/mv643xx_eth.c @@ -2780,7 +2780,7 @@ static int mv643xx_eth_shared_of_add_port(struct platform_device *pdev, goto put_err; } ppdev->dev.coherent_dma_mask = DMA_BIT_MASK(32); - ppdev->dev.of_node = pnp; + ppdev->dev.of_node = of_node_get(pnp); ret = platform_device_add_resources(ppdev, &res, 1); if (ret) diff --git a/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c b/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c index f442b874bb59..ccc24a1301f2 100644 --- a/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c +++ b/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c @@ -3917,10 +3917,10 @@ static int mvpp2_rx(struct mvpp2_port *port, struct napi_struct *napi, struct mvpp2_bm_pool *bm_pool; struct page_pool *pp = NULL; struct sk_buff *skb; - unsigned int frag_size; + unsigned int frag_size, rx_sync_size; dma_addr_t dma_addr; phys_addr_t phys_addr; - int pool, rx_bytes, err, ret; + int pool, rx_bytes, rx_offset, err, ret; struct page *page; void *data; @@ -3933,6 +3933,8 @@ static int mvpp2_rx(struct mvpp2_port *port, struct napi_struct *napi, rx_status = mvpp2_rxdesc_status_get(port, rx_desc); rx_bytes = mvpp2_rxdesc_size_get(port, rx_desc); rx_bytes -= MVPP2_MH_SIZE; + rx_sync_size = rx_bytes + MVPP2_MH_SIZE; + rx_offset = MVPP2_MH_SIZE + MVPP2_SKB_HEADROOM; dma_addr = mvpp2_rxdesc_dma_addr_get(port, rx_desc); pool = (rx_status & MVPP2_RXD_BM_POOL_ID_MASK) >> @@ -3946,9 +3948,10 @@ static int mvpp2_rx(struct mvpp2_port *port, struct napi_struct *napi, dma_dir = DMA_FROM_DEVICE; } - dma_sync_single_for_cpu(dev->dev.parent, dma_addr, - rx_bytes + MVPP2_MH_SIZE, - dma_dir); + dma_sync_single_range_for_cpu(dev->dev.parent, dma_addr, + MVPP2_SKB_HEADROOM, + rx_sync_size, + dma_dir); /* Buffer header not supported */ if (rx_status & MVPP2_RXD_BUF_HDR) @@ -3970,6 +3973,12 @@ static int mvpp2_rx(struct mvpp2_port *port, struct napi_struct *napi, else frag_size = bm_pool->frag_size; + err = mvpp2_rx_refill(port, bm_pool, pp, pool); + if (err) { + netdev_err(port->dev, "failed to refill BM pools\n"); + goto err_drop_frame; + } + if (xdp_prog) { struct xdp_rxq_info *xdp_rxq; @@ -3978,7 +3987,7 @@ static int mvpp2_rx(struct mvpp2_port *port, struct napi_struct *napi, else xdp_rxq = &rxq->xdp_rxq_long; - xdp_init_buff(&xdp, PAGE_SIZE, xdp_rxq); + xdp_init_buff(&xdp, bm_pool->frag_size, xdp_rxq); xdp_prepare_buff(&xdp, data, MVPP2_MH_SIZE + MVPP2_SKB_HEADROOM, rx_bytes, true); @@ -3987,17 +3996,19 @@ static int mvpp2_rx(struct mvpp2_port *port, struct napi_struct *napi, if (ret) { xdp_ret |= ret; - err = mvpp2_rx_refill(port, bm_pool, pp, pool); - if (err) { - netdev_err(port->dev, "failed to refill BM pools\n"); - goto err_drop_frame; - } - ps.rx_packets++; ps.rx_bytes += rx_bytes; continue; } + rx_sync_size = max_t(unsigned int, rx_sync_size, + xdp.data_end - xdp.data_hard_start - + MVPP2_SKB_HEADROOM); + + /* Update offset and length to reflect any XDP adjustments. */ + rx_offset = xdp.data - data; + rx_bytes = xdp.data_end - xdp.data; + metasize = xdp.data - xdp.data_meta; } @@ -4007,8 +4018,20 @@ static int mvpp2_rx(struct mvpp2_port *port, struct napi_struct *napi, skb = slab_build_skb(data); if (!skb) { netdev_warn(port->dev, "skb build failed\n"); - goto err_drop_frame; + if (pp) { + page_pool_put_page(pp, virt_to_head_page(data), + rx_sync_size, true); + } else { + dma_unmap_single_attrs(dev->dev.parent, dma_addr, + bm_pool->buf_size, + DMA_FROM_DEVICE, + DMA_ATTR_SKIP_CPU_SYNC); + mvpp2_frag_free(bm_pool, pp, data); + } + goto err_drop_frame_retired; } + if (pp) + skb_mark_for_recycle(skb); /* If we have RX hardware timestamping enabled, grab the * timestamp from the queue and convert. @@ -4019,16 +4042,7 @@ static int mvpp2_rx(struct mvpp2_port *port, struct napi_struct *napi, skb_hwtstamps(skb)); } - err = mvpp2_rx_refill(port, bm_pool, pp, pool); - if (err) { - netdev_err(port->dev, "failed to refill BM pools\n"); - dev_kfree_skb_any(skb); - goto err_drop_frame; - } - - if (pp) - skb_mark_for_recycle(skb); - else + if (!pp) dma_unmap_single_attrs(dev->dev.parent, dma_addr, bm_pool->buf_size, DMA_FROM_DEVICE, DMA_ATTR_SKIP_CPU_SYNC); @@ -4036,7 +4050,7 @@ static int mvpp2_rx(struct mvpp2_port *port, struct napi_struct *napi, ps.rx_packets++; ps.rx_bytes += rx_bytes; - skb_reserve(skb, MVPP2_MH_SIZE + MVPP2_SKB_HEADROOM); + skb_reserve(skb, rx_offset); skb_put(skb, rx_bytes); if (metasize) skb_metadata_set(skb, metasize); @@ -4047,13 +4061,14 @@ static int mvpp2_rx(struct mvpp2_port *port, struct napi_struct *napi, continue; err_drop_frame: - dev->stats.rx_errors++; - mvpp2_rx_error(port, rx_desc); /* Return the buffer to the pool */ if (rx_status & MVPP2_RXD_BUF_HDR) mvpp2_buff_hdr_pool_put(port, rx_desc, pool, rx_status); else mvpp2_bm_pool_put(port, pool, dma_addr, phys_addr); +err_drop_frame_retired: + dev->stats.rx_errors++; + mvpp2_rx_error(port, rx_desc); } if (xdp_ret & MVPP2_XDP_REDIR) diff --git a/drivers/net/ethernet/marvell/octeontx2/af/cn20k/npc.c b/drivers/net/ethernet/marvell/octeontx2/af/cn20k/npc.c index 6b3f453fd500..fe8c4ffcd8f7 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/cn20k/npc.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/cn20k/npc.c @@ -1571,53 +1571,49 @@ static u8 npc_map2cn20k_flag(u8 flag) return 0xff; } +static void npc_cn20k_translate_action_flags(struct npc_kpu_profile_action *act) +{ + u8 ltype, val; + + if (act->lid != NPC_LID_LC) + return; + + ltype = act->ltype; + if (ltype != NPC_LT_LC_IP && + ltype != NPC_LT_LC_IP6 && + ltype != NPC_LT_LC_IP_OPT && + ltype != NPC_LT_LC_IP6_EXT) + return; + + switch (act->flags) { + case NPC_F_LC_U_IP_FRAG: + case NPC_F_LC_U_IP6_FRAG: + case NPC_F_LC_L_6TO4: + case NPC_F_LC_L_MPLS_IN_IP: + case NPC_F_LC_L_IP6_TUN_IP6: + case NPC_F_LC_L_IP6_MPLS_IN_IP: + val = npc_map2cn20k_flag(act->flags); + if (val != 0xFF) + act->flags = val; + break; + default: + break; + } +} + void npc_cn20k_update_action_entries_n_flags(struct rvu *rvu, struct npc_kpu_profile_adapter *pfl) { struct npc_kpu_profile_action *action; - int entries, ltype; - u8 flags, val; + int entries; for (int i = 0; i < pfl->kpus; i++) { action = pfl->kpu[i].action; entries = pfl->kpu[i].action_entries; - for (int j = 0; j < entries; j++) { - if (action[j].lid != NPC_LID_LC) - continue; - - ltype = action[j].ltype; - - if (ltype != NPC_LT_LC_IP && - ltype != NPC_LT_LC_IP6 && - ltype != NPC_LT_LC_IP_OPT && - ltype != NPC_LT_LC_IP6_EXT) - continue; - - flags = action[j].flags; - - switch (flags) { - case NPC_F_LC_U_IP_FRAG: - case NPC_F_LC_U_IP6_FRAG: - case NPC_F_LC_L_6TO4: - case NPC_F_LC_L_MPLS_IN_IP: - case NPC_F_LC_L_IP6_TUN_IP6: - case NPC_F_LC_L_IP6_MPLS_IN_IP: - val = npc_map2cn20k_flag(flags); - if (val == 0xFF) { - dev_err(rvu->dev, - "%s: Error to get flag value\n", - __func__); - return; - } - - action[j].flags = val; - break; - default: - break; - } - } + for (int j = 0; j < entries; j++) + npc_cn20k_translate_action_flags(&action[j]); } } @@ -1709,9 +1705,9 @@ int npc_cn20k_apply_custom_kpu(struct rvu *rvu, for (entry = 0; entry < entries; entry++) { profile->kpu[kpu].cam[entry] = cam[entry]; profile->kpu[kpu].action[entry] = action[entry]; + npc_cn20k_translate_action_flags(&profile->kpu[kpu].action[entry]); } } - npc_cn20k_update_action_entries_n_flags(rvu, profile); return 0; } diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu.c index 3cf131508ecf..6e907ee19164 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu.c @@ -1160,7 +1160,7 @@ cpt: err = rvu_npc_exact_init(rvu); if (err) { dev_err(rvu->dev, "failed to initialize exact match table\n"); - return err; + goto cgx_err; } /* Assign MACs for CGX mapped functions */ diff --git a/drivers/net/ethernet/mellanox/mlx4/cq.c b/drivers/net/ethernet/mellanox/mlx4/cq.c index e130e7259275..5c55971abbf0 100644 --- a/drivers/net/ethernet/mellanox/mlx4/cq.c +++ b/drivers/net/ethernet/mellanox/mlx4/cq.c @@ -290,6 +290,7 @@ static void mlx4_cq_free_icm(struct mlx4_dev *dev, int cqn) static int mlx4_init_user_cqes(void *buf, int entries, int cqe_size) { int entries_per_copy = PAGE_SIZE / cqe_size; + size_t copy_bytes; void *init_ents; int err = 0; int i; @@ -314,8 +315,14 @@ static int mlx4_init_user_cqes(void *buf, int entries, int cqe_size) buf += PAGE_SIZE; } } else { + copy_bytes = array_size(entries, cqe_size); + if (WARN_ON_ONCE(copy_bytes > PAGE_SIZE)) { + err = -EINVAL; + goto out; + } + err = copy_to_user((void __user *)buf, init_ents, - array_size(entries, cqe_size)) ? + copy_bytes) ? -EFAULT : 0; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c b/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c index d3bab198c99c..d8c7cb8837d7 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c @@ -103,9 +103,15 @@ mlx5e_xmit_xdp_buff(struct mlx5e_xdpsq *sq, struct mlx5e_rq *rq, xdptxd->dma_addr = dma_addr; - if (unlikely(!INDIRECT_CALL_2(sq->xmit_xdp_frame, mlx5e_xmit_xdp_frame_mpwqe, - mlx5e_xmit_xdp_frame, sq, xdptxd, 0, NULL))) + if (unlikely(!INDIRECT_CALL_2(sq->xmit_xdp_frame, + mlx5e_xmit_xdp_frame_mpwqe, + mlx5e_xmit_xdp_frame, + sq, xdptxd, 0, NULL))) { + dma_unmap_single(sq->pdev, dma_addr, xdptxd->len, + DMA_TO_DEVICE); + xdp_return_frame(xdpf); return false; + } /* xmit_mode == MLX5E_XDP_XMIT_MODE_FRAME */ mlx5e_xdpi_fifo_push(&sq->db.xdpi_fifo, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c index 7c8311f41232..236f89a6483a 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c @@ -533,23 +533,16 @@ static void esw_update_vport_addr_list(struct mlx5_eswitch *esw, struct mlx5_vport *vport, int list_type) { bool is_uc = list_type == MLX5_NVPRT_LIST_TYPE_UC; - u8 (*mac_list)[ETH_ALEN]; + u8 (*mac_list)[ETH_ALEN] = NULL; struct l2addr_node *node; struct vport_addr *addr; struct hlist_head *hash; struct hlist_node *tmp; - int size; + int size = 0; int err; int hi; int i; - size = is_uc ? MLX5_MAX_UC_PER_VPORT(esw->dev) : - MLX5_MAX_MC_PER_VPORT(esw->dev); - - mac_list = kcalloc(size, ETH_ALEN, GFP_KERNEL); - if (!mac_list) - return; - hash = is_uc ? vport->uc_list : vport->mc_list; for_each_l2hash_node(node, tmp, hash, hi) { @@ -561,7 +554,7 @@ static void esw_update_vport_addr_list(struct mlx5_eswitch *esw, goto out; err = mlx5_query_nic_vport_mac_list(esw->dev, vport->vport, list_type, - mac_list, &size); + &mac_list, &size); if (err) goto out; esw_debug(esw->dev, "vport[%d] context update %s list size (%d)\n", diff --git a/drivers/net/ethernet/mellanox/mlx5/core/irq_affinity.c b/drivers/net/ethernet/mellanox/mlx5/core/irq_affinity.c index 994fe83da4be..a0bb8ee44e35 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/irq_affinity.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/irq_affinity.c @@ -105,9 +105,12 @@ irq_pool_find_least_loaded(struct mlx5_irq_pool *pool, const struct cpumask *req lockdep_assert_held(&pool->lock); xa_for_each_range(&pool->irqs, index, iter, start, end) { - struct cpumask *iter_mask = mlx5_irq_get_affinity_mask(iter); int iter_refcount = mlx5_irq_read_locked(iter); + const struct cpumask *iter_mask; + iter_mask = irq_get_effective_affinity_mask(mlx5_irq_get_irq(iter)); + if (!iter_mask) + continue; if (!cpumask_subset(iter_mask, req_mask)) /* skip IRQs with a mask which is not subset of req_mask */ continue; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/vport.c b/drivers/net/ethernet/mellanox/mlx5/core/vport.c index 4effe37fd455..d63b0e8806b5 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/vport.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/vport.c @@ -324,35 +324,63 @@ int mlx5_modify_nic_vport_mtu(struct mlx5_core_dev *mdev, u16 mtu) } EXPORT_SYMBOL_GPL(mlx5_modify_nic_vport_mtu); +static int mlx5_vport_max_mac_list_size(struct mlx5_core_dev *dev, u16 vport, + enum mlx5_list_type list_type) +{ + void *query_ctx, *hca_caps; + int ret = 0; + + if (!vport && !mlx5_core_is_ecpf(dev)) + return list_type == MLX5_NVPRT_LIST_TYPE_UC ? + 1 << MLX5_CAP_GEN(dev, log_max_current_uc_list) : + 1 << MLX5_CAP_GEN(dev, log_max_current_mc_list); + + query_ctx = kzalloc(MLX5_ST_SZ_BYTES(query_hca_cap_out), GFP_KERNEL); + if (!query_ctx) + return -ENOMEM; + + ret = mlx5_vport_get_other_func_general_cap(dev, vport, query_ctx); + if (ret) + goto out; + + hca_caps = MLX5_ADDR_OF(query_hca_cap_out, query_ctx, capability); + ret = list_type == MLX5_NVPRT_LIST_TYPE_UC ? + 1 << MLX5_GET(cmd_hca_cap, hca_caps, log_max_current_uc_list) : + 1 << MLX5_GET(cmd_hca_cap, hca_caps, log_max_current_mc_list); + +out: + kfree(query_ctx); + + return ret; +} + int mlx5_query_nic_vport_mac_list(struct mlx5_core_dev *dev, u16 vport, enum mlx5_list_type list_type, - u8 addr_list[][ETH_ALEN], - int *list_size) + u8 (**addr_list)[ETH_ALEN], + int *addr_list_size) { u32 in[MLX5_ST_SZ_DW(query_nic_vport_context_in)] = {0}; + int allowed_list_size; void *nic_vport_ctx; int max_list_size; - int req_list_size; int out_sz; void *out; int err; int i; - req_list_size = *list_size; + if (!addr_list || !addr_list_size) + return -EINVAL; - max_list_size = list_type == MLX5_NVPRT_LIST_TYPE_UC ? - 1 << MLX5_CAP_GEN(dev, log_max_current_uc_list) : - 1 << MLX5_CAP_GEN(dev, log_max_current_mc_list); + *addr_list = NULL; + *addr_list_size = 0; - if (req_list_size > max_list_size) { - mlx5_core_warn(dev, "Requested list size (%d) > (%d) max_list_size\n", - req_list_size, max_list_size); - req_list_size = max_list_size; - } + max_list_size = mlx5_vport_max_mac_list_size(dev, vport, list_type); + if (max_list_size < 0) + return max_list_size; out_sz = MLX5_ST_SZ_BYTES(query_nic_vport_context_out) + - req_list_size * MLX5_ST_SZ_BYTES(mac_address_layout); + max_list_size * MLX5_ST_SZ_BYTES(mac_address_layout); out = kvzalloc(out_sz, GFP_KERNEL); if (!out) @@ -371,16 +399,24 @@ int mlx5_query_nic_vport_mac_list(struct mlx5_core_dev *dev, nic_vport_ctx = MLX5_ADDR_OF(query_nic_vport_context_out, out, nic_vport_context); - req_list_size = MLX5_GET(nic_vport_context, nic_vport_ctx, - allowed_list_size); + allowed_list_size = MLX5_GET(nic_vport_context, nic_vport_ctx, + allowed_list_size); + if (!allowed_list_size) + goto out; + + *addr_list = kcalloc(allowed_list_size, ETH_ALEN, GFP_KERNEL); + if (!*addr_list) { + err = -ENOMEM; + goto out; + } - *list_size = req_list_size; - for (i = 0; i < req_list_size; i++) { + for (i = 0; i < allowed_list_size; i++) { u8 *mac_addr = MLX5_ADDR_OF(nic_vport_context, nic_vport_ctx, current_uc_mac_address[i]) + 2; - ether_addr_copy(addr_list[i], mac_addr); + ether_addr_copy((*addr_list)[i], mac_addr); } + *addr_list_size = allowed_list_size; out: kvfree(out); return err; diff --git a/drivers/net/ethernet/wangxun/txgbe/txgbe_aml.c b/drivers/net/ethernet/wangxun/txgbe/txgbe_aml.c index f0514251d4f3..8fc32df8e49a 100644 --- a/drivers/net/ethernet/wangxun/txgbe/txgbe_aml.c +++ b/drivers/net/ethernet/wangxun/txgbe/txgbe_aml.c @@ -204,7 +204,7 @@ int txgbe_set_phy_link(struct wx *wx) static int txgbe_sfp_to_linkmodes(struct wx *wx, struct txgbe_sff_id *id) { __ETHTOOL_DECLARE_LINK_MODE_MASK(modes) = { 0, }; - DECLARE_PHY_INTERFACE_MASK(interfaces); + DECLARE_PHY_INTERFACE_MASK_ZERO(interfaces); struct txgbe *txgbe = wx->priv; if (id->cable_tech & TXGBE_SFF_DA_PASSIVE_CABLE) { @@ -271,7 +271,7 @@ static int txgbe_sfp_to_linkmodes(struct wx *wx, struct txgbe_sff_id *id) static int txgbe_qsfp_to_linkmodes(struct wx *wx, struct txgbe_sff_id *id) { __ETHTOOL_DECLARE_LINK_MODE_MASK(modes) = { 0, }; - DECLARE_PHY_INTERFACE_MASK(interfaces); + DECLARE_PHY_INTERFACE_MASK_ZERO(interfaces); struct txgbe *txgbe = wx->priv; if (id->transceiver_type & TXGBE_SFF_ETHERNET_40G_CR4) { @@ -335,7 +335,7 @@ static int txgbe_qsfp_to_linkmodes(struct wx *wx, struct txgbe_sff_id *id) int txgbe_identify_module(struct wx *wx) { - struct txgbe_hic_get_module_info buffer; + struct txgbe_hic_get_module_info buffer = { 0 }; struct txgbe_sff_id *id; int err = 0; u32 mod_abs; @@ -357,18 +357,16 @@ int txgbe_identify_module(struct wx *wx) } id = &buffer.id; - if (id->identifier != TXGBE_SFF_IDENTIFIER_SFP && - id->identifier != TXGBE_SFF_IDENTIFIER_QSFP && - id->identifier != TXGBE_SFF_IDENTIFIER_QSFP_PLUS && - id->identifier != TXGBE_SFF_IDENTIFIER_QSFP28) { - wx_err(wx, "Invalid module\n"); - return -ENODEV; - } - - if (id->transceiver_type == 0xFF) + if (id->identifier == TXGBE_SFF_IDENTIFIER_SFP) return txgbe_sfp_to_linkmodes(wx, id); - return txgbe_qsfp_to_linkmodes(wx, id); + if (id->identifier == TXGBE_SFF_IDENTIFIER_QSFP || + id->identifier == TXGBE_SFF_IDENTIFIER_QSFP_PLUS || + id->identifier == TXGBE_SFF_IDENTIFIER_QSFP28) + return txgbe_qsfp_to_linkmodes(wx, id); + + wx_err(wx, "Invalid module\n"); + return -EINVAL; } void txgbe_setup_link(struct wx *wx) diff --git a/drivers/net/ethernet/wangxun/txgbe/txgbe_type.h b/drivers/net/ethernet/wangxun/txgbe/txgbe_type.h index 6b05f32b4a01..877234e3fdc2 100644 --- a/drivers/net/ethernet/wangxun/txgbe/txgbe_type.h +++ b/drivers/net/ethernet/wangxun/txgbe/txgbe_type.h @@ -315,6 +315,9 @@ void txgbe_up(struct wx *wx); int txgbe_setup_tc(struct net_device *dev, u8 tc); void txgbe_do_reset(struct net_device *netdev); +#define DECLARE_PHY_INTERFACE_MASK_ZERO(name) \ + unsigned long name[PHY_INTERFACE_MODE_MAX] = { 0, } + #define TXGBE_LINK_SPEED_UNKNOWN 0 #define TXGBE_LINK_SPEED_10GB_FULL 4 #define TXGBE_LINK_SPEED_25GB_FULL 0x10 diff --git a/drivers/net/hyperv/netvsc.c b/drivers/net/hyperv/netvsc.c index 59e95341f9b1..4d319c50955e 100644 --- a/drivers/net/hyperv/netvsc.c +++ b/drivers/net/hyperv/netvsc.c @@ -12,6 +12,7 @@ #include <linux/sched.h> #include <linux/wait.h> #include <linux/mm.h> +#include <linux/highmem.h> #include <linux/delay.h> #include <linux/io.h> #include <linux/slab.h> @@ -965,12 +966,22 @@ static void netvsc_copy_to_send_buf(struct netvsc_device *net_device, } for (i = 0; i < page_count; i++) { - char *src = phys_to_virt(pb[i].pfn << HV_HYP_PAGE_SHIFT); - u32 offset = pb[i].offset; + phys_addr_t paddr = (pb[i].pfn << HV_HYP_PAGE_SHIFT) + + pb[i].offset; u32 len = pb[i].len; - memcpy(dest, (src + offset), len); - dest += len; + while (len) { + struct page *page = phys_to_page(paddr); + u32 off = offset_in_page(paddr); + u32 chunk = min_t(u32, len, PAGE_SIZE - off); + char *src = kmap_local_page(page); + + memcpy(dest, src + off, chunk); + kunmap_local(src); + dest += chunk; + paddr += chunk; + len -= chunk; + } } if (padding) diff --git a/drivers/net/mctp/mctp-usb.c b/drivers/net/mctp/mctp-usb.c index 3b5dff144177..fade65f2f269 100644 --- a/drivers/net/mctp/mctp-usb.c +++ b/drivers/net/mctp/mctp-usb.c @@ -22,7 +22,6 @@ struct mctp_usb { struct usb_device *usbdev; struct usb_interface *intf; - bool stopped; struct net_device *netdev; @@ -32,6 +31,9 @@ struct mctp_usb { struct urb *tx_urb; struct urb *rx_urb; + /* enforces atomic access to rx_stopped and requeuing the retry work */ + spinlock_t rx_lock; + bool rx_stopped; struct delayed_work rx_retry_work; }; @@ -122,6 +124,7 @@ static const unsigned long RX_RETRY_DELAY = HZ / 4; static int mctp_usb_rx_queue(struct mctp_usb *mctp_usb, gfp_t gfp) { + unsigned long flags; struct sk_buff *skb; int rc; @@ -147,8 +150,11 @@ static int mctp_usb_rx_queue(struct mctp_usb *mctp_usb, gfp_t gfp) return rc; err_retry: - schedule_delayed_work(&mctp_usb->rx_retry_work, RX_RETRY_DELAY); - return rc; + spin_lock_irqsave(&mctp_usb->rx_lock, flags); + if (!mctp_usb->rx_stopped) + schedule_delayed_work(&mctp_usb->rx_retry_work, RX_RETRY_DELAY); + spin_unlock_irqrestore(&mctp_usb->rx_lock, flags); + return 0; } static void mctp_usb_in_complete(struct urb *urb) @@ -248,9 +254,6 @@ static void mctp_usb_rx_retry_work(struct work_struct *work) struct mctp_usb *mctp_usb = container_of(work, struct mctp_usb, rx_retry_work.work); - if (READ_ONCE(mctp_usb->stopped)) - return; - mctp_usb_rx_queue(mctp_usb, GFP_KERNEL); } @@ -258,7 +261,7 @@ static int mctp_usb_open(struct net_device *dev) { struct mctp_usb *mctp_usb = netdev_priv(dev); - WRITE_ONCE(mctp_usb->stopped, false); + WRITE_ONCE(mctp_usb->rx_stopped, false); netif_start_queue(dev); @@ -268,17 +271,21 @@ static int mctp_usb_open(struct net_device *dev) static int mctp_usb_stop(struct net_device *dev) { struct mctp_usb *mctp_usb = netdev_priv(dev); + unsigned long flags; netif_stop_queue(dev); /* prevent RX submission retry */ - WRITE_ONCE(mctp_usb->stopped, true); + spin_lock_irqsave(&mctp_usb->rx_lock, flags); + mctp_usb->rx_stopped = true; + cancel_delayed_work(&mctp_usb->rx_retry_work); + spin_unlock_irqrestore(&mctp_usb->rx_lock, flags); + + flush_delayed_work(&mctp_usb->rx_retry_work); usb_kill_urb(mctp_usb->rx_urb); usb_kill_urb(mctp_usb->tx_urb); - cancel_delayed_work_sync(&mctp_usb->rx_retry_work); - return 0; } @@ -331,6 +338,7 @@ static int mctp_usb_probe(struct usb_interface *intf, dev->netdev = netdev; dev->usbdev = interface_to_usbdev(intf); dev->intf = intf; + spin_lock_init(&dev->rx_lock); usb_set_intfdata(intf, dev); dev->ep_in = ep_in->bEndpointAddress; diff --git a/drivers/net/phy/phy_device.c b/drivers/net/phy/phy_device.c index 3370eb822017..1511385b9b36 100644 --- a/drivers/net/phy/phy_device.c +++ b/drivers/net/phy/phy_device.c @@ -1718,6 +1718,9 @@ static int phy_sfp_probe(struct phy_device *phydev) ret = sfp_bus_add_upstream(bus, phydev, &sfp_phydev_ops); sfp_bus_put(bus); + + if (ret) + phydev->sfp_bus = NULL; } if (!ret && phydev->sfp_bus) @@ -3509,9 +3512,15 @@ static int phy_setup_ports(struct phy_device *phydev) if (ret) return ret; - ret = phy_sfp_probe(phydev); - if (ret) - goto out; + /* We don't support SFP with genphy drivers. Also, genphy driver + * binding occurs with RTNL help, which will deadlock the call to + * sfp_bus_add_upstream(). + */ + if (!phydev->is_genphy_driven) { + ret = phy_sfp_probe(phydev); + if (ret) + goto out; + } if (phydev->n_ports < phydev->max_n_ports) { ret = phy_default_setup_single_port(phydev); @@ -3775,6 +3784,11 @@ static int phy_probe(struct device *dev) return 0; out: + sfp_bus_del_upstream(phydev->sfp_bus); + phydev->sfp_bus = NULL; + + phy_cleanup_ports(phydev); + if (!phydev->is_on_sfp_module) phy_led_triggers_unregister(phydev); @@ -3798,11 +3812,11 @@ static int phy_remove(struct device *dev) phydev->state = PHY_DOWN; - phy_cleanup_ports(phydev); - sfp_bus_del_upstream(phydev->sfp_bus); phydev->sfp_bus = NULL; + phy_cleanup_ports(phydev); + if (phydev->drv && phydev->drv->remove) phydev->drv->remove(phydev); diff --git a/drivers/net/tun.c b/drivers/net/tun.c index 9e7744eb57a3..fed9dfdfcc3b 100644 --- a/drivers/net/tun.c +++ b/drivers/net/tun.c @@ -2070,6 +2070,7 @@ static ssize_t tun_put_user(struct tun_struct *tun, struct virtio_net_hdr_v1_hash_tunnel hdr; struct virtio_net_hdr *gso; + memset(&hdr, 0, sizeof(hdr)); ret = tun_vnet_hdr_tnl_from_skb(tun->flags, tun->dev, skb, &hdr); if (ret) diff --git a/drivers/net/usb/r8152.c b/drivers/net/usb/r8152.c index 1ace1d2398c9..b1268553cd70 100644 --- a/drivers/net/usb/r8152.c +++ b/drivers/net/usb/r8152.c @@ -9851,7 +9851,12 @@ static int rtl8152_probe_once(struct usb_interface *intf, struct net_device *netdev; int ret; - usb_reset_device(udev); + ret = usb_reset_device(udev); + if (ret < 0) { + dev_err(&intf->dev, "USB reset failed, errno=%d\n", ret); + return ret; + } + netdev = alloc_etherdev(sizeof(struct r8152)); if (!netdev) { dev_err(&intf->dev, "Out of memory\n"); diff --git a/drivers/ptp/ptp_ocp.c b/drivers/ptp/ptp_ocp.c index beacc2ffb166..735385539b9f 100644 --- a/drivers/ptp/ptp_ocp.c +++ b/drivers/ptp/ptp_ocp.c @@ -2479,8 +2479,13 @@ ptp_ocp_ts_enable(void *priv, u32 req, bool enable) iowrite32(1, ®->intr_mask); iowrite32(1, ®->intr); } else { + int irq_vec = pci_irq_vector(bp->pdev, ext->irq_vec); + iowrite32(0, ®->intr_mask); iowrite32(0, ®->enable); + ioread32(®->intr_mask); + if (irq_vec > 0) + synchronize_irq(irq_vec); } return 0; @@ -4867,6 +4872,22 @@ ptp_ocp_detach(struct ptp_ocp *bp) ptp_ocp_detach_sysfs(bp); ptp_ocp_attr_group_del(bp); timer_delete_sync(&bp->watchdog); + /* Disable interrupts on all timestampers */ + if (bp->ts0) + ptp_ocp_ts_enable(bp->ts0, 0, false); + if (bp->ts1) + ptp_ocp_ts_enable(bp->ts1, 0, false); + if (bp->ts2) + ptp_ocp_ts_enable(bp->ts2, 0, false); + if (bp->ts3) + ptp_ocp_ts_enable(bp->ts3, 0, false); + if (bp->ts4) + ptp_ocp_ts_enable(bp->ts4, 0, false); + if (bp->pps) + ptp_ocp_ts_enable(bp->pps, ~0, false); + if (bp->ptp) + ptp_clock_unregister(bp->ptp); + kfree(bp->ptp_info.pin_config); ptp_ocp_unregister_ext(bp->ts0); ptp_ocp_unregister_ext(bp->ts1); ptp_ocp_unregister_ext(bp->ts2); @@ -4884,9 +4905,6 @@ ptp_ocp_detach(struct ptp_ocp *bp) clk_hw_unregister_fixed_rate(bp->i2c_clk); if (bp->n_irqs) pci_free_irq_vectors(bp->pdev); - if (bp->ptp) - ptp_clock_unregister(bp->ptp); - kfree(bp->ptp_info.pin_config); device_unregister(&bp->dev); } diff --git a/include/linux/mlx5/vport.h b/include/linux/mlx5/vport.h index dfa2fe32217a..282ed5442282 100644 --- a/include/linux/mlx5/vport.h +++ b/include/linux/mlx5/vport.h @@ -102,8 +102,8 @@ int mlx5_query_hca_vport_node_guid(struct mlx5_core_dev *dev, int mlx5_query_nic_vport_mac_list(struct mlx5_core_dev *dev, u16 vport, enum mlx5_list_type list_type, - u8 addr_list[][ETH_ALEN], - int *list_size); + u8 (**mac_list)[ETH_ALEN], + int *mac_list_size); int mlx5_modify_nic_vport_mac_list(struct mlx5_core_dev *dev, enum mlx5_list_type list_type, u8 addr_list[][ETH_ALEN], diff --git a/include/net/netfilter/nf_conntrack_helper.h b/include/net/netfilter/nf_conntrack_helper.h index de2f956abf34..24cf3d2d9745 100644 --- a/include/net/netfilter/nf_conntrack_helper.h +++ b/include/net/netfilter/nf_conntrack_helper.h @@ -155,6 +155,7 @@ void nf_ct_helper_log(struct sk_buff *skb, const struct nf_conn *ct, void nf_ct_helper_expectfn_register(struct nf_ct_helper_expectfn *n); void nf_ct_helper_expectfn_unregister(struct nf_ct_helper_expectfn *n); +void nf_ct_helper_expectfn_destroy(const struct nf_ct_helper_expectfn *n); struct nf_ct_helper_expectfn * nf_ct_helper_expectfn_find_by_name(const char *name); struct nf_ct_helper_expectfn * diff --git a/include/net/sock.h b/include/net/sock.h index dccd3738c368..95e157eee8d9 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -1856,6 +1856,7 @@ struct sk_buff *sock_omalloc(struct sock *sk, unsigned long size, gfp_t priority); void skb_orphan_partial(struct sk_buff *skb); void sock_rfree(struct sk_buff *skb); +void sock_rmem_free(struct sk_buff *skb); void sock_efree(struct sk_buff *skb); #ifdef CONFIG_INET void sock_edemux(struct sk_buff *skb); diff --git a/net/bridge/netfilter/ebt_dnat.c b/net/bridge/netfilter/ebt_dnat.c index 3fda71a8579d..73f185cccd63 100644 --- a/net/bridge/netfilter/ebt_dnat.c +++ b/net/bridge/netfilter/ebt_dnat.c @@ -39,7 +39,9 @@ ebt_dnat_tg(struct sk_buff *skb, const struct xt_action_param *par) dev = xt_in(par); break; case NF_BR_PRE_ROUTING: - dev = br_port_get_rcu(xt_in(par))->br->dev; + dev = netdev_master_upper_dev_get_rcu(xt_in(par)); + if (!dev) /* bridge port removed? */ + return EBT_DROP; break; default: dev = NULL; diff --git a/net/bridge/netfilter/ebt_redirect.c b/net/bridge/netfilter/ebt_redirect.c index 307790562b49..83486cd4d564 100644 --- a/net/bridge/netfilter/ebt_redirect.c +++ b/net/bridge/netfilter/ebt_redirect.c @@ -24,12 +24,18 @@ ebt_redirect_tg(struct sk_buff *skb, const struct xt_action_param *par) if (skb_ensure_writable(skb, 0)) return EBT_DROP; - if (xt_hooknum(par) != NF_BR_BROUTING) - /* rcu_read_lock()ed by nf_hook_thresh */ - ether_addr_copy(eth_hdr(skb)->h_dest, - br_port_get_rcu(xt_in(par))->br->dev->dev_addr); - else + if (xt_hooknum(par) != NF_BR_BROUTING) { + const struct net_device *dev; + + dev = netdev_master_upper_dev_get_rcu(xt_in(par)); + if (!dev) + return EBT_DROP; + + ether_addr_copy(eth_hdr(skb)->h_dest, dev->dev_addr); + } else { ether_addr_copy(eth_hdr(skb)->h_dest, xt_in(par)->dev_addr); + } + skb->pkt_type = PACKET_HOST; return info->target; } diff --git a/net/bridge/netfilter/nft_meta_bridge.c b/net/bridge/netfilter/nft_meta_bridge.c index 7763e78abb00..219c40680260 100644 --- a/net/bridge/netfilter/nft_meta_bridge.c +++ b/net/bridge/netfilter/nft_meta_bridge.c @@ -64,6 +64,8 @@ static void nft_meta_bridge_get_eval(const struct nft_expr *expr, if (!br_dev) goto err; + /* ETH_ALEN (6) is shorter than the destination register span (8) */ + dest[1] = 0; memcpy(dest, br_dev->dev_addr, ETH_ALEN); return; default: diff --git a/net/core/gro.c b/net/core/gro.c index a84753983467..35f2f708f010 100644 --- a/net/core/gro.c +++ b/net/core/gro.c @@ -232,6 +232,11 @@ int skb_gro_receive_list(struct sk_buff *p, struct sk_buff *skb) if (unlikely(p->len + skb->len >= 65536)) return -E2BIG; + if (!pskb_may_pull(skb, skb_gro_offset(skb))) { + NAPI_GRO_CB(skb)->flush = 1; + return -ENOMEM; + } + if (NAPI_GRO_CB(p)->last == p) skb_shinfo(p)->frag_list = skb; else diff --git a/net/core/netdev-genl.c b/net/core/netdev-genl.c index b8f6076d8007..119eaa6501d5 100644 --- a/net/core/netdev-genl.c +++ b/net/core/netdev-genl.c @@ -1095,8 +1095,6 @@ int netdev_nl_bind_rx_doit(struct sk_buff *skb, struct genl_info *info) genlmsg_end(rsp, hdr); err = genlmsg_reply(rsp, info); - if (err) - goto err_unbind; bitmap_free(rxq_bitmap); @@ -1104,7 +1102,7 @@ int netdev_nl_bind_rx_doit(struct sk_buff *skb, struct genl_info *info) mutex_unlock(&priv->lock); - return 0; + return err < 0 ? err : 0; err_unbind: net_devmem_unbind_dmabuf(binding); diff --git a/net/core/skbuff.c b/net/core/skbuff.c index c02f0a507ba8..8eab8eb5006a 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -5450,7 +5450,7 @@ int skb_cow_data(struct sk_buff *skb, int tailbits, struct sk_buff **trailer) } EXPORT_SYMBOL_GPL(skb_cow_data); -static void sock_rmem_free(struct sk_buff *skb) +void sock_rmem_free(struct sk_buff *skb) { struct sock *sk = skb->sk; @@ -5459,8 +5459,8 @@ static void sock_rmem_free(struct sk_buff *skb) static void skb_set_err_queue(struct sk_buff *skb) { - /* pkt_type of skbs received on local sockets is never PACKET_OUTGOING. - * So, it is safe to (mis)use it to mark skbs on the error queue. + /* The error-queue test in skb_is_err_queue() matches this marker + * with the sock_rmem_free destructor installed by sock_queue_err_skb(). */ skb->pkt_type = PACKET_OUTGOING; BUILD_BUG_ON(PACKET_OUTGOING == 0); diff --git a/net/core/sock.c b/net/core/sock.c index d097025c116a..cab041b57d28 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -1465,6 +1465,11 @@ set_sndbuf: case SO_ATTACH_FILTER: { struct sock_fprog fprog; + if (sk_is_tcp(sk) && + !sockopt_ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN)) { + ret = -EPERM; + break; + } ret = copy_bpf_fprog_from_user(&fprog, optval, optlen); if (!ret) ret = sk_attach_filter(&fprog, sk); diff --git a/net/ipv4/esp4.c b/net/ipv4/esp4.c index 513c8215c947..dfc81ee969ae 100644 --- a/net/ipv4/esp4.c +++ b/net/ipv4/esp4.c @@ -96,7 +96,7 @@ static inline struct scatterlist *esp_req_sg(struct crypto_aead *aead, __alignof__(struct scatterlist)); } -static void esp_ssg_unref(struct xfrm_state *x, void *tmp, struct sk_buff *skb) +static void esp_ssg_unref(struct xfrm_state *x, void *tmp, struct sk_buff *skb, bool already_unref) { struct crypto_aead *aead = x->data; int extralen = 0; @@ -113,10 +113,13 @@ static void esp_ssg_unref(struct xfrm_state *x, void *tmp, struct sk_buff *skb) /* Unref skb_frag_pages in the src scatterlist if necessary. * Skip the first sg which comes from skb->data. */ - if (req->src != req->dst) - for (sg = sg_next(req->src); sg; sg = sg_next(sg)) + if (already_unref || req->src != req->dst) { + struct scatterlist *src = already_unref ? esp_req_sg(aead, req) : req->src; + + for (sg = sg_next(src); sg; sg = sg_next(sg)) skb_page_unref(page_to_netmem(sg_page(sg)), skb->pp_recycle); + } } #ifdef CONFIG_INET_ESPINTCP @@ -220,7 +223,7 @@ static void esp_output_done(void *data, int err) } tmp = ESP_SKB_CB(skb)->tmp; - esp_ssg_unref(x, tmp, skb); + esp_ssg_unref(x, tmp, skb, false); kfree(tmp); if (xo && (xo->flags & XFRM_DEV_RESUME)) { @@ -569,8 +572,10 @@ int esp_output_tail(struct xfrm_state *x, struct sk_buff *skb, struct esp_info * err = skb_to_sgvec(skb, dsg, (unsigned char *)esph - skb->data, assoclen + ivlen + esp->clen + alen); - if (unlikely(err < 0)) + if (unlikely(err < 0)) { + esp_ssg_unref(x, tmp, skb, true); goto error_free; + } } if ((x->props.flags & XFRM_STATE_ESN)) @@ -602,7 +607,7 @@ int esp_output_tail(struct xfrm_state *x, struct sk_buff *skb, struct esp_info * } if (sg != dsg) - esp_ssg_unref(x, tmp, skb); + esp_ssg_unref(x, tmp, skb, false); if (!err && x->encap && x->encap->encap_type == TCP_ENCAP_ESPINTCP) err = esp_output_tail_tcp(x, skb); diff --git a/net/ipv4/inet_fragment.c b/net/ipv4/inet_fragment.c index 393770920abd..1127519b8416 100644 --- a/net/ipv4/inet_fragment.c +++ b/net/ipv4/inet_fragment.c @@ -328,6 +328,9 @@ void inet_frag_queue_flush(struct inet_frag_queue *q, reason = reason ?: SKB_DROP_REASON_FRAG_REASM_TIMEOUT; sum = inet_frag_rbtree_purge(&q->rb_fragments, reason); sub_frag_mem_limit(q->fqdir, sum); + q->rb_fragments = RB_ROOT; + q->fragments_tail = NULL; + q->last_run_head = NULL; } EXPORT_SYMBOL(inet_frag_queue_flush); diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c index 56b0f738d2f2..c790d2f49487 100644 --- a/net/ipv4/ip_fragment.c +++ b/net/ipv4/ip_fragment.c @@ -250,9 +250,6 @@ static int ip_frag_reinit(struct ipq *qp) qp->q.flags = 0; qp->q.len = 0; qp->q.meat = 0; - qp->q.rb_fragments = RB_ROOT; - qp->q.fragments_tail = NULL; - qp->q.last_run_head = NULL; qp->iif = 0; qp->ecn = 0; diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c index ad2259678c78..0ea513bf77fb 100644 --- a/net/ipv4/netfilter/arp_tables.c +++ b/net/ipv4/netfilter/arp_tables.c @@ -702,14 +702,12 @@ static int copy_entries_to_user(unsigned int total_size, const struct xt_entry_target *t; e = loc_cpu_entry + off; - if (copy_to_user(userptr + off, e, sizeof(*e))) { - ret = -EFAULT; - goto free_counters; - } - if (copy_to_user(userptr + off + if (copy_to_user(userptr + off, e, + offsetof(struct arpt_entry, counters)) || + copy_to_user(userptr + off + offsetof(struct arpt_entry, counters), &counters[num], - sizeof(counters[num])) != 0) { + sizeof(counters[num]))) { ret = -EFAULT; goto free_counters; } @@ -1327,9 +1325,8 @@ static int compat_copy_entry_to_user(struct arpt_entry *e, void __user **dstptr, origsize = *size; ce = *dstptr; - if (copy_to_user(ce, e, sizeof(struct arpt_entry)) != 0 || - copy_to_user(&ce->counters, &counters[i], - sizeof(counters[i])) != 0) + if (copy_to_user(ce, e, offsetof(struct compat_arpt_entry, counters)) || + copy_to_user(&ce->counters, &counters[i], sizeof(counters[i]))) return -EFAULT; *dstptr += sizeof(struct compat_arpt_entry); diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c index 5cbdb0815857..ca8ff0ae6cdb 100644 --- a/net/ipv4/netfilter/ip_tables.c +++ b/net/ipv4/netfilter/ip_tables.c @@ -832,14 +832,12 @@ copy_entries_to_user(unsigned int total_size, const struct xt_entry_target *t; e = loc_cpu_entry + off; - if (copy_to_user(userptr + off, e, sizeof(*e))) { - ret = -EFAULT; - goto free_counters; - } - if (copy_to_user(userptr + off + if (copy_to_user(userptr + off, e, + offsetof(struct ipt_entry, counters)) || + copy_to_user(userptr + off + offsetof(struct ipt_entry, counters), &counters[num], - sizeof(counters[num])) != 0) { + sizeof(counters[num]))) { ret = -EFAULT; goto free_counters; } @@ -1228,9 +1226,8 @@ compat_copy_entry_to_user(struct ipt_entry *e, void __user **dstptr, origsize = *size; ce = *dstptr; - if (copy_to_user(ce, e, sizeof(struct ipt_entry)) != 0 || - copy_to_user(&ce->counters, &counters[i], - sizeof(counters[i])) != 0) + if (copy_to_user(ce, e, offsetof(struct compat_ipt_entry, counters)) || + copy_to_user(&ce->counters, &counters[i], sizeof(counters[i]))) return -EFAULT; *dstptr += sizeof(struct compat_ipt_entry); diff --git a/net/ipv4/netfilter/nf_nat_h323.c b/net/ipv4/netfilter/nf_nat_h323.c index faee20af4856..10e1b0837731 100644 --- a/net/ipv4/netfilter/nf_nat_h323.c +++ b/net/ipv4/netfilter/nf_nat_h323.c @@ -555,6 +555,8 @@ static void __exit nf_nat_h323_fini(void) nf_ct_helper_expectfn_unregister(&q931_nat); nf_ct_helper_expectfn_unregister(&callforwarding_nat); synchronize_rcu(); + nf_ct_helper_expectfn_destroy(&q931_nat); + nf_ct_helper_expectfn_destroy(&callforwarding_nat); } /****************************************************************************/ diff --git a/net/ipv4/netfilter/nft_fib_ipv4.c b/net/ipv4/netfilter/nft_fib_ipv4.c index 9d0c6d75109b..177d738825b4 100644 --- a/net/ipv4/netfilter/nft_fib_ipv4.c +++ b/net/ipv4/netfilter/nft_fib_ipv4.c @@ -128,7 +128,7 @@ void nft_fib4_eval(const struct nft_expr *expr, struct nft_regs *regs, fl4.saddr = get_saddr(iph->daddr); } - *dest = 0; + nft_fib_store_result(dest, priv, NULL); if (fib_lookup(nft_net(pkt), &fl4, &res, FIB_LOOKUP_IGNORE_LINKSTATE)) return; diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index bb84a78b80f6..c9e5d3e48ab9 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -1265,6 +1265,7 @@ static void cleanup_prefix_route(struct inet6_ifaddr *ifp, unsigned long expires, bool del_rt, bool del_peer) { + struct net *net = dev_net(ifp->idev->dev); struct fib6_table *table; struct fib6_info *f6i; @@ -1273,9 +1274,10 @@ cleanup_prefix_route(struct inet6_ifaddr *ifp, unsigned long expires, ifp->idev->dev, 0, RTF_DEFAULT, true); if (f6i) { if (del_rt) - ip6_del_rt(dev_net(ifp->idev->dev), f6i, false); + ip6_del_rt(net, f6i, false); else { - if (!(f6i->fib6_flags & RTF_EXPIRES)) { + if (f6i != net->ipv6.fib6_null_entry && + !(f6i->fib6_flags & RTF_EXPIRES)) { table = f6i->fib6_table; spin_lock_bh(&table->tb6_lock); diff --git a/net/ipv6/esp6.c b/net/ipv6/esp6.c index 57481e423e59..296b57926abb 100644 --- a/net/ipv6/esp6.c +++ b/net/ipv6/esp6.c @@ -113,7 +113,7 @@ static inline struct scatterlist *esp_req_sg(struct crypto_aead *aead, __alignof__(struct scatterlist)); } -static void esp_ssg_unref(struct xfrm_state *x, void *tmp, struct sk_buff *skb) +static void esp_ssg_unref(struct xfrm_state *x, void *tmp, struct sk_buff *skb, bool already_unref) { struct crypto_aead *aead = x->data; int extralen = 0; @@ -130,10 +130,13 @@ static void esp_ssg_unref(struct xfrm_state *x, void *tmp, struct sk_buff *skb) /* Unref skb_frag_pages in the src scatterlist if necessary. * Skip the first sg which comes from skb->data. */ - if (req->src != req->dst) - for (sg = sg_next(req->src); sg; sg = sg_next(sg)) + if (already_unref || req->src != req->dst) { + struct scatterlist *src = already_unref ? esp_req_sg(aead, req) : req->src; + + for (sg = sg_next(src); sg; sg = sg_next(sg)) skb_page_unref(page_to_netmem(sg_page(sg)), skb->pp_recycle); + } } #ifdef CONFIG_INET6_ESPINTCP @@ -254,7 +257,7 @@ static void esp_output_done(void *data, int err) } tmp = ESP_SKB_CB(skb)->tmp; - esp_ssg_unref(x, tmp, skb); + esp_ssg_unref(x, tmp, skb, false); kfree(tmp); esp_output_encap_csum(skb); @@ -600,8 +603,10 @@ int esp6_output_tail(struct xfrm_state *x, struct sk_buff *skb, struct esp_info err = skb_to_sgvec(skb, dsg, (unsigned char *)esph - skb->data, assoclen + ivlen + esp->clen + alen); - if (unlikely(err < 0)) + if (unlikely(err < 0)) { + esp_ssg_unref(x, tmp, skb, true); goto error_free; + } } if ((x->props.flags & XFRM_STATE_ESN)) @@ -634,7 +639,7 @@ int esp6_output_tail(struct xfrm_state *x, struct sk_buff *skb, struct esp_info } if (sg != dsg) - esp_ssg_unref(x, tmp, skb); + esp_ssg_unref(x, tmp, skb, false); if (!err && x->encap && x->encap->encap_type == TCP_ENCAP_ESPINTCP) err = esp_output_tail_tcp(x, skb); diff --git a/net/ipv6/ip6_vti.c b/net/ipv6/ip6_vti.c index df793c8bfffb..d871cab6938d 100644 --- a/net/ipv6/ip6_vti.c +++ b/net/ipv6/ip6_vti.c @@ -106,6 +106,7 @@ vti6_tnl_lookup(struct net *net, const struct in6_addr *remote, hash = HASH(&any, local); for_each_vti6_tunnel_rcu(ip6n->tnls_r_l[hash]) { if (ipv6_addr_equal(local, &t->parms.laddr) && + ipv6_addr_any(&t->parms.raddr) && (t->dev->flags & IFF_UP)) return t; } @@ -113,6 +114,7 @@ vti6_tnl_lookup(struct net *net, const struct in6_addr *remote, hash = HASH(remote, &any); for_each_vti6_tunnel_rcu(ip6n->tnls_r_l[hash]) { if (ipv6_addr_equal(remote, &t->parms.raddr) && + ipv6_addr_any(&t->parms.laddr) && (t->dev->flags & IFF_UP)) return t; } @@ -1159,6 +1161,7 @@ static int __net_init vti6_init_net(struct net *net) goto err_alloc_dev; dev_net_set(ip6n->fb_tnl_dev, net); ip6n->fb_tnl_dev->rtnl_link_ops = &vti6_link_ops; + ip6n->fb_tnl_dev->netns_immutable = true; err = vti6_fb_tnl_dev_init(ip6n->fb_tnl_dev); if (err < 0) diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c index 9d9c3763f2f5..e34d5ba1460c 100644 --- a/net/ipv6/netfilter/ip6_tables.c +++ b/net/ipv6/netfilter/ip6_tables.c @@ -848,14 +848,12 @@ copy_entries_to_user(unsigned int total_size, const struct xt_entry_target *t; e = loc_cpu_entry + off; - if (copy_to_user(userptr + off, e, sizeof(*e))) { - ret = -EFAULT; - goto free_counters; - } - if (copy_to_user(userptr + off + if (copy_to_user(userptr + off, e, + offsetof(struct ip6t_entry, counters)) || + copy_to_user(userptr + off + offsetof(struct ip6t_entry, counters), &counters[num], - sizeof(counters[num])) != 0) { + sizeof(counters[num]))) { ret = -EFAULT; goto free_counters; } @@ -1244,9 +1242,8 @@ compat_copy_entry_to_user(struct ip6t_entry *e, void __user **dstptr, origsize = *size; ce = *dstptr; - if (copy_to_user(ce, e, sizeof(struct ip6t_entry)) != 0 || - copy_to_user(&ce->counters, &counters[i], - sizeof(counters[i])) != 0) + if (copy_to_user(ce, e, offsetof(struct compat_ip6t_entry, counters)) || + copy_to_user(&ce->counters, &counters[i], sizeof(counters[i]))) return -EFAULT; *dstptr += sizeof(struct compat_ip6t_entry); diff --git a/net/ipv6/netfilter/nft_fib_ipv6.c b/net/ipv6/netfilter/nft_fib_ipv6.c index 2dbe44715df3..b9ad7cac1417 100644 --- a/net/ipv6/netfilter/nft_fib_ipv6.c +++ b/net/ipv6/netfilter/nft_fib_ipv6.c @@ -239,7 +239,7 @@ void nft_fib6_eval(const struct nft_expr *expr, struct nft_regs *regs, lookup_flags = nft_fib6_flowi_init(&fl6, priv, pkt, oif, iph); - *dest = 0; + nft_fib_store_result(dest, priv, NULL); ret = nft_fib6_lookup(nft_net(pkt), &fl6, &res, lookup_flags); if (ret || res.fib6_flags & (RTF_REJECT | RTF_ANYCAST | RTF_LOCAL)) return; diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c index 201347b4e127..b41e231a669b 100644 --- a/net/ipv6/sit.c +++ b/net/ipv6/sit.c @@ -961,6 +961,7 @@ static netdev_tx_t ipip6_tunnel_xmit(struct sk_buff *skb, ip_rt_put(rt); goto tx_error; } + iph6 = ipv6_hdr(skb); if (df) { mtu = dst4_mtu(&rt->dst) - t_hlen; diff --git a/net/netfilter/nf_conntrack_helper.c b/net/netfilter/nf_conntrack_helper.c index 17e971bd4c74..2c5a71735561 100644 --- a/net/netfilter/nf_conntrack_helper.c +++ b/net/netfilter/nf_conntrack_helper.c @@ -283,6 +283,25 @@ void nf_ct_helper_expectfn_unregister(struct nf_ct_helper_expectfn *n) } EXPORT_SYMBOL_GPL(nf_ct_helper_expectfn_unregister); +static bool expect_iter_expectfn(struct nf_conntrack_expect *exp, void *data) +{ + const struct nf_ct_helper_expectfn *n = data; + + /* Relies on registered expectfn descriptors having unique ->expectfn + * pointers, which holds for the in-tree NAT helpers. + */ + return exp->expectfn == n->expectfn; +} + +/* Destroy expectations still pointing at @n->expectfn; call after the + * caller's RCU grace period so none outlives the (often modular) callback. + */ +void nf_ct_helper_expectfn_destroy(const struct nf_ct_helper_expectfn *n) +{ + nf_ct_expect_iterate_destroy(expect_iter_expectfn, (void *)n); +} +EXPORT_SYMBOL_GPL(nf_ct_helper_expectfn_destroy); + /* Caller should hold the rcu lock */ struct nf_ct_helper_expectfn * nf_ct_helper_expectfn_find_by_name(const char *name) diff --git a/net/netfilter/nf_dup_netdev.c b/net/netfilter/nf_dup_netdev.c index 3b0a70e154cd..3d88ef927f31 100644 --- a/net/netfilter/nf_dup_netdev.c +++ b/net/netfilter/nf_dup_netdev.c @@ -74,16 +74,18 @@ int nft_fwd_dup_netdev_offload(struct nft_offload_ctx *ctx, struct flow_action_entry *entry; struct net_device *dev; - /* nft_flow_rule_destroy() releases the reference on this device. */ dev = dev_get_by_index(ctx->net, oif); if (!dev) return -EOPNOTSUPP; entry = nft_flow_action_entry_next(ctx, flow); - if (!entry) + if (!entry) { + dev_put(dev); return -E2BIG; + } entry->id = id; + /* nft_flow_rule_destroy() releases the reference on this device. */ entry->dev = dev; return 0; diff --git a/net/netfilter/nf_log_syslog.c b/net/netfilter/nf_log_syslog.c index 7a8952b049d1..e37b09b3203b 100644 --- a/net/netfilter/nf_log_syslog.c +++ b/net/netfilter/nf_log_syslog.c @@ -815,8 +815,8 @@ static void dump_mac_header(struct nf_log_buf *m, fallback: nf_log_buf_add(m, "MAC="); - if (dev->hard_header_len && - skb->mac_header != skb->network_header) { + if (dev->hard_header_len && skb_mac_header_was_set(skb) && + skb_mac_header_len(skb) != 0) { const unsigned char *p = skb_mac_header(skb); unsigned int i; diff --git a/net/netfilter/nf_nat_core.c b/net/netfilter/nf_nat_core.c index 74ec224ce0d6..2bbf5163c0e2 100644 --- a/net/netfilter/nf_nat_core.c +++ b/net/netfilter/nf_nat_core.c @@ -1341,6 +1341,7 @@ static int __init nf_nat_init(void) RCU_INIT_POINTER(nf_nat_hook, NULL); nf_ct_helper_expectfn_unregister(&follow_master_nat); synchronize_net(); + nf_ct_helper_expectfn_destroy(&follow_master_nat); unregister_pernet_subsys(&nat_net_ops); kvfree(nf_nat_bysource); } @@ -1358,6 +1359,7 @@ static void __exit nf_nat_cleanup(void) RCU_INIT_POINTER(nf_nat_hook, NULL); synchronize_net(); + nf_ct_helper_expectfn_destroy(&follow_master_nat); kvfree(nf_nat_bysource); unregister_pernet_subsys(&nat_net_ops); } diff --git a/net/netfilter/nf_nat_sip.c b/net/netfilter/nf_nat_sip.c index 9fbfc6bff0c2..00838c0cc5bb 100644 --- a/net/netfilter/nf_nat_sip.c +++ b/net/netfilter/nf_nat_sip.c @@ -655,6 +655,7 @@ static void __exit nf_nat_sip_fini(void) RCU_INIT_POINTER(nf_nat_sip_hooks, NULL); nf_ct_helper_expectfn_unregister(&sip_nat); synchronize_rcu(); + nf_ct_helper_expectfn_destroy(&sip_nat); } static const struct nf_nat_sip_hooks sip_hooks = { diff --git a/net/netfilter/nfnetlink_log.c b/net/netfilter/nfnetlink_log.c index 2439cbbd5b26..fa3657599861 100644 --- a/net/netfilter/nfnetlink_log.c +++ b/net/netfilter/nfnetlink_log.c @@ -451,6 +451,23 @@ nla_put_failure: return -1; } +#if IS_ENABLED(CONFIG_BRIDGE_NETFILTER) +static int nflog_put_master_ifindex(struct sk_buff *nlskb, int attr, + const struct net_device *dev) +{ + const struct net_device *upper; + + if (dev && !netif_is_bridge_port(dev)) + return 0; + + upper = netdev_master_upper_dev_get_rcu((struct net_device *)dev); + if (upper && nla_put_be32(nlskb, attr, htonl(upper->ifindex))) + return -EMSGSIZE; + + return 0; +} +#endif + /* This is an inline function, we don't really care about a long * list of arguments */ static inline int @@ -505,8 +522,7 @@ __build_packet_message(struct nfnl_log_net *log, /* rcu_read_lock()ed by nf_hook_thresh or * nf_log_packet. */ - nla_put_be32(inst->skb, NFULA_IFINDEX_INDEV, - htonl(br_port_get_rcu(indev)->br->dev->ifindex))) + nflog_put_master_ifindex(inst->skb, NFULA_IFINDEX_INDEV, indev)) goto nla_put_failure; } else { int physinif; @@ -542,8 +558,7 @@ __build_packet_message(struct nfnl_log_net *log, /* rcu_read_lock()ed by nf_hook_thresh or * nf_log_packet. */ - nla_put_be32(inst->skb, NFULA_IFINDEX_OUTDEV, - htonl(br_port_get_rcu(outdev)->br->dev->ifindex))) + nflog_put_master_ifindex(inst->skb, NFULA_IFINDEX_OUTDEV, outdev)) goto nla_put_failure; } else { struct net_device *physoutdev; diff --git a/net/netfilter/nfnetlink_queue.c b/net/netfilter/nfnetlink_queue.c index 60ab88d45096..c5e29fec419b 100644 --- a/net/netfilter/nfnetlink_queue.c +++ b/net/netfilter/nfnetlink_queue.c @@ -440,10 +440,47 @@ static bool nf_ct_drop_unconfirmed(const struct nf_queue_entry *entry, bool *is_ return false; } +static bool nf_bridge_port_valid(const struct net_device *dev) +{ + if (!dev) + return true; + + return netif_is_bridge_port(dev); +} + +/* queued skbs leave rcu protection. We bump device refcount so that + * the device cannot go away. However, while packet was out the port + * could have been removed from the bridge. + * + * Ensure in+outdev are still part of a bridge at reinject time. + * + * The device rx_handler_data could even be pointing at data that is + * not a net_bridge_port structure. + */ +static bool nf_bridge_ports_valid(const struct nf_queue_entry *entry) +{ +#if IS_ENABLED(CONFIG_BRIDGE_NETFILTER) + if (!nf_bridge_port_valid(entry->physin) || + !nf_bridge_port_valid(entry->physout)) + return false; +#endif + if (entry->state.pf != PF_BRIDGE) + return true; + + if (!nf_bridge_port_valid(entry->state.in) || + !nf_bridge_port_valid(entry->state.out)) + return false; + + return true; +} + static void nfqnl_reinject(struct nf_queue_entry *entry, unsigned int verdict) { const struct nf_ct_hook *ct_hook; + if (!nf_bridge_ports_valid(entry)) + verdict = NF_DROP; + if (verdict == NF_ACCEPT || verdict == NF_REPEAT || verdict == NF_STOP) { @@ -636,6 +673,23 @@ static int nf_queue_checksum_help(struct sk_buff *entskb) return skb_checksum_help(entskb); } +#if IS_ENABLED(CONFIG_BRIDGE_NETFILTER) +static int nfqnl_put_master_ifindex(struct sk_buff *nlskb, int attr, + const struct net_device *dev) +{ + const struct net_device *upper; + + if (dev && !netif_is_bridge_port(dev)) + return 0; + + upper = netdev_master_upper_dev_get_rcu((struct net_device *)dev); + if (upper && nla_put_be32(nlskb, attr, htonl(upper->ifindex))) + return -EMSGSIZE; + + return 0; +} +#endif + static struct sk_buff * nfqnl_build_packet_message(struct net *net, struct nfqnl_instance *queue, struct nf_queue_entry *entry, @@ -771,10 +825,7 @@ nfqnl_build_packet_message(struct net *net, struct nfqnl_instance *queue, * netfilter_bridge) */ if (nla_put_be32(skb, NFQA_IFINDEX_PHYSINDEV, htonl(indev->ifindex)) || - /* this is the bridge group "brX" */ - /* rcu_read_lock()ed by __nf_queue */ - nla_put_be32(skb, NFQA_IFINDEX_INDEV, - htonl(br_port_get_rcu(indev)->br->dev->ifindex))) + nfqnl_put_master_ifindex(skb, NFQA_IFINDEX_INDEV, indev)) goto nla_put_failure; } else { int physinif; @@ -805,10 +856,7 @@ nfqnl_build_packet_message(struct net *net, struct nfqnl_instance *queue, * netfilter_bridge) */ if (nla_put_be32(skb, NFQA_IFINDEX_PHYSOUTDEV, htonl(outdev->ifindex)) || - /* this is the bridge group "brX" */ - /* rcu_read_lock()ed by __nf_queue */ - nla_put_be32(skb, NFQA_IFINDEX_OUTDEV, - htonl(br_port_get_rcu(outdev)->br->dev->ifindex))) + nfqnl_put_master_ifindex(skb, NFQA_IFINDEX_OUTDEV, outdev)) goto nla_put_failure; } else { int physoutif; diff --git a/net/netfilter/nft_exthdr.c b/net/netfilter/nft_exthdr.c index e6a07c0df207..d3fc7969f123 100644 --- a/net/netfilter/nft_exthdr.c +++ b/net/netfilter/nft_exthdr.c @@ -532,6 +532,9 @@ static int nft_exthdr_init(const struct nft_ctx *ctx, return err; } + if ((flags & NFT_EXTHDR_F_PRESENT) && len != 1) + return -EINVAL; + priv->type = nla_get_u8(tb[NFTA_EXTHDR_TYPE]); priv->offset = offset; priv->len = len; diff --git a/net/netfilter/nft_fib.c b/net/netfilter/nft_fib.c index 327a5f33659c..a1632e308f18 100644 --- a/net/netfilter/nft_fib.c +++ b/net/netfilter/nft_fib.c @@ -107,6 +107,12 @@ int nft_fib_init(const struct nft_ctx *ctx, const struct nft_expr *expr, return -EINVAL; } + if (priv->flags & NFTA_FIB_F_PRESENT) { + if (priv->result != NFT_FIB_RESULT_OIF) + return -EINVAL; + len = sizeof(u8); + } + err = nft_parse_register_store(ctx, tb[NFTA_FIB_DREG], &priv->dreg, NULL, NFT_DATA_VALUE, len); if (err < 0) diff --git a/net/netlabel/netlabel_unlabeled.c b/net/netlabel/netlabel_unlabeled.c index ca7a9e2a3de7..870e7699326a 100644 --- a/net/netlabel/netlabel_unlabeled.c +++ b/net/netlabel/netlabel_unlabeled.c @@ -114,14 +114,14 @@ static struct genl_family netlbl_unlabel_gnl_family; /* NetLabel Netlink attribute policy */ static const struct nla_policy netlbl_unlabel_genl_policy[NLBL_UNLABEL_A_MAX + 1] = { [NLBL_UNLABEL_A_ACPTFLG] = { .type = NLA_U8 }, - [NLBL_UNLABEL_A_IPV6ADDR] = { .type = NLA_BINARY, - .len = sizeof(struct in6_addr) }, - [NLBL_UNLABEL_A_IPV6MASK] = { .type = NLA_BINARY, - .len = sizeof(struct in6_addr) }, - [NLBL_UNLABEL_A_IPV4ADDR] = { .type = NLA_BINARY, - .len = sizeof(struct in_addr) }, - [NLBL_UNLABEL_A_IPV4MASK] = { .type = NLA_BINARY, - .len = sizeof(struct in_addr) }, + [NLBL_UNLABEL_A_IPV6ADDR] = + NLA_POLICY_EXACT_LEN(sizeof(struct in6_addr)), + [NLBL_UNLABEL_A_IPV6MASK] = + NLA_POLICY_EXACT_LEN(sizeof(struct in6_addr)), + [NLBL_UNLABEL_A_IPV4ADDR] = + NLA_POLICY_EXACT_LEN(sizeof(struct in_addr)), + [NLBL_UNLABEL_A_IPV4MASK] = + NLA_POLICY_EXACT_LEN(sizeof(struct in_addr)), [NLBL_UNLABEL_A_IFACE] = { .type = NLA_NUL_STRING, .len = IFNAMSIZ - 1 }, [NLBL_UNLABEL_A_SECCTX] = { .type = NLA_BINARY } @@ -757,24 +757,14 @@ static int netlbl_unlabel_addrinfo_get(struct genl_info *info, void **mask, u32 *len) { - u32 addr_len; - if (info->attrs[NLBL_UNLABEL_A_IPV4ADDR] && info->attrs[NLBL_UNLABEL_A_IPV4MASK]) { - addr_len = nla_len(info->attrs[NLBL_UNLABEL_A_IPV4ADDR]); - if (addr_len != sizeof(struct in_addr) && - addr_len != nla_len(info->attrs[NLBL_UNLABEL_A_IPV4MASK])) - return -EINVAL; - *len = addr_len; + *len = sizeof(struct in_addr); *addr = nla_data(info->attrs[NLBL_UNLABEL_A_IPV4ADDR]); *mask = nla_data(info->attrs[NLBL_UNLABEL_A_IPV4MASK]); return 0; } else if (info->attrs[NLBL_UNLABEL_A_IPV6ADDR]) { - addr_len = nla_len(info->attrs[NLBL_UNLABEL_A_IPV6ADDR]); - if (addr_len != sizeof(struct in6_addr) && - addr_len != nla_len(info->attrs[NLBL_UNLABEL_A_IPV6MASK])) - return -EINVAL; - *len = addr_len; + *len = sizeof(struct in6_addr); *addr = nla_data(info->attrs[NLBL_UNLABEL_A_IPV6ADDR]); *mask = nla_data(info->attrs[NLBL_UNLABEL_A_IPV6MASK]); return 0; diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c index bbbde50fc649..f0164817d9b7 100644 --- a/net/openvswitch/datapath.c +++ b/net/openvswitch/datapath.c @@ -1316,6 +1316,7 @@ static int ovs_flow_cmd_set(struct sk_buff *skb, struct genl_info *info) if (IS_ERR(reply)) { error = PTR_ERR(reply); + reply = NULL; goto err_unlock_ovs; } } diff --git a/net/phonet/pn_dev.c b/net/phonet/pn_dev.c index 86325b7fc1b6..ad44831d6745 100644 --- a/net/phonet/pn_dev.c +++ b/net/phonet/pn_dev.c @@ -108,7 +108,7 @@ static void phonet_device_destroy(struct net_device *dev) for_each_set_bit(addr, pnd->addrs, 64) phonet_address_notify(net, RTM_DELADDR, ifindex, addr); - kfree(pnd); + kfree_rcu(pnd, rcu); } } diff --git a/net/qrtr/af_qrtr.c b/net/qrtr/af_qrtr.c index 7cec6a7859b0..db823177e636 100644 --- a/net/qrtr/af_qrtr.c +++ b/net/qrtr/af_qrtr.c @@ -707,13 +707,13 @@ static void qrtr_port_remove(struct qrtr_sock *ipc) if (port == QRTR_PORT_CTRL) port = 0; - __sock_put(&ipc->sk); - xa_erase(&qrtr_ports, port); /* Ensure that if qrtr_port_lookup() did enter the RCU read section we * wait for it to up increment the refcount */ synchronize_rcu(); + + __sock_put(&ipc->sk); } /* Assign port number to socket. diff --git a/net/rds/ib_send.c b/net/rds/ib_send.c index fcd04c29f543..d6be95542119 100644 --- a/net/rds/ib_send.c +++ b/net/rds/ib_send.c @@ -170,6 +170,8 @@ static struct rds_message *rds_ib_send_unmap_op(struct rds_ib_connection *ic, break; case IB_WR_ATOMIC_FETCH_AND_ADD: case IB_WR_ATOMIC_CMP_AND_SWP: + case IB_WR_MASKED_ATOMIC_FETCH_AND_ADD: + case IB_WR_MASKED_ATOMIC_CMP_AND_SWP: if (send->s_op) { rm = container_of(send->s_op, struct rds_message, atomic); rds_ib_send_unmap_atomic(ic, send->s_op, wc_status); diff --git a/net/rds/info.c b/net/rds/info.c index f1b29994934a..17061f6ff74e 100644 --- a/net/rds/info.c +++ b/net/rds/info.c @@ -235,7 +235,7 @@ call_func: out: if (pages) - unpin_user_pages(pages, nr_pages); + unpin_user_pages_dirty_lock(pages, nr_pages, true); kfree(pages); return ret; diff --git a/net/rxrpc/input.c b/net/rxrpc/input.c index 24aceb183c2c..ce761466b02d 100644 --- a/net/rxrpc/input.c +++ b/net/rxrpc/input.c @@ -963,23 +963,34 @@ static void rxrpc_input_soft_acks(struct rxrpc_call *call, struct rxrpc_skb_priv *sp = rxrpc_skb(skb); struct rxrpc_txqueue *tq = call->tx_queue; unsigned long extracted = ~0UL; - unsigned int nr = 0; + unsigned int nr = 0, nsack; rxrpc_seq_t seq = call->acks_hard_ack + 1; rxrpc_seq_t lowest_nak = seq + sp->ack.nr_acks; - u8 *acks = skb->data + sizeof(struct rxrpc_wire_header) + sizeof(struct rxrpc_ackpacket); + u8 sack[256] __aligned(sizeof(unsigned long)); + u8 *acks = sack; _enter("%x,%x,%u", tq->qbase, seq, sp->ack.nr_acks); while (after(seq, tq->qbase + RXRPC_NR_TXQUEUE - 1)) tq = tq->next; + /* Extract an individual SACK table. A normal SACK table is up to 255 + * bytes with 1 ACK flag per byte, but an extended SACK table can be up + * to 256 bytes with up to 8 ACK/NACK flags per byte. The ACK flags go + * across all bit 0's then all bit 1's, then all bit 2's, ... + */ + memset(sack, 0, sizeof(sack)); + nsack = umin(sp->ack.nr_acks, 256); + if (skb_copy_bits(skb, + sizeof(struct rxrpc_wire_header) + sizeof(struct rxrpc_ackpacket), + sack, nsack) < 0) + return; + for (unsigned int i = 0; i < sp->ack.nr_acks; i++) { /* Decant ACKs until we hit a txqueue boundary. */ + if ((i & 255) == 0) + acks = sack; shiftr_adv_rotr(acks, extracted); - if (i == 256) { - acks -= i; - i = 0; - } seq++; nr++; if ((seq & RXRPC_TXQ_MASK) != 0) @@ -1117,9 +1128,6 @@ static void rxrpc_input_ack(struct rxrpc_call *call, struct sk_buff *skb) skb_copy_bits(skb, ioffset, &trailer, sizeof(trailer)) < 0) return rxrpc_proto_abort(call, 0, rxrpc_badmsg_short_ack_trailer); - if (nr_acks > 0) - skb_condense(skb); - call->acks_latest_ts = ktime_get_real(); call->acks_hard_ack = hard_ack; call->acks_prev_seq = prev_pkt; diff --git a/net/sctp/bind_addr.c b/net/sctp/bind_addr.c index 75e3e61d494e..31737f144c7f 100644 --- a/net/sctp/bind_addr.c +++ b/net/sctp/bind_addr.c @@ -275,6 +275,16 @@ int sctp_raw_to_bind_addrs(struct sctp_bind_addr *bp, __u8 *raw_addr_list, param = (struct sctp_paramhdr *)raw_addr_list; rawaddr = (union sctp_addr_param *)raw_addr_list; + if (addrs_len < sizeof(*param)) { + retval = -EINVAL; + goto out_err; + } + len = ntohs(param->length); + if (addrs_len < len) { + retval = -EINVAL; + goto out_err; + } + af = sctp_get_af_specific(param_type2af(param->type)); if (unlikely(!af) || !af->from_addr_param(&addr, rawaddr, htons(port), 0)) { @@ -291,7 +301,6 @@ int sctp_raw_to_bind_addrs(struct sctp_bind_addr *bp, __u8 *raw_addr_list, goto out_err; next: - len = ntohs(param->length); addrs_len -= len; raw_addr_list += len; } diff --git a/net/sctp/input.c b/net/sctp/input.c index e119e460ccde..864741fae418 100644 --- a/net/sctp/input.c +++ b/net/sctp/input.c @@ -1204,6 +1204,14 @@ static struct sctp_association *__sctp_rcv_asconf_lookup( /* Skip over the ADDIP header and find the Address parameter */ param = (union sctp_addr_param *)(asconf + 1); + /* The whole address parameter must lie within the chunk before + * af->from_addr_param() reads the variable-length address; otherwise a + * truncated trailing ASCONF chunk lets it read uninitialized bytes past + * the parameter. + */ + if (sizeof(*asconf) + ntohs(param->p.length) > ntohs(ch->length)) + return NULL; + af = sctp_get_af_specific(param_type2af(param->p.type)); if (unlikely(!af)) return NULL; diff --git a/net/sctp/sm_make_chunk.c b/net/sctp/sm_make_chunk.c index 85264862fb6b..1741a9f33d8c 100644 --- a/net/sctp/sm_make_chunk.c +++ b/net/sctp/sm_make_chunk.c @@ -1731,8 +1731,8 @@ struct sctp_association *sctp_unpack_cookie( struct sk_buff *skb = chunk->skb; struct sctp_cookie *bear_cookie; struct sctp_chunkhdr *ch; + unsigned int len, chlen; enum sctp_scope scope; - unsigned int len; ktime_t kt; /* Header size is static data prior to the actual cookie, including @@ -1761,7 +1761,12 @@ struct sctp_association *sctp_unpack_cookie( bear_cookie = &cookie->c; ch = (struct sctp_chunkhdr *)(bear_cookie + 1); - if (ntohs(ch->length) > len - fixed_size) + chlen = ntohs(ch->length); + if (chlen < sizeof(struct sctp_init_chunk)) + goto malformed; + if (chlen > len - fixed_size) + goto malformed; + if (bear_cookie->raw_addr_list_len > len - fixed_size - chlen) goto malformed; /* Verify the cookie's MAC, if cookie authentication is enabled. */ diff --git a/net/sctp/stream.c b/net/sctp/stream.c index c2247793c88b..5c2fdedea088 100644 --- a/net/sctp/stream.c +++ b/net/sctp/stream.c @@ -1038,6 +1038,7 @@ struct sctp_chunk *sctp_process_strreset_resp( stsn, rtsn, GFP_ATOMIC); } else if (req->type == SCTP_PARAM_RESET_ADD_OUT_STREAMS) { struct sctp_strreset_addstrm *addstrm; + const struct sctp_sched_ops *sched; __u16 number; addstrm = (struct sctp_strreset_addstrm *)req; @@ -1048,7 +1049,10 @@ struct sctp_chunk *sctp_process_strreset_resp( for (i = number; i < stream->outcnt; i++) SCTP_SO(stream, i)->state = SCTP_STREAM_OPEN; } else { - sctp_stream_shrink_out(stream, number); + sched = sctp_sched_ops_from_stream(stream); + sched->unsched_all(stream); + sctp_stream_outq_migrate(stream, NULL, number); + sched->sched_all(stream); stream->outcnt = number; } diff --git a/net/socket.c b/net/socket.c index 22a412fdec07..c2698a1441a7 100644 --- a/net/socket.c +++ b/net/socket.c @@ -852,12 +852,13 @@ EXPORT_SYMBOL(kernel_sendmsg); static bool skb_is_err_queue(const struct sk_buff *skb) { - /* pkt_type of skbs enqueued on the error queue are set to - * PACKET_OUTGOING in skb_set_err_queue(). This is only safe to do - * in recvmsg, since skbs received on a local socket will never - * have a pkt_type of PACKET_OUTGOING. + /* Error-queue skbs are marked as PACKET_OUTGOING in + * skb_set_err_queue() and use the destructor installed by + * sock_queue_err_skb(). PACKET_OUTGOING alone is not unique: + * AF_PACKET outgoing taps use the same pkt_type. */ - return skb->pkt_type == PACKET_OUTGOING; + return skb->pkt_type == PACKET_OUTGOING && + skb->destructor == sock_rmem_free; } /* On transmit, software and hardware timestamps are returned independently. diff --git a/net/xfrm/espintcp.c b/net/xfrm/espintcp.c index a2756186e13a..d9035546375e 100644 --- a/net/xfrm/espintcp.c +++ b/net/xfrm/espintcp.c @@ -346,6 +346,10 @@ static int espintcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t size) err = -ENOBUFS; goto unlock; } + if (emsg->len) { + err = -ENOBUFS; + goto unlock; + } sk_msg_init(&emsg->skmsg); while (1) { diff --git a/net/xfrm/xfrm_iptfs.c b/net/xfrm/xfrm_iptfs.c index 6c6bbc040517..ad810d1f97c0 100644 --- a/net/xfrm/xfrm_iptfs.c +++ b/net/xfrm/xfrm_iptfs.c @@ -954,6 +954,7 @@ static bool __input_process_payload(struct xfrm_state *x, u32 data, u32 first_iplen, iphlen, iplen, remaining, tail; u32 capturelen; u64 seq; + bool first_skb_partial = false; xtfs = x->mode_data; net = xs_net(x); @@ -1161,6 +1162,7 @@ static bool __input_process_payload(struct xfrm_state *x, u32 data, spin_unlock(&xtfs->drop_lock); + first_skb_partial = (first_skb == skb); break; } @@ -1172,7 +1174,7 @@ static bool __input_process_payload(struct xfrm_state *x, u32 data, /* this should not happen from the above code */ XFRM_INC_STATS(net, LINUX_MIB_XFRMINIPTFSERROR); - if (first_skb && first_iplen && !defer && first_skb != xtfs->ra_newskb) { + if (first_skb && first_iplen && !defer && !first_skb_partial) { /* first_skb is queued b/c !defer and not partial */ if (pskb_trim(first_skb, first_iplen)) { /* error trimming */ @@ -2168,6 +2170,8 @@ static void iptfs_consume_frags(struct sk_buff *to, struct sk_buff *from) memcpy(&toi->frags[toi->nr_frags], fromi->frags, sizeof(fromi->frags[0]) * fromi->nr_frags); toi->nr_frags += fromi->nr_frags; + if (fromi->nr_frags) + toi->flags |= fromi->flags & SKBFL_SHARED_FRAG; fromi->nr_frags = 0; from->data_len = 0; from->len = 0; @@ -2726,8 +2730,9 @@ static void iptfs_destroy_state(struct xfrm_state *x) if (!xtfs) return; - spin_lock_bh(&xtfs->x->lock); hrtimer_cancel(&xtfs->iptfs_timer); + + spin_lock_bh(&xtfs->x->lock); __skb_queue_head_init(&list); skb_queue_splice_init(&xtfs->queue, &list); spin_unlock_bh(&xtfs->x->lock); @@ -2735,9 +2740,7 @@ static void iptfs_destroy_state(struct xfrm_state *x) while ((skb = __skb_dequeue(&list))) kfree_skb(skb); - spin_lock_bh(&xtfs->drop_lock); hrtimer_cancel(&xtfs->drop_timer); - spin_unlock_bh(&xtfs->drop_lock); if (xtfs->ra_newskb) kfree_skb(xtfs->ra_newskb); diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index dd09d2063da2..959544425692 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -1156,15 +1156,6 @@ static void __xfrm_policy_inexact_prune_bin(struct xfrm_pol_inexact_bin *b, bool } } -static void xfrm_policy_inexact_prune_bin(struct xfrm_pol_inexact_bin *b) -{ - struct net *net = read_pnet(&b->k.net); - - spin_lock_bh(&net->xfrm.xfrm_policy_lock); - __xfrm_policy_inexact_prune_bin(b, false); - spin_unlock_bh(&net->xfrm.xfrm_policy_lock); -} - static void __xfrm_policy_inexact_flush(struct net *net) { struct xfrm_pol_inexact_bin *bin, *t; @@ -1707,12 +1698,12 @@ xfrm_policy_bysel_ctx(struct net *net, const struct xfrm_mark *mark, u32 if_id, } ret = pol; } + if (bin && delete) + __xfrm_policy_inexact_prune_bin(bin, false); spin_unlock_bh(&net->xfrm.xfrm_policy_lock); if (ret && delete) xfrm_policy_kill(ret); - if (bin && delete) - xfrm_policy_inexact_prune_bin(bin); return ret; } EXPORT_SYMBOL(xfrm_policy_bysel_ctx); |
