From 6c5c9581044dd6e0cd284ab653502fb9264f08b6 Mon Sep 17 00:00:00 2001 From: Magnus Karlsson Date: Tue, 28 Aug 2018 14:44:28 +0200 Subject: net: add napi_if_scheduled_mark_missed The function napi_if_scheduled_mark_missed is used to check if the NAPI context is scheduled, if so set NAPIF_STATE_MISSED and return true. Used by the AF_XDP zero-copy i40e Tx code implementation in order to make sure that irq affinity is honored by the napi context. Signed-off-by: Magnus Karlsson Signed-off-by: Alexei Starovoitov --- include/linux/netdevice.h | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index ca5ab98053c8..4271f6b4e419 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -535,6 +535,32 @@ static inline void napi_synchronize(const struct napi_struct *n) barrier(); } +/** + * napi_if_scheduled_mark_missed - if napi is running, set the + * NAPIF_STATE_MISSED + * @n: NAPI context + * + * If napi is running, set the NAPIF_STATE_MISSED, and return true if + * NAPI is scheduled. + **/ +static inline bool napi_if_scheduled_mark_missed(struct napi_struct *n) +{ + unsigned long val, new; + + do { + val = READ_ONCE(n->state); + if (val & NAPIF_STATE_DISABLE) + return true; + + if (!(val & NAPIF_STATE_SCHED)) + return false; + + new = val | NAPIF_STATE_MISSED; + } while (cmpxchg(&n->state, val, new) != val); + + return true; +} + enum netdev_queue_state_t { __QUEUE_STATE_DRV_XOFF, __QUEUE_STATE_STACK_XOFF, -- cgit v1.2.3 From 679c782de14bd48c19dd74cd1af20a2bc05dd936 Mon Sep 17 00:00:00 2001 From: Edward Cree Date: Wed, 22 Aug 2018 20:02:19 +0100 Subject: bpf/verifier: per-register parent pointers By giving each register its own liveness chain, we elide the skip_callee() logic. Instead, each register's parent is the state it inherits from; both check_func_call() and prepare_func_exit() automatically connect reg states to the correct chain since when they copy the reg state across (r1-r5 into the callee as args, and r0 out as the return value) they also copy the parent pointer. Signed-off-by: Edward Cree Signed-off-by: Alexei Starovoitov --- include/linux/bpf_verifier.h | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h index 38b04f559ad3..b42b60a83e19 100644 --- a/include/linux/bpf_verifier.h +++ b/include/linux/bpf_verifier.h @@ -41,6 +41,7 @@ enum bpf_reg_liveness { }; struct bpf_reg_state { + /* Ordering of fields matters. See states_equal() */ enum bpf_reg_type type; union { /* valid when type == PTR_TO_PACKET */ @@ -59,7 +60,6 @@ struct bpf_reg_state { * came from, when one is tested for != NULL. */ u32 id; - /* Ordering of fields matters. See states_equal() */ /* For scalar types (SCALAR_VALUE), this represents our knowledge of * the actual value. * For pointer types, this represents the variable part of the offset @@ -76,15 +76,15 @@ struct bpf_reg_state { s64 smax_value; /* maximum possible (s64)value */ u64 umin_value; /* minimum possible (u64)value */ u64 umax_value; /* maximum possible (u64)value */ + /* parentage chain for liveness checking */ + struct bpf_reg_state *parent; /* Inside the callee two registers can be both PTR_TO_STACK like * R1=fp-8 and R2=fp-8, but one of them points to this function stack * while another to the caller's stack. To differentiate them 'frameno' * is used which is an index in bpf_verifier_state->frame[] array * pointing to bpf_func_state. - * This field must be second to last, for states_equal() reasons. */ u32 frameno; - /* This field must be last, for states_equal() reasons. */ enum bpf_reg_liveness live; }; @@ -107,7 +107,6 @@ struct bpf_stack_state { */ struct bpf_func_state { struct bpf_reg_state regs[MAX_BPF_REG]; - struct bpf_verifier_state *parent; /* index of call instruction that called into this func */ int callsite; /* stack frame number of this function state from pov of @@ -129,7 +128,6 @@ struct bpf_func_state { struct bpf_verifier_state { /* call stack tracking */ struct bpf_func_state *frame[MAX_CALL_FRAMES]; - struct bpf_verifier_state *parent; u32 curframe; }; -- cgit v1.2.3 From 9b3004953503462a4fab31b85e44ae446d48f0bd Mon Sep 17 00:00:00 2001 From: Michal Kubecek Date: Tue, 28 Aug 2018 19:56:58 +0200 Subject: ethtool: drop get_settings and set_settings callbacks Since [gs]et_settings ethtool_ops callbacks have been deprecated in February 2016, all in tree NIC drivers have been converted to provide [gs]et_link_ksettings() and out of tree drivers have had enough time to do the same. Drop get_settings() and set_settings() and implement both ETHTOOL_[GS]SET and ETHTOOL_[GS]LINKSETTINGS only using [gs]et_link_ksettings(). Signed-off-by: Michal Kubecek Signed-off-by: David S. Miller --- include/linux/ethtool.h | 33 ++++++++++----------------------- 1 file changed, 10 insertions(+), 23 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ethtool.h b/include/linux/ethtool.h index f8a2245b70ac..afd9596ce636 100644 --- a/include/linux/ethtool.h +++ b/include/linux/ethtool.h @@ -183,14 +183,6 @@ bool ethtool_convert_link_mode_to_legacy_u32(u32 *legacy_u32, /** * struct ethtool_ops - optional netdev operations - * @get_settings: DEPRECATED, use %get_link_ksettings/%set_link_ksettings - * API. Get various device settings including Ethernet link - * settings. The @cmd parameter is expected to have been cleared - * before get_settings is called. Returns a negative error code - * or zero. - * @set_settings: DEPRECATED, use %get_link_ksettings/%set_link_ksettings - * API. Set various device settings including Ethernet link - * settings. Returns a negative error code or zero. * @get_drvinfo: Report driver/device information. Should only set the * @driver, @version, @fw_version and @bus_info fields. If not * implemented, the @driver and @bus_info fields will be filled in @@ -297,19 +289,16 @@ bool ethtool_convert_link_mode_to_legacy_u32(u32 *legacy_u32, * a TX queue has this number, return -EINVAL. If only a RX queue or a TX * queue has this number, ignore the inapplicable fields. * Returns a negative error code or zero. - * @get_link_ksettings: When defined, takes precedence over the - * %get_settings method. Get various device settings - * including Ethernet link settings. The %cmd and - * %link_mode_masks_nwords fields should be ignored (use - * %__ETHTOOL_LINK_MODE_MASK_NBITS instead of the latter), any - * change to them will be overwritten by kernel. Returns a - * negative error code or zero. - * @set_link_ksettings: When defined, takes precedence over the - * %set_settings method. Set various device settings including - * Ethernet link settings. The %cmd and %link_mode_masks_nwords - * fields should be ignored (use %__ETHTOOL_LINK_MODE_MASK_NBITS - * instead of the latter), any change to them will be overwritten - * by kernel. Returns a negative error code or zero. + * @get_link_ksettings: Get various device settings including Ethernet link + * settings. The %cmd and %link_mode_masks_nwords fields should be + * ignored (use %__ETHTOOL_LINK_MODE_MASK_NBITS instead of the latter), + * any change to them will be overwritten by kernel. Returns a negative + * error code or zero. + * @set_link_ksettings: Set various device settings including Ethernet link + * settings. The %cmd and %link_mode_masks_nwords fields should be + * ignored (use %__ETHTOOL_LINK_MODE_MASK_NBITS instead of the latter), + * any change to them will be overwritten by kernel. Returns a negative + * error code or zero. * @get_fecparam: Get the network device Forward Error Correction parameters. * @set_fecparam: Set the network device Forward Error Correction parameters. * @get_ethtool_phy_stats: Return extended statistics about the PHY device. @@ -329,8 +318,6 @@ bool ethtool_convert_link_mode_to_legacy_u32(u32 *legacy_u32, * of the generic netdev features interface. */ struct ethtool_ops { - int (*get_settings)(struct net_device *, struct ethtool_cmd *); - int (*set_settings)(struct net_device *, struct ethtool_cmd *); void (*get_drvinfo)(struct net_device *, struct ethtool_drvinfo *); int (*get_regs_len)(struct net_device *); void (*get_regs)(struct net_device *, struct ethtool_regs *, void *); -- cgit v1.2.3 From a4e0109a19c554e44c832958e426303781e1ad30 Mon Sep 17 00:00:00 2001 From: Harshitha Ramamurthy Date: Mon, 20 Aug 2018 08:12:32 -0700 Subject: virtchnl: use u8 type for a field in the virtchnl_filter struct The virtchnl_filter struct has a field called field_flags. A previous commit mistakenly had the type to be a __u8. What we want is for the field to be an unsigned 8 bit value, so let's just use the existing kernel type u8 for that. Signed-off-by: Harshitha Ramamurthy Tested-by: Andrew Bowers Signed-off-by: Jeff Kirsher --- include/linux/avf/virtchnl.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/avf/virtchnl.h b/include/linux/avf/virtchnl.h index 212b3822d180..b41f7bc958ef 100644 --- a/include/linux/avf/virtchnl.h +++ b/include/linux/avf/virtchnl.h @@ -573,7 +573,7 @@ struct virtchnl_filter { enum virtchnl_flow_type flow_type; enum virtchnl_action action; u32 action_meta; - __u8 field_flags; + u8 field_flags; }; VIRTCHNL_CHECK_STRUCT_LEN(272, virtchnl_filter); -- cgit v1.2.3 From aa7e80b220f3a543eefbe4b7e2c5d2b73e2e2ef7 Mon Sep 17 00:00:00 2001 From: Moni Shoua Date: Mon, 3 Sep 2018 20:19:28 +0300 Subject: net/mlx5: Fix atomic_mode enum values The field atomic_mode is 4 bits wide and therefore can hold values from 0x0 to 0xf. Remove the unnecessary 20 bit shift that made the values be incorrect. While that, remove unused enum values. Fixes: 57cda166bbe0 ("net/mlx5: Add DCT command interface") Signed-off-by: Moni Shoua Reviewed-by: Artemy Kovalyov Signed-off-by: Leon Romanovsky --- include/linux/mlx5/driver.h | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index 7a452716de4b..d885e9f0e054 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -163,10 +163,7 @@ enum mlx5_dcbx_oper_mode { }; enum mlx5_dct_atomic_mode { - MLX5_ATOMIC_MODE_DCT_OFF = 20, - MLX5_ATOMIC_MODE_DCT_NONE = 0 << MLX5_ATOMIC_MODE_DCT_OFF, - MLX5_ATOMIC_MODE_DCT_IB_COMP = 1 << MLX5_ATOMIC_MODE_DCT_OFF, - MLX5_ATOMIC_MODE_DCT_CX = 2 << MLX5_ATOMIC_MODE_DCT_OFF, + MLX5_ATOMIC_MODE_DCT_CX = 2, }; enum { -- cgit v1.2.3 From 8ce78257965e6cd49720e653867e766ecd38883f Mon Sep 17 00:00:00 2001 From: Mark Bloch Date: Tue, 28 Aug 2018 14:18:41 +0300 Subject: net/mlx5: Add proper NIC TX steering flow tables support Extend the ability to add steering rules to NIC TX flow tables. For now, we are only adding TX bypass (egress) which is used by the RDMA side. This will allow to shape outgoing traffic and tweak it if needed, for example performing encapsulation or rewriting headers. Signed-off-by: Mark Bloch Reviewed-by: Saeed Mahameed Signed-off-by: Leon Romanovsky --- include/linux/mlx5/device.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mlx5/device.h b/include/linux/mlx5/device.h index 11fa4e66afc5..f2281e69ab39 100644 --- a/include/linux/mlx5/device.h +++ b/include/linux/mlx5/device.h @@ -1120,6 +1120,12 @@ enum mlx5_qcam_feature_groups { #define MLX5_CAP_FLOWTABLE_NIC_RX_MAX(mdev, cap) \ MLX5_CAP_FLOWTABLE_MAX(mdev, flow_table_properties_nic_receive.cap) +#define MLX5_CAP_FLOWTABLE_NIC_TX(mdev, cap) \ + MLX5_CAP_FLOWTABLE(mdev, flow_table_properties_nic_transmit.cap) + +#define MLX5_CAP_FLOWTABLE_NIC_TX_MAX(mdev, cap) \ + MLX5_CAP_FLOWTABLE_MAX(mdev, flow_table_properties_nic_transmit.cap) + #define MLX5_CAP_FLOWTABLE_SNIFFER_RX(mdev, cap) \ MLX5_CAP_FLOWTABLE(mdev, flow_table_properties_nic_receive_sniffer.cap) -- cgit v1.2.3 From 90c1d1b8da67330b09893d749401a45328b51704 Mon Sep 17 00:00:00 2001 From: Mark Bloch Date: Tue, 28 Aug 2018 14:18:42 +0300 Subject: net/mlx5: Export modify header alloc/dealloc functions Those functions will be used by the RDMA side to create modify header actions to be attached to flow steering rules via verbs. Signed-off-by: Mark Bloch Reviewed-by: Saeed Mahameed Signed-off-by: Leon Romanovsky --- include/linux/mlx5/fs.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mlx5/fs.h b/include/linux/mlx5/fs.h index 804516e4f483..0cbf4d5cb1ab 100644 --- a/include/linux/mlx5/fs.h +++ b/include/linux/mlx5/fs.h @@ -196,4 +196,10 @@ int mlx5_fc_query(struct mlx5_core_dev *dev, struct mlx5_fc *counter, int mlx5_fs_add_rx_underlay_qpn(struct mlx5_core_dev *dev, u32 underlay_qpn); int mlx5_fs_remove_rx_underlay_qpn(struct mlx5_core_dev *dev, u32 underlay_qpn); +int mlx5_modify_header_alloc(struct mlx5_core_dev *dev, + u8 namespace, u8 num_actions, + void *modify_actions, u32 *modify_header_id); +void mlx5_modify_header_dealloc(struct mlx5_core_dev *dev, + u32 modify_header_id); + #endif -- cgit v1.2.3 From 61444b458b01c95e55003d6f0b4d4c936fde51cb Mon Sep 17 00:00:00 2001 From: Mark Bloch Date: Tue, 28 Aug 2018 14:18:44 +0300 Subject: net/mlx5: Break encap/decap into two separated flow table creation flags Today we are able to attach encap and decap actions only to the FDB. In preparation to enable those actions on the NIC flow tables, break the single flag into two. Those flags control whatever a decap or encap operations can be attached to the flow table created. For FDB, if encapsulation is required, we set both of them. Signed-off-by: Mark Bloch Reviewed-by: Saeed Mahameed Reviewed-by: Or Gerlitz Signed-off-by: Leon Romanovsky --- include/linux/mlx5/fs.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mlx5/fs.h b/include/linux/mlx5/fs.h index 0cbf4d5cb1ab..0194e62ad66a 100644 --- a/include/linux/mlx5/fs.h +++ b/include/linux/mlx5/fs.h @@ -45,7 +45,8 @@ enum { }; enum { - MLX5_FLOW_TABLE_TUNNEL_EN = BIT(0), + MLX5_FLOW_TABLE_TUNNEL_EN_ENCAP = BIT(0), + MLX5_FLOW_TABLE_TUNNEL_EN_DECAP = BIT(1), }; #define LEFTOVERS_RULE_NUM 2 -- cgit v1.2.3 From e0e7a3861b6c6b673dc93e291ef11cf5e746b0c2 Mon Sep 17 00:00:00 2001 From: Mark Bloch Date: Tue, 28 Aug 2018 14:18:45 +0300 Subject: net/mlx5: Move header encap type to IFC header file Those bits are hardware specification and should be defined in the IFC header file. Signed-off-by: Mark Bloch Reviewed-by: Or Gerlitz Reviewed-by: Saeed Mahameed Signed-off-by: Leon Romanovsky --- include/linux/mlx5/mlx5_ifc.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index f043d65b9bac..bd725e0924e5 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -4848,6 +4848,11 @@ struct mlx5_ifc_alloc_encap_header_out_bits { u8 reserved_at_60[0x20]; }; +enum { + MLX5_HEADER_TYPE_VXLAN = 0x0, + MLX5_HEADER_TYPE_NVGRE = 0x1, +}; + struct mlx5_ifc_alloc_encap_header_in_bits { u8 opcode[0x10]; u8 reserved_at_10[0x10]; -- cgit v1.2.3 From 60786f0987c0d9354e5330ee11615b16cdb448fe Mon Sep 17 00:00:00 2001 From: Mark Bloch Date: Tue, 28 Aug 2018 14:18:46 +0300 Subject: {net, RDMA}/mlx5: Rename encap to reformat packet Renames all encap mlx5_{core,ib} code to use the new naming of packet reformat. This change doesn't introduce any function change and is needed to properly reflect the operation being done by this action. For example not only can we encapsulate a packet, but also decapsulate it. Signed-off-by: Mark Bloch Reviewed-by: Saeed Mahameed Signed-off-by: Leon Romanovsky --- include/linux/mlx5/fs.h | 4 ++-- include/linux/mlx5/mlx5_ifc.h | 50 +++++++++++++++++++++---------------------- 2 files changed, 27 insertions(+), 27 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mlx5/fs.h b/include/linux/mlx5/fs.h index 0194e62ad66a..37d0c08d0966 100644 --- a/include/linux/mlx5/fs.h +++ b/include/linux/mlx5/fs.h @@ -45,7 +45,7 @@ enum { }; enum { - MLX5_FLOW_TABLE_TUNNEL_EN_ENCAP = BIT(0), + MLX5_FLOW_TABLE_TUNNEL_EN_REFORMAT = BIT(0), MLX5_FLOW_TABLE_TUNNEL_EN_DECAP = BIT(1), }; @@ -160,7 +160,7 @@ struct mlx5_flow_act { u32 action; bool has_flow_tag; u32 flow_tag; - u32 encap_id; + u32 reformat_id; u32 modify_id; uintptr_t esp_id; struct mlx5_fs_vlan vlan[MLX5_FS_VLAN_DEPTH]; diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index bd725e0924e5..c79eaae28e59 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -243,8 +243,8 @@ enum { MLX5_CMD_OP_DEALLOC_FLOW_COUNTER = 0x93a, MLX5_CMD_OP_QUERY_FLOW_COUNTER = 0x93b, MLX5_CMD_OP_MODIFY_FLOW_TABLE = 0x93c, - MLX5_CMD_OP_ALLOC_ENCAP_HEADER = 0x93d, - MLX5_CMD_OP_DEALLOC_ENCAP_HEADER = 0x93e, + MLX5_CMD_OP_ALLOC_PACKET_REFORMAT_CONTEXT = 0x93d, + MLX5_CMD_OP_DEALLOC_PACKET_REFORMAT_CONTEXT = 0x93e, MLX5_CMD_OP_ALLOC_MODIFY_HEADER_CONTEXT = 0x940, MLX5_CMD_OP_DEALLOC_MODIFY_HEADER_CONTEXT = 0x941, MLX5_CMD_OP_QUERY_MODIFY_HEADER_CONTEXT = 0x942, @@ -336,7 +336,7 @@ struct mlx5_ifc_flow_table_prop_layout_bits { u8 modify_root[0x1]; u8 identified_miss_table_mode[0x1]; u8 flow_table_modify[0x1]; - u8 encap[0x1]; + u8 reformat[0x1]; u8 decap[0x1]; u8 reserved_at_9[0x1]; u8 pop_vlan[0x1]; @@ -599,7 +599,7 @@ struct mlx5_ifc_e_switch_cap_bits { u8 vxlan_encap_decap[0x1]; u8 nvgre_encap_decap[0x1]; u8 reserved_at_22[0x9]; - u8 log_max_encap_headers[0x5]; + u8 log_max_packet_reformat_context[0x5]; u8 reserved_2b[0x6]; u8 max_encap_header_size[0xa]; @@ -2394,7 +2394,7 @@ enum { MLX5_FLOW_CONTEXT_ACTION_DROP = 0x2, MLX5_FLOW_CONTEXT_ACTION_FWD_DEST = 0x4, MLX5_FLOW_CONTEXT_ACTION_COUNT = 0x8, - MLX5_FLOW_CONTEXT_ACTION_ENCAP = 0x10, + MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT = 0x10, MLX5_FLOW_CONTEXT_ACTION_DECAP = 0x20, MLX5_FLOW_CONTEXT_ACTION_MOD_HDR = 0x40, MLX5_FLOW_CONTEXT_ACTION_VLAN_POP = 0x80, @@ -2427,7 +2427,7 @@ struct mlx5_ifc_flow_context_bits { u8 reserved_at_a0[0x8]; u8 flow_counter_list_size[0x18]; - u8 encap_id[0x20]; + u8 packet_reformat_id[0x20]; u8 modify_header_id[0x20]; @@ -4802,19 +4802,19 @@ struct mlx5_ifc_query_eq_in_bits { u8 reserved_at_60[0x20]; }; -struct mlx5_ifc_encap_header_in_bits { +struct mlx5_ifc_packet_reformat_context_in_bits { u8 reserved_at_0[0x5]; - u8 header_type[0x3]; + u8 reformat_type[0x3]; u8 reserved_at_8[0xe]; - u8 encap_header_size[0xa]; + u8 reformat_data_size[0xa]; u8 reserved_at_20[0x10]; - u8 encap_header[2][0x8]; + u8 reformat_data[2][0x8]; - u8 more_encap_header[0][0x8]; + u8 more_reformat_data[0][0x8]; }; -struct mlx5_ifc_query_encap_header_out_bits { +struct mlx5_ifc_query_packet_reformat_context_out_bits { u8 status[0x8]; u8 reserved_at_8[0x18]; @@ -4822,38 +4822,38 @@ struct mlx5_ifc_query_encap_header_out_bits { u8 reserved_at_40[0xa0]; - struct mlx5_ifc_encap_header_in_bits encap_header[0]; + struct mlx5_ifc_packet_reformat_context_in_bits packet_reformat_context[0]; }; -struct mlx5_ifc_query_encap_header_in_bits { +struct mlx5_ifc_query_packet_reformat_context_in_bits { u8 opcode[0x10]; u8 reserved_at_10[0x10]; u8 reserved_at_20[0x10]; u8 op_mod[0x10]; - u8 encap_id[0x20]; + u8 packet_reformat_id[0x20]; u8 reserved_at_60[0xa0]; }; -struct mlx5_ifc_alloc_encap_header_out_bits { +struct mlx5_ifc_alloc_packet_reformat_context_out_bits { u8 status[0x8]; u8 reserved_at_8[0x18]; u8 syndrome[0x20]; - u8 encap_id[0x20]; + u8 packet_reformat_id[0x20]; u8 reserved_at_60[0x20]; }; enum { - MLX5_HEADER_TYPE_VXLAN = 0x0, - MLX5_HEADER_TYPE_NVGRE = 0x1, + MLX5_REFORMAT_TYPE_L2_TO_VXLAN = 0x0, + MLX5_REFORMAT_TYPE_L2_TO_NVGRE = 0x1, }; -struct mlx5_ifc_alloc_encap_header_in_bits { +struct mlx5_ifc_alloc_packet_reformat_context_in_bits { u8 opcode[0x10]; u8 reserved_at_10[0x10]; @@ -4862,10 +4862,10 @@ struct mlx5_ifc_alloc_encap_header_in_bits { u8 reserved_at_40[0xa0]; - struct mlx5_ifc_encap_header_in_bits encap_header; + struct mlx5_ifc_packet_reformat_context_in_bits packet_reformat_context; }; -struct mlx5_ifc_dealloc_encap_header_out_bits { +struct mlx5_ifc_dealloc_packet_reformat_context_out_bits { u8 status[0x8]; u8 reserved_at_8[0x18]; @@ -4874,14 +4874,14 @@ struct mlx5_ifc_dealloc_encap_header_out_bits { u8 reserved_at_40[0x40]; }; -struct mlx5_ifc_dealloc_encap_header_in_bits { +struct mlx5_ifc_dealloc_packet_reformat_context_in_bits { u8 opcode[0x10]; u8 reserved_at_10[0x10]; u8 reserved_20[0x10]; u8 op_mod[0x10]; - u8 encap_id[0x20]; + u8 packet_reformat_id[0x20]; u8 reserved_60[0x20]; }; @@ -6983,7 +6983,7 @@ struct mlx5_ifc_create_flow_table_out_bits { }; struct mlx5_ifc_flow_table_context_bits { - u8 encap_en[0x1]; + u8 reformat_en[0x1]; u8 decap_en[0x1]; u8 reserved_at_2[0x2]; u8 table_miss_action[0x4]; -- cgit v1.2.3 From bea4e1f6c6c5744d467ebf8b0699f5e391835130 Mon Sep 17 00:00:00 2001 From: Mark Bloch Date: Tue, 28 Aug 2018 14:18:47 +0300 Subject: net/mlx5: Expose new packet reformat capabilities Expose new abilities when creating a packet reformat context. The new types which can be created are: MLX5_REFORMAT_TYPE_L2_TO_L2_TUNNEL: Ability to create generic encap operation to be done by the HW. MLX5_REFORMAT_TYPE_L3_TUNNEL_TO_L2: Ability to create generic decap operation where the inner packet doesn't contain L2. MLX5_REFORMAT_TYPE_L2_TO_L3_TUNNEL: Ability to create generic encap operation to be done by the HW. The L2 of the original packet is dropped. Signed-off-by: Mark Bloch Reviewed-by: Saeed Mahameed Signed-off-by: Leon Romanovsky --- include/linux/mlx5/mlx5_ifc.h | 19 ++++++++++++++++--- 1 file changed, 16 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index c79eaae28e59..3a4a2e0567e9 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -344,8 +344,12 @@ struct mlx5_ifc_flow_table_prop_layout_bits { u8 reserved_at_c[0x1]; u8 pop_vlan_2[0x1]; u8 push_vlan_2[0x1]; - u8 reserved_at_f[0x11]; - + u8 reformat_and_vlan_action[0x1]; + u8 reserved_at_10[0x2]; + u8 reformat_l3_tunnel_to_l2[0x1]; + u8 reformat_l2_to_l3_tunnel[0x1]; + u8 reformat_and_modify_action[0x1]; + u8 reserved_at_14[0xb]; u8 reserved_at_20[0x2]; u8 log_max_ft_size[0x6]; u8 log_max_modify_header_context[0x8]; @@ -554,7 +558,13 @@ struct mlx5_ifc_flow_table_nic_cap_bits { u8 nic_rx_multi_path_tirs[0x1]; u8 nic_rx_multi_path_tirs_fts[0x1]; u8 allow_sniffer_and_nic_rx_shared_tir[0x1]; - u8 reserved_at_3[0x1fd]; + u8 reserved_at_3[0x1d]; + u8 encap_general_header[0x1]; + u8 reserved_at_21[0xa]; + u8 log_max_packet_reformat_context[0x5]; + u8 reserved_at_30[0x6]; + u8 max_encap_header_size[0xa]; + u8 reserved_at_40[0x1c0]; struct mlx5_ifc_flow_table_prop_layout_bits flow_table_properties_nic_receive; @@ -4851,6 +4861,9 @@ struct mlx5_ifc_alloc_packet_reformat_context_out_bits { enum { MLX5_REFORMAT_TYPE_L2_TO_VXLAN = 0x0, MLX5_REFORMAT_TYPE_L2_TO_NVGRE = 0x1, + MLX5_REFORMAT_TYPE_L2_TO_L2_TUNNEL = 0x2, + MLX5_REFORMAT_TYPE_L3_TUNNEL_TO_L2 = 0x3, + MLX5_REFORMAT_TYPE_L2_TO_L3_TUNNEL = 0x4, }; struct mlx5_ifc_alloc_packet_reformat_context_in_bits { -- cgit v1.2.3 From 50acec06f3928fc29647aecf1270e54cae583afb Mon Sep 17 00:00:00 2001 From: Mark Bloch Date: Tue, 28 Aug 2018 14:18:49 +0300 Subject: net/mlx5: Export packet reformat alloc/dealloc functions This will allow for the RDMA side to allocate packet reformat context. Signed-off-by: Mark Bloch Reviewed-by: Saeed Mahameed Signed-off-by: Leon Romanovsky --- include/linux/mlx5/fs.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mlx5/fs.h b/include/linux/mlx5/fs.h index 37d0c08d0966..b1c026f1c8ba 100644 --- a/include/linux/mlx5/fs.h +++ b/include/linux/mlx5/fs.h @@ -203,4 +203,13 @@ int mlx5_modify_header_alloc(struct mlx5_core_dev *dev, void mlx5_modify_header_dealloc(struct mlx5_core_dev *dev, u32 modify_header_id); +int mlx5_packet_reformat_alloc(struct mlx5_core_dev *dev, + int reformat_type, + size_t size, + void *reformat_data, + enum mlx5_flow_namespace_type namespace, + u32 *packet_reformat_id); +void mlx5_packet_reformat_dealloc(struct mlx5_core_dev *dev, + u32 packet_reformat_id); + #endif -- cgit v1.2.3 From 03512ceb60ae4be71ed3129dabb8625224c8ec40 Mon Sep 17 00:00:00 2001 From: Sara Sharon Date: Fri, 31 Aug 2018 11:31:09 +0300 Subject: ieee80211: remove redundant leading zeroes The defines of IEEE80211_HE_OPERATION_VHT_OPER_INFO and IEEE80211_HE_OPERATION_MULTI_BSSID_AP have leading zeroes that makes the number look like it is bigger than 32 bit. This is misleading, remove it. Signed-off-by: Sara Sharon Signed-off-by: Luca Coelho Signed-off-by: Johannes Berg --- include/linux/ieee80211.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h index 9c03a7d5e400..17ea51d088ae 100644 --- a/include/linux/ieee80211.h +++ b/include/linux/ieee80211.h @@ -1963,8 +1963,8 @@ ieee80211_he_ppe_size(u8 ppe_thres_hdr, const u8 *phy_cap_info) #define IEEE80211_HE_OPERATION_TWT_REQUIRED 0x00000200 #define IEEE80211_HE_OPERATION_RTS_THRESHOLD_MASK 0x000ffc00 #define IEEE80211_HE_OPERATION_RTS_THRESHOLD_OFFSET 10 -#define IEEE80211_HE_OPERATION_PARTIAL_BSS_COLOR 0x000100000 -#define IEEE80211_HE_OPERATION_VHT_OPER_INFO 0x000200000 +#define IEEE80211_HE_OPERATION_PARTIAL_BSS_COLOR 0x00100000 +#define IEEE80211_HE_OPERATION_VHT_OPER_INFO 0x00200000 #define IEEE80211_HE_OPERATION_MULTI_BSSID_AP 0x10000000 #define IEEE80211_HE_OPERATION_TX_BSSID_INDICATOR 0x20000000 #define IEEE80211_HE_OPERATION_BSS_COLOR_DISABLED 0x40000000 -- cgit v1.2.3 From b0aa75f0b1b2e6bc77128fab36c8ed87e84917cc Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Fri, 31 Aug 2018 11:31:16 +0300 Subject: ieee80211: add new VHT capability fields/parsing IEEE 802.11-2016 extended the VHT capability fields to allow indicating the number of spatial streams depending on the actually used bandwidth, add support for decoding this. Signed-off-by: Johannes Berg --- include/linux/ieee80211.h | 35 +++++++++++++++++++++++++++++++++-- 1 file changed, 33 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h index 17ea51d088ae..280600a10111 100644 --- a/include/linux/ieee80211.h +++ b/include/linux/ieee80211.h @@ -1460,13 +1460,16 @@ struct ieee80211_ht_operation { * STA can receive. Rate expressed in units of 1 Mbps. * If this field is 0 this value should not be used to * consider the highest RX data rate supported. - * The top 3 bits of this field are reserved. + * The top 3 bits of this field indicate the Maximum NSTS,total + * (a beamformee capability.) * @tx_mcs_map: TX MCS map 2 bits for each stream, total 8 streams * @tx_highest: Indicates highest long GI VHT PPDU data rate * STA can transmit. Rate expressed in units of 1 Mbps. * If this field is 0 this value should not be used to * consider the highest TX data rate supported. - * The top 3 bits of this field are reserved. + * The top 2 bits of this field are reserved, the + * 3rd bit from the top indiciates VHT Extended NSS BW + * Capability. */ struct ieee80211_vht_mcs_info { __le16 rx_mcs_map; @@ -1475,6 +1478,13 @@ struct ieee80211_vht_mcs_info { __le16 tx_highest; } __packed; +/* for rx_highest */ +#define IEEE80211_VHT_MAX_NSTS_TOTAL_SHIFT 13 +#define IEEE80211_VHT_MAX_NSTS_TOTAL_MASK (7 << IEEE80211_VHT_MAX_NSTS_TOTAL_SHIFT) + +/* for tx_highest */ +#define IEEE80211_VHT_EXT_NSS_BW_CAPABLE (1 << 13) + /** * enum ieee80211_vht_mcs_support - VHT MCS support definitions * @IEEE80211_VHT_MCS_SUPPORT_0_7: MCSes 0-7 are supported for the @@ -1650,6 +1660,7 @@ struct ieee80211_mu_edca_param_set { #define IEEE80211_VHT_CAP_SUPP_CHAN_WIDTH_160MHZ 0x00000004 #define IEEE80211_VHT_CAP_SUPP_CHAN_WIDTH_160_80PLUS80MHZ 0x00000008 #define IEEE80211_VHT_CAP_SUPP_CHAN_WIDTH_MASK 0x0000000C +#define IEEE80211_VHT_CAP_SUPP_CHAN_WIDTH_SHIFT 2 #define IEEE80211_VHT_CAP_RXLDPC 0x00000010 #define IEEE80211_VHT_CAP_SHORT_GI_80 0x00000020 #define IEEE80211_VHT_CAP_SHORT_GI_160 0x00000040 @@ -1678,6 +1689,26 @@ struct ieee80211_mu_edca_param_set { #define IEEE80211_VHT_CAP_VHT_LINK_ADAPTATION_VHT_MRQ_MFB 0x0c000000 #define IEEE80211_VHT_CAP_RX_ANTENNA_PATTERN 0x10000000 #define IEEE80211_VHT_CAP_TX_ANTENNA_PATTERN 0x20000000 +#define IEEE80211_VHT_CAP_EXT_NSS_BW_SHIFT 30 +#define IEEE80211_VHT_CAP_EXT_NSS_BW_MASK 0xc0000000 + +/** + * ieee80211_get_vht_max_nss - return max NSS for a given bandwidth/MCS + * @cap: VHT capabilities of the peer + * @bw: bandwidth to use + * @mcs: MCS index to use + * @ext_nss_bw_capable: indicates whether or not the local transmitter + * (rate scaling algorithm) can deal with the new logic + * (dot11VHTExtendedNSSBWCapable) + * + * Due to the VHT Extended NSS Bandwidth Support, the maximum NSS can + * vary for a given BW/MCS. This function parses the data. + * + * Note: This function is exported by cfg80211. + */ +int ieee80211_get_vht_max_nss(struct ieee80211_vht_cap *cap, + enum ieee80211_vht_chanwidth bw, + int mcs, bool ext_nss_bw_capable); /* 802.11ax HE MAC capabilities */ #define IEEE80211_HE_MAC_CAP0_HTC_HE 0x01 -- cgit v1.2.3 From add7453ad62f05c8f1a48675bb4dfed52e6ac878 Mon Sep 17 00:00:00 2001 From: Shaul Triebitz Date: Wed, 5 Sep 2018 08:06:08 +0300 Subject: wireless: align to draft 11ax D3.0 Align to new 11ax draft D3.0. Change/add new MAC and PHY capabilities and update drivers' 11ax capabilities and mac80211's debugfs accordingly. Signed-off-by: Shaul Triebitz Signed-off-by: Luca Coelho Signed-off-by: Johannes Berg --- include/linux/ieee80211.h | 72 ++++++++++++++++++++++++++++++----------------- 1 file changed, 46 insertions(+), 26 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h index 280600a10111..c4809ad8ab46 100644 --- a/include/linux/ieee80211.h +++ b/include/linux/ieee80211.h @@ -1555,11 +1555,11 @@ struct ieee80211_vht_operation { * struct ieee80211_he_cap_elem - HE capabilities element * * This structure is the "HE capabilities element" fixed fields as - * described in P802.11ax_D2.0 section 9.4.2.237.2 and 9.4.2.237.3 + * described in P802.11ax_D3.0 section 9.4.2.237.2 and 9.4.2.237.3 */ struct ieee80211_he_cap_elem { - u8 mac_cap_info[5]; - u8 phy_cap_info[9]; + u8 mac_cap_info[6]; + u8 phy_cap_info[11]; } __packed; #define IEEE80211_TX_RX_MCS_NSS_DESC_MAX_LEN 5 @@ -1738,15 +1738,15 @@ int ieee80211_get_vht_max_nss(struct ieee80211_vht_cap *cap, #define IEEE80211_HE_MAC_CAP1_TF_MAC_PAD_DUR_8US 0x04 #define IEEE80211_HE_MAC_CAP1_TF_MAC_PAD_DUR_16US 0x08 #define IEEE80211_HE_MAC_CAP1_TF_MAC_PAD_DUR_MASK 0x0c -#define IEEE80211_HE_MAC_CAP1_MULTI_TID_AGG_QOS_1 0x00 -#define IEEE80211_HE_MAC_CAP1_MULTI_TID_AGG_QOS_2 0x10 -#define IEEE80211_HE_MAC_CAP1_MULTI_TID_AGG_QOS_3 0x20 -#define IEEE80211_HE_MAC_CAP1_MULTI_TID_AGG_QOS_4 0x30 -#define IEEE80211_HE_MAC_CAP1_MULTI_TID_AGG_QOS_5 0x40 -#define IEEE80211_HE_MAC_CAP1_MULTI_TID_AGG_QOS_6 0x50 -#define IEEE80211_HE_MAC_CAP1_MULTI_TID_AGG_QOS_7 0x60 -#define IEEE80211_HE_MAC_CAP1_MULTI_TID_AGG_QOS_8 0x70 -#define IEEE80211_HE_MAC_CAP1_MULTI_TID_AGG_QOS_MASK 0x70 +#define IEEE80211_HE_MAC_CAP1_MULTI_TID_AGG_RX_QOS_1 0x00 +#define IEEE80211_HE_MAC_CAP1_MULTI_TID_AGG_RX_QOS_2 0x10 +#define IEEE80211_HE_MAC_CAP1_MULTI_TID_AGG_RX_QOS_3 0x20 +#define IEEE80211_HE_MAC_CAP1_MULTI_TID_AGG_RX_QOS_4 0x30 +#define IEEE80211_HE_MAC_CAP1_MULTI_TID_AGG_RX_QOS_5 0x40 +#define IEEE80211_HE_MAC_CAP1_MULTI_TID_AGG_RX_QOS_6 0x50 +#define IEEE80211_HE_MAC_CAP1_MULTI_TID_AGG_RX_QOS_7 0x60 +#define IEEE80211_HE_MAC_CAP1_MULTI_TID_AGG_RX_QOS_8 0x70 +#define IEEE80211_HE_MAC_CAP1_MULTI_TID_AGG_RX_QOS_MASK 0x70 /* Link adaptation is split between byte HE_MAC_CAP1 and * HE_MAC_CAP2. It should be set only if IEEE80211_HE_MAC_CAP0_HTC_HE @@ -1760,14 +1760,13 @@ int ieee80211_get_vht_max_nss(struct ieee80211_vht_cap *cap, #define IEEE80211_HE_MAC_CAP2_LINK_ADAPTATION 0x01 #define IEEE80211_HE_MAC_CAP2_ALL_ACK 0x02 -#define IEEE80211_HE_MAC_CAP2_UL_MU_RESP_SCHED 0x04 +#define IEEE80211_HE_MAC_CAP2_TRS 0x04 #define IEEE80211_HE_MAC_CAP2_BSR 0x08 #define IEEE80211_HE_MAC_CAP2_BCAST_TWT 0x10 #define IEEE80211_HE_MAC_CAP2_32BIT_BA_BITMAP 0x20 #define IEEE80211_HE_MAC_CAP2_MU_CASCADING 0x40 #define IEEE80211_HE_MAC_CAP2_ACK_EN 0x80 -#define IEEE80211_HE_MAC_CAP3_GRP_ADDR_MULTI_STA_BA_DL_MU 0x01 #define IEEE80211_HE_MAC_CAP3_OMI_CONTROL 0x02 #define IEEE80211_HE_MAC_CAP3_OFDMA_RA 0x04 @@ -1775,25 +1774,34 @@ int ieee80211_get_vht_max_nss(struct ieee80211_vht_cap *cap, * A-MDPU Length Exponent field in the HT capabilities, VHT capabilities and the * same field in the HE capabilities. */ -#define IEEE80211_HE_MAC_CAP3_MAX_A_AMPDU_LEN_EXP_USE_VHT 0x00 -#define IEEE80211_HE_MAC_CAP3_MAX_A_AMPDU_LEN_EXP_VHT_1 0x08 -#define IEEE80211_HE_MAC_CAP3_MAX_A_AMPDU_LEN_EXP_VHT_2 0x10 -#define IEEE80211_HE_MAC_CAP3_MAX_A_AMPDU_LEN_EXP_RESERVED 0x18 -#define IEEE80211_HE_MAC_CAP3_MAX_A_AMPDU_LEN_EXP_MASK 0x18 -#define IEEE80211_HE_MAC_CAP3_A_AMSDU_FRAG 0x20 +#define IEEE80211_HE_MAC_CAP3_MAX_AMPDU_LEN_EXP_USE_VHT 0x00 +#define IEEE80211_HE_MAC_CAP3_MAX_AMPDU_LEN_EXP_VHT_1 0x08 +#define IEEE80211_HE_MAC_CAP3_MAX_AMPDU_LEN_EXP_VHT_2 0x10 +#define IEEE80211_HE_MAC_CAP3_MAX_AMPDU_LEN_EXP_RESERVED 0x18 +#define IEEE80211_HE_MAC_CAP3_MAX_AMPDU_LEN_EXP_MASK 0x18 +#define IEEE80211_HE_MAC_CAP3_AMSDU_FRAG 0x20 #define IEEE80211_HE_MAC_CAP3_FLEX_TWT_SCHED 0x40 #define IEEE80211_HE_MAC_CAP3_RX_CTRL_FRAME_TO_MULTIBSS 0x80 #define IEEE80211_HE_MAC_CAP4_BSRP_BQRP_A_MPDU_AGG 0x01 #define IEEE80211_HE_MAC_CAP4_QTP 0x02 #define IEEE80211_HE_MAC_CAP4_BQR 0x04 -#define IEEE80211_HE_MAC_CAP4_SR_RESP 0x08 +#define IEEE80211_HE_MAC_CAP4_SRP_RESP 0x08 #define IEEE80211_HE_MAC_CAP4_NDP_FB_REP 0x10 #define IEEE80211_HE_MAC_CAP4_OPS 0x20 #define IEEE80211_HE_MAC_CAP4_AMDSU_IN_AMPDU 0x40 +/* Multi TID agg TX is split between byte #4 and #5 + * The value is a combination of B39,B40,B41 + */ +#define IEEE80211_HE_MAC_CAP4_MULTI_TID_AGG_TX_QOS_B39 0x80 + +#define IEEE80211_HE_MAC_CAP5_MULTI_TID_AGG_TX_QOS_B40 0x01 +#define IEEE80211_HE_MAC_CAP5_MULTI_TID_AGG_TX_QOS_B41 0x02 +#define IEEE80211_HE_MAC_CAP5_SUBCHAN_SELECVITE_TRANSMISSION 0x04 +#define IEEE80211_HE_MAC_CAP5_UL_2x996_TONE_RU 0x08 +#define IEEE80211_HE_MAC_CAP5_OM_CTRL_UL_MU_DATA_DIS_RX 0x10 /* 802.11ax HE PHY capabilities */ -#define IEEE80211_HE_PHY_CAP0_DUAL_BAND 0x01 #define IEEE80211_HE_PHY_CAP0_CHANNEL_WIDTH_SET_40MHZ_IN_2G 0x02 #define IEEE80211_HE_PHY_CAP0_CHANNEL_WIDTH_SET_40MHZ_80MHZ_IN_5G 0x04 #define IEEE80211_HE_PHY_CAP0_CHANNEL_WIDTH_SET_160MHZ_IN_5G 0x08 @@ -1810,10 +1818,10 @@ int ieee80211_get_vht_max_nss(struct ieee80211_vht_cap *cap, #define IEEE80211_HE_PHY_CAP1_DEVICE_CLASS_A 0x10 #define IEEE80211_HE_PHY_CAP1_LDPC_CODING_IN_PAYLOAD 0x20 #define IEEE80211_HE_PHY_CAP1_HE_LTF_AND_GI_FOR_HE_PPDUS_0_8US 0x40 -/* Midamble RX Max NSTS is split between byte #2 and byte #3 */ -#define IEEE80211_HE_PHY_CAP1_MIDAMBLE_RX_MAX_NSTS 0x80 +/* Midamble RX/TX Max NSTS is split between byte #2 and byte #3 */ +#define IEEE80211_HE_PHY_CAP1_MIDAMBLE_RX_TX_MAX_NSTS 0x80 -#define IEEE80211_HE_PHY_CAP2_MIDAMBLE_RX_MAX_NSTS 0x01 +#define IEEE80211_HE_PHY_CAP2_MIDAMBLE_RX_TX_MAX_NSTS 0x01 #define IEEE80211_HE_PHY_CAP2_NDP_4x_LTF_AND_3_2US 0x02 #define IEEE80211_HE_PHY_CAP2_STBC_TX_UNDER_80MHZ 0x04 #define IEEE80211_HE_PHY_CAP2_STBC_RX_UNDER_80MHZ 0x08 @@ -1914,7 +1922,19 @@ int ieee80211_get_vht_max_nss(struct ieee80211_vht_cap *cap, #define IEEE80211_HE_PHY_CAP8_20MHZ_IN_160MHZ_HE_PPDU 0x04 #define IEEE80211_HE_PHY_CAP8_80MHZ_IN_160MHZ_HE_PPDU 0x08 #define IEEE80211_HE_PHY_CAP8_HE_ER_SU_1XLTF_AND_08_US_GI 0x10 -#define IEEE80211_HE_PHY_CAP8_MIDAMBLE_RX_2X_AND_1XLTF 0x20 +#define IEEE80211_HE_PHY_CAP8_MIDAMBLE_RX_TX_2X_AND_1XLTF 0x20 +#define IEEE80211_HE_PHY_CAP8_DCM_MAX_BW_20MHZ 0x00 +#define IEEE80211_HE_PHY_CAP8_DCM_MAX_BW_40MHZ 0x40 +#define IEEE80211_HE_PHY_CAP8_DCM_MAX_BW_80MHZ 0x80 +#define IEEE80211_HE_PHY_CAP8_DCM_MAX_BW_160_OR_80P80_MHZ 0xc0 +#define IEEE80211_HE_PHY_CAP8_DCM_MAX_BW_MASK 0xc0 + +#define IEEE80211_HE_PHY_CAP9_LONGER_THAN_16_SIGB_OFDM_SYM 0x01 +#define IEEE80211_HE_PHY_CAP9_NON_TRIGGERED_CQI_FEEDBACK 0x02 +#define IEEE80211_HE_PHY_CAP9_TX_1024_QAM_LESS_THAN_242_TONE_RU 0x04 +#define IEEE80211_HE_PHY_CAP9_RX_1024_QAM_LESS_THAN_242_TONE_RU 0x08 +#define IEEE80211_HE_PHY_CAP9_RX_FULL_BW_SU_USING_MU_WITH_COMP_SIGB 0x10 +#define IEEE80211_HE_PHY_CAP9_RX_FULL_BW_SU_USING_MU_WITH_NON_COMP_SIGB 0x20 /* 802.11ax HE TX/RX MCS NSS Support */ #define IEEE80211_TX_RX_MCS_NSS_SUPP_HIGHEST_MCS_POS (3) -- cgit v1.2.3 From 83033688b7ade18d2dbbcefa810f02ff66ba549d Mon Sep 17 00:00:00 2001 From: Vlad Buslov Date: Mon, 23 Jul 2018 10:55:39 +0300 Subject: net/mlx5: Change flow counters addlist type to single linked list In order to prevent flow counters stats work function from traversing whole flow counters tree while searching for deleted flow counters, new list to store deleted flow counters will be added to struct mlx5_fc_stats. However, the flow counter structure itself has no space left to store any more data in first cache line. To free space that is needed to store additional list node, convert current addlist double linked list (two pointers per node) to atomic single linked list (one pointer per node). Lockless NULL-terminated single linked list data type doesn't require any additional external synchronization for operations used by flow counters module (add single new element, remove all elements from list and traverse them). Remove addlist_lock that is no longer needed. Signed-off-by: Vlad Buslov Acked-by: Amir Vadai Reviewed-by: Paul Blakey Reviewed-by: Roi Dayan Signed-off-by: Saeed Mahameed --- include/linux/mlx5/driver.h | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index 7a452716de4b..c00549293982 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -584,9 +584,7 @@ struct mlx5_irq_info { struct mlx5_fc_stats { struct rb_root counters; - struct list_head addlist; - /* protect addlist add/splice operations */ - spinlock_t addlist_lock; + struct llist_head addlist; struct workqueue_struct *wq; struct delayed_work work; -- cgit v1.2.3 From 6e5e22839136fdb466af0aa46ff2404713dff974 Mon Sep 17 00:00:00 2001 From: Vlad Buslov Date: Mon, 23 Jul 2018 11:32:05 +0300 Subject: net/mlx5: Add new list to store deleted flow counters In order to prevent flow counters stats work function from traversing whole flow counters tree while searching for deleted flow counters, new list to store deleted flow counters is added to struct mlx5_fc_stats. Lockless NULL-terminated single linked list data type is used due to following reasons: - This use case only needs to add single element to list and remove/iterate whole list. Lockless list doesn't require any additional synchronization for these operations. - First cache line of flow counter data structure only has space to store single additional pointer, which precludes usage of double linked list. Remove flow counter 'deleted' flag that is no longer needed. Signed-off-by: Vlad Buslov Acked-by: Amir Vadai Reviewed-by: Paul Blakey Signed-off-by: Saeed Mahameed --- include/linux/mlx5/driver.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index c00549293982..4b53ac64004b 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -585,6 +585,7 @@ struct mlx5_irq_info { struct mlx5_fc_stats { struct rb_root counters; struct llist_head addlist; + struct llist_head dellist; struct workqueue_struct *wq; struct delayed_work work; -- cgit v1.2.3 From 9aff93d7d0d4b3f3076d7bd12a4ad06ef1cf9804 Mon Sep 17 00:00:00 2001 From: Vlad Buslov Date: Tue, 24 Jul 2018 09:52:11 +0300 Subject: net/mlx5: Store flow counters in a list In order to improve performance of flow counter stats query loop that traverses all configured flow counters, replace rb_tree with double-linked list. This change improves performance of traversing flow counters by removing the tree traversal. (profiling data showed that call to rb_next was most top CPU consumer) However, lookup of flow flow counter in list becomes linear, instead of logarithmic. This problem is fixed by next patch in series, which adds idr for fast lookup. Idr is to be used because it is not an intrusive data structure and doesn't require adding any new members to struct mlx5_fc, which allows its control data part to stay <= 1 cache line in size. Signed-off-by: Vlad Buslov Acked-by: Amir Vadai Reviewed-by: Paul Blakey Signed-off-by: Saeed Mahameed --- include/linux/mlx5/driver.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index 4b53ac64004b..61bed33e6675 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -583,7 +583,7 @@ struct mlx5_irq_info { }; struct mlx5_fc_stats { - struct rb_root counters; + struct list_head counters; struct llist_head addlist; struct llist_head dellist; -- cgit v1.2.3 From 12d6066c3b29c5606c4a2466f964fbd9ede803c5 Mon Sep 17 00:00:00 2001 From: Vlad Buslov Date: Tue, 24 Jul 2018 16:37:40 +0300 Subject: net/mlx5: Add flow counters idr Previous patch in series changed flow counter storage structure from rb_tree to linked list in order to improve flow counter traversal performance. The drawback of such solution is that flow counter lookup by id becomes linear in complexity. Store pointers to flow counters in idr in order to improve lookup performance to logarithmic again. Idr is non-intrusive data structure and doesn't require extending flow counter struct with new elements. This means that idr can be used for lookup, while linked list from previous patch is used for traversal, and struct mlx5_fc size is <= 2 cache lines. Signed-off-by: Vlad Buslov Acked-by: Amir Vadai Reviewed-by: Paul Blakey Signed-off-by: Saeed Mahameed --- include/linux/mlx5/driver.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index 61bed33e6675..2a0c845f6bdb 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -583,6 +583,8 @@ struct mlx5_irq_info { }; struct mlx5_fc_stats { + spinlock_t counters_idr_lock; /* protects counters_idr */ + struct idr counters_idr; struct list_head counters; struct llist_head addlist; struct llist_head dellist; -- cgit v1.2.3 From 64109f1dc41f25f4a9c6b114e04b6266bf4128ad Mon Sep 17 00:00:00 2001 From: Shay Agroskin Date: Tue, 5 Jun 2018 09:22:18 +0300 Subject: net/mlx5e: Replace PTP clock lock from RW lock to seq lock Changed "priv.clock.lock" lock from 'rw_lock' to 'seq_lock' in order to improve packet rate performance. Tested on Intel(R) Xeon(R) CPU E5-2660 v2 @ 2.20GHz. Sent 64b packets between two peers connected by ConnectX-5, and measured packet rate for the receiver in three modes: no time-stamping (base rate) time-stamping using rw_lock (old lock) for critical region time-stamping using seq_lock (new lock) for critical region Only the receiver time stamped its packets. The measured packet rate improvements are: Single flow (multiple TX rings to single RX ring): without timestamping: 4.26 (M packets)/sec with rw-lock (old lock): 4.1 (M packets)/sec with seq-lock (new lock): 4.16 (M packets)/sec 1.46% improvement Multiple flows (multiple TX rings to six RX rings): without timestamping: 22 (M packets)/sec with rw-lock (old lock): 11.7 (M packets)/sec with seq-lock (new lock): 21.3 (M packets)/sec 82.05% improvement The packet rate improvement is due to the lack of atomic operations for the 'readers' by the seq-lock. Since there are much more 'readers' than 'writers' contention on this lock, almost all atomic operations are saved. this results in a dramatic decrease in overall cache misses. Signed-off-by: Shay Agroskin Signed-off-by: Saeed Mahameed --- include/linux/mlx5/driver.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index 2a0c845f6bdb..b7fce2c9443d 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -805,7 +805,7 @@ struct mlx5_pps { }; struct mlx5_clock { - rwlock_t lock; + seqlock_t lock; struct cyclecounter cycles; struct timecounter tc; struct hwtstamp_config hwtstamp_config; -- cgit v1.2.3 From fa788d986a3aac5069378ed04697bd06f83d3488 Mon Sep 17 00:00:00 2001 From: Vincent Whitchurch Date: Mon, 3 Sep 2018 16:23:36 +0200 Subject: packet: add sockopt to ignore outgoing packets Currently, the only way to ignore outgoing packets on a packet socket is via the BPF filter. With MSG_ZEROCOPY, packets that are looped into AF_PACKET are copied in dev_queue_xmit_nit(), and this copy happens even if the filter run from packet_rcv() would reject them. So the presence of a packet socket on the interface takes away the benefits of MSG_ZEROCOPY, even if the packet socket is not interested in outgoing packets. (Even when MSG_ZEROCOPY is not used, the skb is unnecessarily cloned, but the cost for that is much lower.) Add a socket option to allow AF_PACKET sockets to ignore outgoing packets to solve this. Note that the *BSDs already have something similar: BIOCSSEESENT/BIOCSDIRECTION and BIOCSDIRFILT. The first intended user is lldpd. Signed-off-by: Vincent Whitchurch Signed-off-by: David S. Miller --- include/linux/netdevice.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 4271f6b4e419..e2b3bd750c98 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -2343,6 +2343,7 @@ static inline struct sk_buff *call_gro_receive_sk(gro_receive_sk_t cb, struct packet_type { __be16 type; /* This is really htons(ether_type). */ + bool ignore_outgoing; struct net_device *dev; /* NULL is wildcarded here */ int (*func) (struct sk_buff *, struct net_device *, -- cgit v1.2.3 From a3f723079df85eafc10c628dabdfcf374b8e1523 Mon Sep 17 00:00:00 2001 From: Denis Bolotin Date: Wed, 5 Sep 2018 18:35:55 +0300 Subject: qed*: Utilize FW 8.37.7.0 This patch adds a new qed firmware with fixes and support for new features. Fixes: - Fix a rare case of device crash with iWARP, iSCSI or FCoE offload. - Fix GRE tunneled traffic when iWARP offload is enabled. - Fix RoCE failure in ib_send_bw when using inline data. - Fix latency optimization flow for inline WQEs. - BigBear 100G fix RDMA: - Reduce task context size. - Application page sizes above 2GB support. - Performance improvements. ETH: - Tenant DCB support. - Replace RSS indirection table update interface. Misc: - Debug Tools changes. Signed-off-by: Denis Bolotin Signed-off-by: Ariel Elior Signed-off-by: David S. Miller --- include/linux/qed/common_hsi.h | 10 +++++----- include/linux/qed/iscsi_common.h | 2 +- 2 files changed, 6 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/qed/common_hsi.h b/include/linux/qed/common_hsi.h index 0081fa6d1268..03f59a28fefd 100644 --- a/include/linux/qed/common_hsi.h +++ b/include/linux/qed/common_hsi.h @@ -110,7 +110,7 @@ #define FW_MAJOR_VERSION 8 #define FW_MINOR_VERSION 37 -#define FW_REVISION_VERSION 2 +#define FW_REVISION_VERSION 7 #define FW_ENGINEERING_VERSION 0 /***********************/ @@ -931,12 +931,12 @@ struct db_rdma_dpm_params { #define DB_RDMA_DPM_PARAMS_WQE_SIZE_SHIFT 16 #define DB_RDMA_DPM_PARAMS_RESERVED0_MASK 0x1 #define DB_RDMA_DPM_PARAMS_RESERVED0_SHIFT 27 -#define DB_RDMA_DPM_PARAMS_COMPLETION_FLG_MASK 0x1 -#define DB_RDMA_DPM_PARAMS_COMPLETION_FLG_SHIFT 28 +#define DB_RDMA_DPM_PARAMS_ACK_REQUEST_MASK 0x1 +#define DB_RDMA_DPM_PARAMS_ACK_REQUEST_SHIFT 28 #define DB_RDMA_DPM_PARAMS_S_FLG_MASK 0x1 #define DB_RDMA_DPM_PARAMS_S_FLG_SHIFT 29 -#define DB_RDMA_DPM_PARAMS_RESERVED1_MASK 0x1 -#define DB_RDMA_DPM_PARAMS_RESERVED1_SHIFT 30 +#define DB_RDMA_DPM_PARAMS_COMPLETION_FLG_MASK 0x1 +#define DB_RDMA_DPM_PARAMS_COMPLETION_FLG_SHIFT 30 #define DB_RDMA_DPM_PARAMS_CONN_TYPE_IS_IWARP_MASK 0x1 #define DB_RDMA_DPM_PARAMS_CONN_TYPE_IS_IWARP_SHIFT 31 }; diff --git a/include/linux/qed/iscsi_common.h b/include/linux/qed/iscsi_common.h index b34c573f2b30..66aba505ec56 100644 --- a/include/linux/qed/iscsi_common.h +++ b/include/linux/qed/iscsi_common.h @@ -896,7 +896,7 @@ struct e4_ustorm_iscsi_task_ag_ctx { __le32 exp_cont_len; __le32 total_data_acked; __le32 exp_data_acked; - u8 next_tid_valid; + u8 byte2; u8 byte3; __le16 word1; __le16 next_tid; -- cgit v1.2.3 From 8b69bd7d8a8927d537f134c37bcca6cbfa58e1b2 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Sat, 11 Aug 2018 18:43:38 -0700 Subject: ppp: Remove direct skb_queue_head list pointer access. Add a helper, __skb_peek(), and use it in ppp_mp_reconstruct(). Signed-off-by: David S. Miller --- include/linux/skbuff.h | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'include/linux') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 17a13e4785fc..89283b77294d 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -1592,6 +1592,17 @@ static inline struct sk_buff *skb_peek(const struct sk_buff_head *list_) return skb; } +/** + * __skb_peek - peek at the head of a non-empty &sk_buff_head + * @list_: list to peek at + * + * Like skb_peek(), but the caller knows that the list is not empty. + */ +static inline struct sk_buff *__skb_peek(const struct sk_buff_head *list_) +{ + return list_->next; +} + /** * skb_peek_next - peek skb following the given one from a queue * @skb: skb to start from -- cgit v1.2.3 From a8305bff685252e80b7c60f4f5e7dd2e63e38218 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Sun, 29 Jul 2018 20:42:53 -0700 Subject: net: Add and use skb_mark_not_on_list(). An SKB is not on a list if skb->next is NULL. Codify this convention into a helper function and use it where we are dequeueing an SKB and need to mark it as such. Signed-off-by: David S. Miller --- include/linux/skbuff.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 89283b77294d..c4c9e3f5cd9a 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -1339,6 +1339,11 @@ static inline void skb_zcopy_abort(struct sk_buff *skb) } } +static inline void skb_mark_not_on_list(struct sk_buff *skb) +{ + skb->next = NULL; +} + /** * skb_queue_empty - check if a queue is empty * @list: queue head -- cgit v1.2.3 From 992cba7e276d438ac8b0a8c17b147b37c8c286f7 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Tue, 31 Jul 2018 15:27:56 -0700 Subject: net: Add and use skb_list_del_init(). It documents what is happening, and eliminates the spurious list pointer poisoning. In the long term, in order to get proper list head debugging, we might want to use the list poison value as the indicator that an SKB is a singleton and not on a list. Signed-off-by: David S. Miller --- include/linux/skbuff.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index c4c9e3f5cd9a..e3a53ca4a9b5 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -1344,6 +1344,12 @@ static inline void skb_mark_not_on_list(struct sk_buff *skb) skb->next = NULL; } +static inline void skb_list_del_init(struct sk_buff *skb) +{ + __list_del_entry(&skb->list); + skb_mark_not_on_list(skb); +} + /** * skb_queue_empty - check if a queue is empty * @list: queue head -- cgit v1.2.3 From 41124fa64d4b298b82266b7ddbefc43540b77b44 Mon Sep 17 00:00:00 2001 From: Andrew Lunn Date: Wed, 12 Sep 2018 01:53:14 +0200 Subject: net: ethernet: Add helper to remove a supported link mode Some MAC hardware cannot support a subset of link modes. e.g. often 1Gbps Full duplex is supported, but Half duplex is not. Add a helper to remove such a link mode. Signed-off-by: Andrew Lunn Reviewed-by: Florian Fainelli Signed-off-by: David S. Miller --- include/linux/phy.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/phy.h b/include/linux/phy.h index cd6f637cbbfb..9c4c3eca8cf2 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -1049,6 +1049,7 @@ int phy_mii_ioctl(struct phy_device *phydev, struct ifreq *ifr, int cmd); int phy_start_interrupts(struct phy_device *phydev); void phy_print_status(struct phy_device *phydev); int phy_set_max_speed(struct phy_device *phydev, u32 max_speed); +void phy_remove_link_mode(struct phy_device *phydev, u32 link_mode); int phy_register_fixup(const char *bus_id, u32 phy_uid, u32 phy_uid_mask, int (*run)(struct phy_device *)); -- cgit v1.2.3 From af8d9bb2f2f405ad541794b46f9d7bc70f13e5cb Mon Sep 17 00:00:00 2001 From: Andrew Lunn Date: Wed, 12 Sep 2018 01:53:15 +0200 Subject: net: ethernet: Add helper for MACs which support asym pause Rather than have the MAC drivers manipulate phydev members to indicate they support Asym Pause, add a helper function. Signed-off-by: Andrew Lunn Reviewed-by: Florian Fainelli Signed-off-by: David S. Miller --- include/linux/phy.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/phy.h b/include/linux/phy.h index 9c4c3eca8cf2..e2db819807c1 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -1050,6 +1050,7 @@ int phy_start_interrupts(struct phy_device *phydev); void phy_print_status(struct phy_device *phydev); int phy_set_max_speed(struct phy_device *phydev, u32 max_speed); void phy_remove_link_mode(struct phy_device *phydev, u32 link_mode); +void phy_support_asym_pause(struct phy_device *phydev); int phy_register_fixup(const char *bus_id, u32 phy_uid, u32 phy_uid_mask, int (*run)(struct phy_device *)); -- cgit v1.2.3 From c306ad36184fb7d0bd53f45441f45c1810e88a53 Mon Sep 17 00:00:00 2001 From: Andrew Lunn Date: Wed, 12 Sep 2018 01:53:16 +0200 Subject: net: ethernet: Add helper for MACs which support pause Rather than have the MAC drivers manipulate phydev members, add a helper function for MACs supporting Pause, but not Asym Pause. Signed-off-by: Andrew Lunn Reviewed-by: Florian Fainelli Signed-off-by: David S. Miller --- include/linux/phy.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/phy.h b/include/linux/phy.h index e2db819807c1..bc5d6c3f1388 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -1050,6 +1050,7 @@ int phy_start_interrupts(struct phy_device *phydev); void phy_print_status(struct phy_device *phydev); int phy_set_max_speed(struct phy_device *phydev, u32 max_speed); void phy_remove_link_mode(struct phy_device *phydev, u32 link_mode); +void phy_support_sym_pause(struct phy_device *phydev); void phy_support_asym_pause(struct phy_device *phydev); int phy_register_fixup(const char *bus_id, u32 phy_uid, u32 phy_uid_mask, -- cgit v1.2.3 From 70814e819c1139e5e7faacb3700eab5eac559272 Mon Sep 17 00:00:00 2001 From: Andrew Lunn Date: Wed, 12 Sep 2018 01:53:17 +0200 Subject: net: ethernet: Add helper for set_pauseparam for Asym Pause ethtool can be used to enable/disable pause. Add a helper to configure the PHY when asym pause is supported. Signed-off-by: Andrew Lunn Reviewed-by: Florian Fainelli Signed-off-by: David S. Miller --- include/linux/phy.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/phy.h b/include/linux/phy.h index bc5d6c3f1388..e4062ba7472f 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -1052,6 +1052,7 @@ int phy_set_max_speed(struct phy_device *phydev, u32 max_speed); void phy_remove_link_mode(struct phy_device *phydev, u32 link_mode); void phy_support_sym_pause(struct phy_device *phydev); void phy_support_asym_pause(struct phy_device *phydev); +void phy_set_asym_pause(struct phy_device *phydev, bool rx, bool tx); int phy_register_fixup(const char *bus_id, u32 phy_uid, u32 phy_uid_mask, int (*run)(struct phy_device *)); -- cgit v1.2.3 From 0c122405d4c3ec638ba00865c872ec5a3ed1a6c0 Mon Sep 17 00:00:00 2001 From: Andrew Lunn Date: Wed, 12 Sep 2018 01:53:18 +0200 Subject: net: ethernet: Add helper for set_pauseparam for Pause ethtool can be used to enable/disable pause. Add a helper to configure the PHY when Pause is supported. Signed-off-by: Andrew Lunn Reviewed-by: Florian Fainelli Signed-off-by: David S. Miller --- include/linux/phy.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/phy.h b/include/linux/phy.h index e4062ba7472f..8521391ebb20 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -1052,6 +1052,8 @@ int phy_set_max_speed(struct phy_device *phydev, u32 max_speed); void phy_remove_link_mode(struct phy_device *phydev, u32 link_mode); void phy_support_sym_pause(struct phy_device *phydev); void phy_support_asym_pause(struct phy_device *phydev); +void phy_set_sym_pause(struct phy_device *phydev, bool rx, bool tx, + bool autoneg); void phy_set_asym_pause(struct phy_device *phydev, bool rx, bool tx); int phy_register_fixup(const char *bus_id, u32 phy_uid, u32 phy_uid_mask, -- cgit v1.2.3 From 22b7d29926b577ff4f480611380d03268545b787 Mon Sep 17 00:00:00 2001 From: Andrew Lunn Date: Wed, 12 Sep 2018 01:53:19 +0200 Subject: net: ethernet: Add helper to determine if pause configuration is supported Rather than have MAC drivers open code the test, add a helper in phylib. This will help when we change the type of phydev->supported. Signed-off-by: Andrew Lunn Reviewed-by: Florian Fainelli Signed-off-by: David S. Miller --- include/linux/phy.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/phy.h b/include/linux/phy.h index 8521391ebb20..192a1fa0c73b 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -1055,6 +1055,8 @@ void phy_support_asym_pause(struct phy_device *phydev); void phy_set_sym_pause(struct phy_device *phydev, bool rx, bool tx, bool autoneg); void phy_set_asym_pause(struct phy_device *phydev, bool rx, bool tx); +bool phy_validate_pause(struct phy_device *phydev, + struct ethtool_pauseparam *pp); int phy_register_fixup(const char *bus_id, u32 phy_uid, u32 phy_uid_mask, int (*run)(struct phy_device *)); -- cgit v1.2.3 From 15033f0457dca569b284bef0c8d3ad55fb37eacb Mon Sep 17 00:00:00 2001 From: Andre Naujoks Date: Mon, 10 Sep 2018 10:27:15 +0200 Subject: ipv6: Add sockopt IPV6_MULTICAST_ALL analogue to IP_MULTICAST_ALL The socket option will be enabled by default to ensure current behaviour is not changed. This is the same for the IPv4 version. A socket bound to in6addr_any and a specific port will receive all traffic on that port. Analogue to IP_MULTICAST_ALL, disable this behaviour, if one or more multicast groups were joined (using said socket) and only pass on multicast traffic from groups, which were explicitly joined via this socket. Without this option disabled a socket (system even) joined to multiple multicast groups is very hard to get right. Filtering by destination address has to take place in user space to avoid receiving multicast traffic from other multicast groups, which might have traffic on the same port. The extension of the IP_MULTICAST_ALL socketoption to just apply to ipv6, too, is not done to avoid changing the behaviour of current applications. Signed-off-by: Andre Naujoks Acked-By: YOSHIFUJI Hideaki Signed-off-by: David S. Miller --- include/linux/ipv6.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h index 8415bf1a9776..495e834c1367 100644 --- a/include/linux/ipv6.h +++ b/include/linux/ipv6.h @@ -274,7 +274,8 @@ struct ipv6_pinfo { */ dontfrag:1, autoflowlabel:1, - autoflowlabel_set:1; + autoflowlabel_set:1, + mc_all:1; __u8 min_hopcount; __u8 tclass; __be32 rcv_flowinfo; -- cgit v1.2.3 From fe8dd45bb7556246c6b76277b1ba4296c91c2505 Mon Sep 17 00:00:00 2001 From: Jason Wang Date: Wed, 12 Sep 2018 11:17:06 +0800 Subject: tun: switch to new type of msg_control This patch introduces to a new tun/tap specific msg_control: #define TUN_MSG_UBUF 1 #define TUN_MSG_PTR 2 struct tun_msg_ctl { int type; void *ptr; }; This allows us to pass different kinds of msg_control through sendmsg(). The first supported type is ubuf (TUN_MSG_UBUF) which will be used by the existed vhost_net zerocopy code. The second is XDP buff, which allows vhost_net to pass XDP buff to TUN. This could be used to implement accepting an array of XDP buffs from vhost_net in the following patches. Signed-off-by: Jason Wang Signed-off-by: David S. Miller --- include/linux/if_tun.h | 14 ++++++++++++++ 1 file changed, 14 insertions(+) (limited to 'include/linux') diff --git a/include/linux/if_tun.h b/include/linux/if_tun.h index 3d2996dc7d85..12e3eebf0ce6 100644 --- a/include/linux/if_tun.h +++ b/include/linux/if_tun.h @@ -16,9 +16,23 @@ #define __IF_TUN_H #include +#include #define TUN_XDP_FLAG 0x1UL +#define TUN_MSG_UBUF 1 +#define TUN_MSG_PTR 2 +struct tun_msg_ctl { + unsigned short type; + unsigned short num; + void *ptr; +}; + +struct tun_xdp_hdr { + int buflen; + struct virtio_net_hdr gso; +}; + #if defined(CONFIG_TUN) || defined(CONFIG_TUN_MODULE) struct socket *tun_get_socket(struct file *); struct ptr_ring *tun_get_tx_ring(struct file *file); -- cgit v1.2.3 From 52bb6677d530d37055092d86b4eab69dce6c166a Mon Sep 17 00:00:00 2001 From: Li RongQing Date: Fri, 14 Sep 2018 16:00:51 +0800 Subject: net: move definition of pcpu_lstats to header file pcpu_lstats is defined in several files, so unify them as one and move to header file Signed-off-by: Zhang Yu Signed-off-by: Li RongQing Signed-off-by: David S. Miller --- include/linux/netdevice.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index e2b3bd750c98..baed5d5088c5 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -2382,6 +2382,12 @@ struct pcpu_sw_netstats { struct u64_stats_sync syncp; }; +struct pcpu_lstats { + u64 packets; + u64 bytes; + struct u64_stats_sync syncp; +}; + #define __netdev_alloc_pcpu_stats(type, gfp) \ ({ \ typeof(type) __percpu *pcpu_stats = alloc_percpu_gfp(type, gfp);\ -- cgit v1.2.3 From d58e468b1112dcd1d5193c0a89ff9f98b5a3e8b9 Mon Sep 17 00:00:00 2001 From: Petar Penkov Date: Fri, 14 Sep 2018 07:46:18 -0700 Subject: flow_dissector: implements flow dissector BPF hook Adds a hook for programs of type BPF_PROG_TYPE_FLOW_DISSECTOR and attach type BPF_FLOW_DISSECTOR that is executed in the flow dissector path. The BPF program is per-network namespace. Signed-off-by: Petar Penkov Signed-off-by: Willem de Bruijn Signed-off-by: Alexei Starovoitov --- include/linux/bpf.h | 1 + include/linux/bpf_types.h | 1 + include/linux/skbuff.h | 7 +++++++ 3 files changed, 9 insertions(+) (limited to 'include/linux') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 523481a3471b..988a00797bcd 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -212,6 +212,7 @@ enum bpf_reg_type { PTR_TO_PACKET_META, /* skb->data - meta_len */ PTR_TO_PACKET, /* reg points to skb->data */ PTR_TO_PACKET_END, /* skb->data + headlen */ + PTR_TO_FLOW_KEYS, /* reg points to bpf_flow_keys */ }; /* The information passed from prog-specific *_is_valid_access diff --git a/include/linux/bpf_types.h b/include/linux/bpf_types.h index cd26c090e7c0..22083712dd18 100644 --- a/include/linux/bpf_types.h +++ b/include/linux/bpf_types.h @@ -32,6 +32,7 @@ BPF_PROG_TYPE(BPF_PROG_TYPE_LIRC_MODE2, lirc_mode2) #ifdef CONFIG_INET BPF_PROG_TYPE(BPF_PROG_TYPE_SK_REUSEPORT, sk_reuseport) #endif +BPF_PROG_TYPE(BPF_PROG_TYPE_FLOW_DISSECTOR, flow_dissector) BPF_MAP_TYPE(BPF_MAP_TYPE_ARRAY, array_map_ops) BPF_MAP_TYPE(BPF_MAP_TYPE_PERCPU_ARRAY, percpu_array_map_ops) diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 17a13e4785fc..ce0e863f02a2 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -243,6 +243,8 @@ struct scatterlist; struct pipe_inode_info; struct iov_iter; struct napi_struct; +struct bpf_prog; +union bpf_attr; #if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE) struct nf_conntrack { @@ -1192,6 +1194,11 @@ void skb_flow_dissector_init(struct flow_dissector *flow_dissector, const struct flow_dissector_key *key, unsigned int key_count); +int skb_flow_dissector_bpf_prog_attach(const union bpf_attr *attr, + struct bpf_prog *prog); + +int skb_flow_dissector_bpf_prog_detach(const union bpf_attr *attr); + bool __skb_flow_dissect(const struct sk_buff *skb, struct flow_dissector *flow_dissector, void *target_container, -- cgit v1.2.3 From 2953d80ff04862b26a2e628fb3948868f54d753d Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Fri, 31 Aug 2018 20:29:37 +0200 Subject: netfilter: remove obsolete need_conntrack stub as of a0ae2562c6c4b27 ("netfilter: conntrack: remove l3proto abstraction") there are no users anymore. Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/linux/netfilter/nf_conntrack_common.h | 3 --- 1 file changed, 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netfilter/nf_conntrack_common.h b/include/linux/netfilter/nf_conntrack_common.h index 03097fa70975..e142b2b5f1ea 100644 --- a/include/linux/netfilter/nf_conntrack_common.h +++ b/include/linux/netfilter/nf_conntrack_common.h @@ -19,7 +19,4 @@ struct ip_conntrack_stat { unsigned int search_restart; }; -/* call to create an explicit dependency on nf_conntrack. */ -void need_conntrack(void); - #endif /* _NF_CONNTRACK_COMMON_H */ -- cgit v1.2.3 From 14d73416792afa84f6a7245ee474d2432069da56 Mon Sep 17 00:00:00 2001 From: Li RongQing Date: Mon, 17 Sep 2018 18:46:55 +0800 Subject: veth: rename pcpu_vstats as pcpu_lstats struct pcpu_vstats and pcpu_lstats have same members and usage, and pcpu_lstats is used in many files, so rename pcpu_vstats as pcpu_lstats to reduce duplicate definition Signed-off-by: Zhang Yu Signed-off-by: Li RongQing Signed-off-by: David S. Miller --- include/linux/netdevice.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index baed5d5088c5..1cbbf77a685f 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -2000,7 +2000,6 @@ struct net_device { struct pcpu_lstats __percpu *lstats; struct pcpu_sw_netstats __percpu *tstats; struct pcpu_dstats __percpu *dstats; - struct pcpu_vstats __percpu *vstats; }; #if IS_ENABLED(CONFIG_GARP) -- cgit v1.2.3 From 2dfd184abd38fd72d80715fa8b00c9de78490200 Mon Sep 17 00:00:00 2001 From: Willem de Bruijn Date: Tue, 18 Sep 2018 16:20:18 -0400 Subject: flow_dissector: fix build failure without CONFIG_NET If boolean CONFIG_BPF_SYSCALL is enabled, kernel/bpf/syscall.c will call flow_dissector functions from net/core/flow_dissector.c. This causes this build failure if CONFIG_NET is disabled: kernel/bpf/syscall.o: In function `__x64_sys_bpf': syscall.c:(.text+0x3278): undefined reference to `skb_flow_dissector_bpf_prog_attach' syscall.c:(.text+0x3310): undefined reference to `skb_flow_dissector_bpf_prog_detach' kernel/bpf/syscall.o:(.rodata+0x3f0): undefined reference to `flow_dissector_prog_ops' kernel/bpf/verifier.o:(.rodata+0x250): undefined reference to `flow_dissector_verifier_ops' Analogous to other optional BPF program types in syscall.c, add stubs if the relevant functions are not compiled and move the BPF_PROG_TYPE definition in the #ifdef CONFIG_NET block. Fixes: d58e468b1112 ("flow_dissector: implements flow dissector BPF hook") Reported-by: Randy Dunlap Signed-off-by: Willem de Bruijn Acked-by: Randy Dunlap # build-tested Acked-by: Yonghong Song Signed-off-by: Daniel Borkmann --- include/linux/bpf_types.h | 2 +- include/linux/skbuff.h | 13 +++++++++++++ 2 files changed, 14 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/bpf_types.h b/include/linux/bpf_types.h index 22083712dd18..c9bd6fb765b0 100644 --- a/include/linux/bpf_types.h +++ b/include/linux/bpf_types.h @@ -16,6 +16,7 @@ BPF_PROG_TYPE(BPF_PROG_TYPE_LWT_SEG6LOCAL, lwt_seg6local) BPF_PROG_TYPE(BPF_PROG_TYPE_SOCK_OPS, sock_ops) BPF_PROG_TYPE(BPF_PROG_TYPE_SK_SKB, sk_skb) BPF_PROG_TYPE(BPF_PROG_TYPE_SK_MSG, sk_msg) +BPF_PROG_TYPE(BPF_PROG_TYPE_FLOW_DISSECTOR, flow_dissector) #endif #ifdef CONFIG_BPF_EVENTS BPF_PROG_TYPE(BPF_PROG_TYPE_KPROBE, kprobe) @@ -32,7 +33,6 @@ BPF_PROG_TYPE(BPF_PROG_TYPE_LIRC_MODE2, lirc_mode2) #ifdef CONFIG_INET BPF_PROG_TYPE(BPF_PROG_TYPE_SK_REUSEPORT, sk_reuseport) #endif -BPF_PROG_TYPE(BPF_PROG_TYPE_FLOW_DISSECTOR, flow_dissector) BPF_MAP_TYPE(BPF_MAP_TYPE_ARRAY, array_map_ops) BPF_MAP_TYPE(BPF_MAP_TYPE_PERCPU_ARRAY, percpu_array_map_ops) diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index ce0e863f02a2..76be85ea392a 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -1194,10 +1194,23 @@ void skb_flow_dissector_init(struct flow_dissector *flow_dissector, const struct flow_dissector_key *key, unsigned int key_count); +#ifdef CONFIG_NET int skb_flow_dissector_bpf_prog_attach(const union bpf_attr *attr, struct bpf_prog *prog); int skb_flow_dissector_bpf_prog_detach(const union bpf_attr *attr); +#else +static inline int skb_flow_dissector_bpf_prog_attach(const union bpf_attr *attr, + struct bpf_prog *prog) +{ + return -EOPNOTSUPP; +} + +static inline int skb_flow_dissector_bpf_prog_detach(const union bpf_attr *attr) +{ + return -EOPNOTSUPP; +} +#endif bool __skb_flow_dissect(const struct sk_buff *skb, struct flow_dissector *flow_dissector, -- cgit v1.2.3 From 5d773ff41a7cdf0ef6cc6647435d59f0cf53e7b1 Mon Sep 17 00:00:00 2001 From: Mark Bloch Date: Mon, 17 Sep 2018 13:30:46 +0300 Subject: net/mlx5: Rename incorrect naming in IFC file Remove a trailing underscore from the multicast/unicast names. Signed-off-by: Mark Bloch Reviewed-by: Yishai Hadas Signed-off-by: Leon Romanovsky --- include/linux/mlx5/mlx5_ifc.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index 3a4a2e0567e9..4c7a1d25d73b 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -2559,8 +2559,8 @@ enum { }; enum { - MLX5_TIRC_SELF_LB_BLOCK_BLOCK_UNICAST_ = 0x1, - MLX5_TIRC_SELF_LB_BLOCK_BLOCK_MULTICAST_ = 0x2, + MLX5_TIRC_SELF_LB_BLOCK_BLOCK_UNICAST = 0x1, + MLX5_TIRC_SELF_LB_BLOCK_BLOCK_MULTICAST = 0x2, }; struct mlx5_ifc_tirc_bits { -- cgit v1.2.3 From 9799ccb0e984a5c1311b22a212e7ff96e8b736de Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 21 Sep 2018 08:51:49 -0700 Subject: tcp: add tcp_wstamp_ns socket field TCP will soon provide earliest departure time on TX skbs. It needs to track this in a new variable. tcp_mstamp_refresh() needs to update this variable, and became too big to stay an inline. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/linux/tcp.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/tcp.h b/include/linux/tcp.h index 263e37271afd..848f5b25e178 100644 --- a/include/linux/tcp.h +++ b/include/linux/tcp.h @@ -248,6 +248,8 @@ struct tcp_sock { syn_smc:1; /* SYN includes SMC */ u32 tlp_high_seq; /* snd_nxt at the time of TLP retransmit. */ + u64 tcp_wstamp_ns; /* departure time for next sent data packet */ + /* RTT measurement */ u64 tcp_mstamp; /* most recent packet received/sent */ u32 srtt_us; /* smoothed round trip time << 3 in usecs */ -- cgit v1.2.3 From d3edd06ea8ea9e03de6567fda80b8be57e21a537 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 21 Sep 2018 08:51:50 -0700 Subject: tcp: provide earliest departure time in skb->tstamp Switch internal TCP skb->skb_mstamp to skb->skb_mstamp_ns, from usec units to nsec units. Do not clear skb->tstamp before entering IP stacks in TX, so that qdisc or devices can implement pacing based on the earliest departure time instead of socket sk->sk_pacing_rate Packets are fed with tcp_wstamp_ns, and following patch will update tcp_wstamp_ns when both TCP and sch_fq switch to the earliest departure time mechanism. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/linux/skbuff.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index e3a53ca4a9b5..86f337e9a81d 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -689,7 +689,7 @@ struct sk_buff { union { ktime_t tstamp; - u64 skb_mstamp; + u64 skb_mstamp_ns; /* earliest departure time */ }; /* * This is the control buffer. It is free to use for every -- cgit v1.2.3 From 9ba481e2eb3b932ae5b6278342b256e4f92d2793 Mon Sep 17 00:00:00 2001 From: Yishai Hadas Date: Thu, 20 Sep 2018 21:35:20 +0300 Subject: net/mlx5: Set uid as part of CQ commands Set uid as part of CQ commands so that the firmware can manage the CQ object in a secured way. The firmware should mark this CQ with the given uid so that it can be used later on only by objects with the same uid. Upon DEVX flows that use this CQ (e.g. create QP command), the pointed CQ must have the same uid as of the issuer uid command. When a command is issued with uid=0 it means that the issuer of the command is trusted (i.e. kernel), in that case any pointed object can be used regardless of its uid. Signed-off-by: Yishai Hadas Signed-off-by: Leon Romanovsky --- include/linux/mlx5/cq.h | 1 + include/linux/mlx5/mlx5_ifc.h | 6 +++--- 2 files changed, 4 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mlx5/cq.h b/include/linux/mlx5/cq.h index 0ef6138eca49..31a750570c38 100644 --- a/include/linux/mlx5/cq.h +++ b/include/linux/mlx5/cq.h @@ -61,6 +61,7 @@ struct mlx5_core_cq { int reset_notify_added; struct list_head reset_notify; struct mlx5_eq *eq; + u16 uid; }; diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index 4c7a1d25d73b..1a412b355054 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -5629,7 +5629,7 @@ enum { struct mlx5_ifc_modify_cq_in_bits { u8 opcode[0x10]; - u8 reserved_at_10[0x10]; + u8 uid[0x10]; u8 reserved_at_20[0x10]; u8 op_mod[0x10]; @@ -6404,7 +6404,7 @@ struct mlx5_ifc_destroy_cq_out_bits { struct mlx5_ifc_destroy_cq_in_bits { u8 opcode[0x10]; - u8 reserved_at_10[0x10]; + u8 uid[0x10]; u8 reserved_at_20[0x10]; u8 op_mod[0x10]; @@ -7164,7 +7164,7 @@ struct mlx5_ifc_create_cq_out_bits { struct mlx5_ifc_create_cq_in_bits { u8 opcode[0x10]; - u8 reserved_at_10[0x10]; + u8 uid[0x10]; u8 reserved_at_20[0x10]; u8 op_mod[0x10]; -- cgit v1.2.3 From 4ac63ec72587f7426aae15ddfe78e8ab785724dc Mon Sep 17 00:00:00 2001 From: Yishai Hadas Date: Thu, 20 Sep 2018 21:35:21 +0300 Subject: net/mlx5: Set uid as part of QP commands Set uid as part of QP commands so that the firmware can manage the QP object in a secured way. That will enable using a QP that was created by verbs application to be used by the DEVX flow in case the uid is equal. Signed-off-by: Yishai Hadas Signed-off-by: Leon Romanovsky --- include/linux/mlx5/mlx5_ifc.h | 22 +++++++++++----------- include/linux/mlx5/qp.h | 1 + 2 files changed, 12 insertions(+), 11 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index 1a412b355054..3b6a612787ac 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -3394,7 +3394,7 @@ struct mlx5_ifc_sqerr2rts_qp_out_bits { struct mlx5_ifc_sqerr2rts_qp_in_bits { u8 opcode[0x10]; - u8 reserved_at_10[0x10]; + u8 uid[0x10]; u8 reserved_at_20[0x10]; u8 op_mod[0x10]; @@ -3424,7 +3424,7 @@ struct mlx5_ifc_sqd2rts_qp_out_bits { struct mlx5_ifc_sqd2rts_qp_in_bits { u8 opcode[0x10]; - u8 reserved_at_10[0x10]; + u8 uid[0x10]; u8 reserved_at_20[0x10]; u8 op_mod[0x10]; @@ -3629,7 +3629,7 @@ struct mlx5_ifc_rts2rts_qp_out_bits { struct mlx5_ifc_rts2rts_qp_in_bits { u8 opcode[0x10]; - u8 reserved_at_10[0x10]; + u8 uid[0x10]; u8 reserved_at_20[0x10]; u8 op_mod[0x10]; @@ -3659,7 +3659,7 @@ struct mlx5_ifc_rtr2rts_qp_out_bits { struct mlx5_ifc_rtr2rts_qp_in_bits { u8 opcode[0x10]; - u8 reserved_at_10[0x10]; + u8 uid[0x10]; u8 reserved_at_20[0x10]; u8 op_mod[0x10]; @@ -3689,7 +3689,7 @@ struct mlx5_ifc_rst2init_qp_out_bits { struct mlx5_ifc_rst2init_qp_in_bits { u8 opcode[0x10]; - u8 reserved_at_10[0x10]; + u8 uid[0x10]; u8 reserved_at_20[0x10]; u8 op_mod[0x10]; @@ -5192,7 +5192,7 @@ struct mlx5_ifc_qp_2rst_out_bits { struct mlx5_ifc_qp_2rst_in_bits { u8 opcode[0x10]; - u8 reserved_at_10[0x10]; + u8 uid[0x10]; u8 reserved_at_20[0x10]; u8 op_mod[0x10]; @@ -5214,7 +5214,7 @@ struct mlx5_ifc_qp_2err_out_bits { struct mlx5_ifc_qp_2err_in_bits { u8 opcode[0x10]; - u8 reserved_at_10[0x10]; + u8 uid[0x10]; u8 reserved_at_20[0x10]; u8 op_mod[0x10]; @@ -5789,7 +5789,7 @@ struct mlx5_ifc_init2rtr_qp_out_bits { struct mlx5_ifc_init2rtr_qp_in_bits { u8 opcode[0x10]; - u8 reserved_at_10[0x10]; + u8 uid[0x10]; u8 reserved_at_20[0x10]; u8 op_mod[0x10]; @@ -5819,7 +5819,7 @@ struct mlx5_ifc_init2init_qp_out_bits { struct mlx5_ifc_init2init_qp_in_bits { u8 opcode[0x10]; - u8 reserved_at_10[0x10]; + u8 uid[0x10]; u8 reserved_at_20[0x10]; u8 op_mod[0x10]; @@ -6230,7 +6230,7 @@ struct mlx5_ifc_destroy_qp_out_bits { struct mlx5_ifc_destroy_qp_in_bits { u8 opcode[0x10]; - u8 reserved_at_10[0x10]; + u8 uid[0x10]; u8 reserved_at_20[0x10]; u8 op_mod[0x10]; @@ -6895,7 +6895,7 @@ struct mlx5_ifc_create_qp_out_bits { struct mlx5_ifc_create_qp_in_bits { u8 opcode[0x10]; - u8 reserved_at_10[0x10]; + u8 uid[0x10]; u8 reserved_at_20[0x10]; u8 op_mod[0x10]; diff --git a/include/linux/mlx5/qp.h b/include/linux/mlx5/qp.h index 4778d41085d4..fbe322c966bc 100644 --- a/include/linux/mlx5/qp.h +++ b/include/linux/mlx5/qp.h @@ -471,6 +471,7 @@ struct mlx5_core_qp { int qpn; struct mlx5_rsc_debug *dbg; int pid; + u16 uid; }; struct mlx5_core_dct { -- cgit v1.2.3 From d269b3afffcb107375d9cf73127fc2e0181bc90b Mon Sep 17 00:00:00 2001 From: Yishai Hadas Date: Thu, 20 Sep 2018 21:35:22 +0300 Subject: net/mlx5: Set uid as part of RQ commands Set uid as part of RQ commands so that the firmware can manage the RQ object in a secured way. That will enable using an RQ that was created by verbs application to be used by the DEVX flow in case the uid is equal. Signed-off-by: Yishai Hadas Signed-off-by: Leon Romanovsky --- include/linux/mlx5/mlx5_ifc.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index 3b6a612787ac..689631ca27b8 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -5488,7 +5488,7 @@ enum { struct mlx5_ifc_modify_rq_in_bits { u8 opcode[0x10]; - u8 reserved_at_10[0x10]; + u8 uid[0x10]; u8 reserved_at_20[0x10]; u8 op_mod[0x10]; @@ -6164,7 +6164,7 @@ struct mlx5_ifc_destroy_rq_out_bits { struct mlx5_ifc_destroy_rq_in_bits { u8 opcode[0x10]; - u8 reserved_at_10[0x10]; + u8 uid[0x10]; u8 reserved_at_20[0x10]; u8 op_mod[0x10]; @@ -6847,7 +6847,7 @@ struct mlx5_ifc_create_rq_out_bits { struct mlx5_ifc_create_rq_in_bits { u8 opcode[0x10]; - u8 reserved_at_10[0x10]; + u8 uid[0x10]; u8 reserved_at_20[0x10]; u8 op_mod[0x10]; -- cgit v1.2.3 From 430ae0d5a3ce1350375690cb6ab29ab6fae4a9ac Mon Sep 17 00:00:00 2001 From: Yishai Hadas Date: Thu, 20 Sep 2018 21:35:23 +0300 Subject: net/mlx5: Set uid as part of SQ commands Set uid as part of SQ commands so that the firmware can manage the SQ object in a secured way. That will enable using an SQ that was created by verbs application to be used by the DEVX flow in case the uid is equal. Signed-off-by: Yishai Hadas Signed-off-by: Leon Romanovsky --- include/linux/mlx5/mlx5_ifc.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index 689631ca27b8..72dd1e49a799 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -5381,7 +5381,7 @@ struct mlx5_ifc_modify_sq_out_bits { struct mlx5_ifc_modify_sq_in_bits { u8 opcode[0x10]; - u8 reserved_at_10[0x10]; + u8 uid[0x10]; u8 reserved_at_20[0x10]; u8 op_mod[0x10]; @@ -6096,7 +6096,7 @@ struct mlx5_ifc_destroy_sq_out_bits { struct mlx5_ifc_destroy_sq_in_bits { u8 opcode[0x10]; - u8 reserved_at_10[0x10]; + u8 uid[0x10]; u8 reserved_at_20[0x10]; u8 op_mod[0x10]; @@ -6769,7 +6769,7 @@ struct mlx5_ifc_create_sq_out_bits { struct mlx5_ifc_create_sq_in_bits { u8 opcode[0x10]; - u8 reserved_at_10[0x10]; + u8 uid[0x10]; u8 reserved_at_20[0x10]; u8 op_mod[0x10]; -- cgit v1.2.3 From a0d8c054318976927493ffd26055d9d183c9beec Mon Sep 17 00:00:00 2001 From: Yishai Hadas Date: Thu, 20 Sep 2018 21:35:24 +0300 Subject: net/mlx5: Set uid as part of SRQ commands Set uid as part of SRQ commands so that the firmware can manage the SRQ object in a secured way. That will enable using an SRQ that was created by verbs application to be used by the DEVX flow in case the uid is equal. Signed-off-by: Yishai Hadas Signed-off-by: Leon Romanovsky --- include/linux/mlx5/driver.h | 1 + include/linux/mlx5/mlx5_ifc.h | 22 +++++++++++----------- include/linux/mlx5/srq.h | 1 + 3 files changed, 13 insertions(+), 11 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index d885e9f0e054..8fb072aa8671 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -474,6 +474,7 @@ struct mlx5_core_srq { atomic_t refcount; struct completion free; + u16 uid; }; struct mlx5_eq_table { diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index 72dd1e49a799..85f1237d80db 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -5524,7 +5524,7 @@ struct mlx5_ifc_rmp_bitmask_bits { struct mlx5_ifc_modify_rmp_in_bits { u8 opcode[0x10]; - u8 reserved_at_10[0x10]; + u8 uid[0x10]; u8 reserved_at_20[0x10]; u8 op_mod[0x10]; @@ -5986,7 +5986,7 @@ struct mlx5_ifc_destroy_xrq_out_bits { struct mlx5_ifc_destroy_xrq_in_bits { u8 opcode[0x10]; - u8 reserved_at_10[0x10]; + u8 uid[0x10]; u8 reserved_at_20[0x10]; u8 op_mod[0x10]; @@ -6008,7 +6008,7 @@ struct mlx5_ifc_destroy_xrc_srq_out_bits { struct mlx5_ifc_destroy_xrc_srq_in_bits { u8 opcode[0x10]; - u8 reserved_at_10[0x10]; + u8 uid[0x10]; u8 reserved_at_20[0x10]; u8 op_mod[0x10]; @@ -6074,7 +6074,7 @@ struct mlx5_ifc_destroy_srq_out_bits { struct mlx5_ifc_destroy_srq_in_bits { u8 opcode[0x10]; - u8 reserved_at_10[0x10]; + u8 uid[0x10]; u8 reserved_at_20[0x10]; u8 op_mod[0x10]; @@ -6208,7 +6208,7 @@ struct mlx5_ifc_destroy_rmp_out_bits { struct mlx5_ifc_destroy_rmp_in_bits { u8 opcode[0x10]; - u8 reserved_at_10[0x10]; + u8 uid[0x10]; u8 reserved_at_20[0x10]; u8 op_mod[0x10]; @@ -6641,7 +6641,7 @@ struct mlx5_ifc_create_xrq_out_bits { struct mlx5_ifc_create_xrq_in_bits { u8 opcode[0x10]; - u8 reserved_at_10[0x10]; + u8 uid[0x10]; u8 reserved_at_20[0x10]; u8 op_mod[0x10]; @@ -6665,7 +6665,7 @@ struct mlx5_ifc_create_xrc_srq_out_bits { struct mlx5_ifc_create_xrc_srq_in_bits { u8 opcode[0x10]; - u8 reserved_at_10[0x10]; + u8 uid[0x10]; u8 reserved_at_20[0x10]; u8 op_mod[0x10]; @@ -6741,7 +6741,7 @@ struct mlx5_ifc_create_srq_out_bits { struct mlx5_ifc_create_srq_in_bits { u8 opcode[0x10]; - u8 reserved_at_10[0x10]; + u8 uid[0x10]; u8 reserved_at_20[0x10]; u8 op_mod[0x10]; @@ -6871,7 +6871,7 @@ struct mlx5_ifc_create_rmp_out_bits { struct mlx5_ifc_create_rmp_in_bits { u8 opcode[0x10]; - u8 reserved_at_10[0x10]; + u8 uid[0x10]; u8 reserved_at_20[0x10]; u8 op_mod[0x10]; @@ -7272,7 +7272,7 @@ enum { struct mlx5_ifc_arm_xrc_srq_in_bits { u8 opcode[0x10]; - u8 reserved_at_10[0x10]; + u8 uid[0x10]; u8 reserved_at_20[0x10]; u8 op_mod[0x10]; @@ -7300,7 +7300,7 @@ enum { struct mlx5_ifc_arm_rq_in_bits { u8 opcode[0x10]; - u8 reserved_at_10[0x10]; + u8 uid[0x10]; u8 reserved_at_20[0x10]; u8 op_mod[0x10]; diff --git a/include/linux/mlx5/srq.h b/include/linux/mlx5/srq.h index 24ff23e27c8a..1b1f3c20c6a3 100644 --- a/include/linux/mlx5/srq.h +++ b/include/linux/mlx5/srq.h @@ -61,6 +61,7 @@ struct mlx5_srq_attr { u32 tm_next_tag; u32 tm_hw_phase_cnt; u32 tm_sw_phase_cnt; + u16 uid; }; struct mlx5_core_dev; -- cgit v1.2.3 From 774ea6eea29025218f75bc94c764df9a641db471 Mon Sep 17 00:00:00 2001 From: Yishai Hadas Date: Thu, 20 Sep 2018 21:35:25 +0300 Subject: net/mlx5: Set uid as part of DCT commands Set uid as part of DCT commands so that the firmware can manage the DCT object in a secured way. That will enable using a DCT that was created by verbs application to be used by the DEVX flow in case the uid is equal. Signed-off-by: Yishai Hadas Signed-off-by: Leon Romanovsky --- include/linux/mlx5/mlx5_ifc.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index 85f1237d80db..62c0592a9fdb 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -5918,7 +5918,7 @@ struct mlx5_ifc_drain_dct_out_bits { struct mlx5_ifc_drain_dct_in_bits { u8 opcode[0x10]; - u8 reserved_at_10[0x10]; + u8 uid[0x10]; u8 reserved_at_20[0x10]; u8 op_mod[0x10]; @@ -6382,7 +6382,7 @@ struct mlx5_ifc_destroy_dct_out_bits { struct mlx5_ifc_destroy_dct_in_bits { u8 opcode[0x10]; - u8 reserved_at_10[0x10]; + u8 uid[0x10]; u8 reserved_at_20[0x10]; u8 op_mod[0x10]; @@ -7138,7 +7138,7 @@ struct mlx5_ifc_create_dct_out_bits { struct mlx5_ifc_create_dct_in_bits { u8 opcode[0x10]; - u8 reserved_at_10[0x10]; + u8 uid[0x10]; u8 reserved_at_20[0x10]; u8 op_mod[0x10]; -- cgit v1.2.3 From bd37197554eb28a7fc38e44e005e303c77f788ed Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Thu, 20 Sep 2018 21:35:26 +0300 Subject: net/mlx5: Update mlx5_ifc with DEVX UID bits Add DEVX information to WQ, SRQ, CQ, TIR, TIS, QP, RQ, XRCD, PD, MKEY and MCG. Each object that is created/destroyed/modified via verbs will be stamped with a UID based on its user context. This is already done for DEVX objects commands. This will enable the firmware to enforce the usage of kernel objects from the DEVX flow by validating that the same UID is used and the resources are really related to the same user. The addition of *_valid fields are needed to distinguish how various addresses are passed. For non-DEVX callers, all those fields will be zero. Signed-off-by: Leon Romanovsky --- include/linux/mlx5/mlx5_ifc.h | 67 +++++++++++++++++++++++++++---------------- 1 file changed, 43 insertions(+), 24 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index 62c0592a9fdb..68f4d5f9d929 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -1290,7 +1290,9 @@ struct mlx5_ifc_wq_bits { u8 reserved_at_118[0x3]; u8 log_wq_sz[0x5]; - u8 reserved_at_120[0x3]; + u8 dbr_umem_valid[0x1]; + u8 wq_umem_valid[0x1]; + u8 reserved_at_122[0x1]; u8 log_hairpin_num_packets[0x5]; u8 reserved_at_128[0x3]; u8 log_hairpin_data_sz[0x5]; @@ -2364,7 +2366,10 @@ struct mlx5_ifc_qpc_bits { u8 dc_access_key[0x40]; - u8 reserved_at_680[0xc0]; + u8 reserved_at_680[0x3]; + u8 dbr_umem_valid[0x1]; + + u8 reserved_at_684[0xbc]; }; struct mlx5_ifc_roce_addr_layout_bits { @@ -2464,7 +2469,7 @@ struct mlx5_ifc_xrc_srqc_bits { u8 wq_signature[0x1]; u8 cont_srq[0x1]; - u8 reserved_at_22[0x1]; + u8 dbr_umem_valid[0x1]; u8 rlky[0x1]; u8 basic_cyclic_rcv_wqe[0x1]; u8 log_rq_stride[0x3]; @@ -3128,7 +3133,9 @@ enum { struct mlx5_ifc_cqc_bits { u8 status[0x4]; - u8 reserved_at_4[0x4]; + u8 reserved_at_4[0x2]; + u8 dbr_umem_valid[0x1]; + u8 reserved_at_7[0x1]; u8 cqe_sz[0x3]; u8 cc[0x1]; u8 reserved_at_c[0x1]; @@ -5314,7 +5321,7 @@ struct mlx5_ifc_modify_tis_bitmask_bits { struct mlx5_ifc_modify_tis_in_bits { u8 opcode[0x10]; - u8 reserved_at_10[0x10]; + u8 uid[0x10]; u8 reserved_at_20[0x10]; u8 op_mod[0x10]; @@ -5353,7 +5360,7 @@ struct mlx5_ifc_modify_tir_out_bits { struct mlx5_ifc_modify_tir_in_bits { u8 opcode[0x10]; - u8 reserved_at_10[0x10]; + u8 uid[0x10]; u8 reserved_at_20[0x10]; u8 op_mod[0x10]; @@ -5454,7 +5461,7 @@ struct mlx5_ifc_rqt_bitmask_bits { struct mlx5_ifc_modify_rqt_in_bits { u8 opcode[0x10]; - u8 reserved_at_10[0x10]; + u8 uid[0x10]; u8 reserved_at_20[0x10]; u8 op_mod[0x10]; @@ -5641,7 +5648,10 @@ struct mlx5_ifc_modify_cq_in_bits { struct mlx5_ifc_cqc_bits cq_context; - u8 reserved_at_280[0x600]; + u8 reserved_at_280[0x40]; + + u8 cq_umem_valid[0x1]; + u8 reserved_at_2c1[0x5bf]; u8 pas[0][0x40]; }; @@ -5962,7 +5972,7 @@ struct mlx5_ifc_detach_from_mcg_out_bits { struct mlx5_ifc_detach_from_mcg_in_bits { u8 opcode[0x10]; - u8 reserved_at_10[0x10]; + u8 uid[0x10]; u8 reserved_at_20[0x10]; u8 op_mod[0x10]; @@ -6030,7 +6040,7 @@ struct mlx5_ifc_destroy_tis_out_bits { struct mlx5_ifc_destroy_tis_in_bits { u8 opcode[0x10]; - u8 reserved_at_10[0x10]; + u8 uid[0x10]; u8 reserved_at_20[0x10]; u8 op_mod[0x10]; @@ -6052,7 +6062,7 @@ struct mlx5_ifc_destroy_tir_out_bits { struct mlx5_ifc_destroy_tir_in_bits { u8 opcode[0x10]; - u8 reserved_at_10[0x10]; + u8 uid[0x10]; u8 reserved_at_20[0x10]; u8 op_mod[0x10]; @@ -6142,7 +6152,7 @@ struct mlx5_ifc_destroy_rqt_out_bits { struct mlx5_ifc_destroy_rqt_in_bits { u8 opcode[0x10]; - u8 reserved_at_10[0x10]; + u8 uid[0x10]; u8 reserved_at_20[0x10]; u8 op_mod[0x10]; @@ -6507,7 +6517,7 @@ struct mlx5_ifc_dealloc_xrcd_out_bits { struct mlx5_ifc_dealloc_xrcd_in_bits { u8 opcode[0x10]; - u8 reserved_at_10[0x10]; + u8 uid[0x10]; u8 reserved_at_20[0x10]; u8 op_mod[0x10]; @@ -6595,7 +6605,7 @@ struct mlx5_ifc_dealloc_pd_out_bits { struct mlx5_ifc_dealloc_pd_in_bits { u8 opcode[0x10]; - u8 reserved_at_10[0x10]; + u8 uid[0x10]; u8 reserved_at_20[0x10]; u8 op_mod[0x10]; @@ -6674,7 +6684,9 @@ struct mlx5_ifc_create_xrc_srq_in_bits { struct mlx5_ifc_xrc_srqc_bits xrc_srq_context_entry; - u8 reserved_at_280[0x600]; + u8 reserved_at_280[0x40]; + u8 xrc_srq_umem_valid[0x1]; + u8 reserved_at_2c1[0x5bf]; u8 pas[0][0x40]; }; @@ -6693,7 +6705,7 @@ struct mlx5_ifc_create_tis_out_bits { struct mlx5_ifc_create_tis_in_bits { u8 opcode[0x10]; - u8 reserved_at_10[0x10]; + u8 uid[0x10]; u8 reserved_at_20[0x10]; u8 op_mod[0x10]; @@ -6717,7 +6729,7 @@ struct mlx5_ifc_create_tir_out_bits { struct mlx5_ifc_create_tir_in_bits { u8 opcode[0x10]; - u8 reserved_at_10[0x10]; + u8 uid[0x10]; u8 reserved_at_20[0x10]; u8 op_mod[0x10]; @@ -6823,7 +6835,7 @@ struct mlx5_ifc_create_rqt_out_bits { struct mlx5_ifc_create_rqt_in_bits { u8 opcode[0x10]; - u8 reserved_at_10[0x10]; + u8 uid[0x10]; u8 reserved_at_20[0x10]; u8 op_mod[0x10]; @@ -6908,7 +6920,10 @@ struct mlx5_ifc_create_qp_in_bits { struct mlx5_ifc_qpc_bits qpc; - u8 reserved_at_800[0x80]; + u8 reserved_at_800[0x60]; + + u8 wq_umem_valid[0x1]; + u8 reserved_at_861[0x1f]; u8 pas[0][0x40]; }; @@ -6970,7 +6985,8 @@ struct mlx5_ifc_create_mkey_in_bits { u8 reserved_at_40[0x20]; u8 pg_access[0x1]; - u8 reserved_at_61[0x1f]; + u8 mkey_umem_valid[0x1]; + u8 reserved_at_62[0x1e]; struct mlx5_ifc_mkc_bits memory_key_mkey_entry; @@ -7173,7 +7189,10 @@ struct mlx5_ifc_create_cq_in_bits { struct mlx5_ifc_cqc_bits cq_context; - u8 reserved_at_280[0x600]; + u8 reserved_at_280[0x60]; + + u8 cq_umem_valid[0x1]; + u8 reserved_at_2e1[0x59f]; u8 pas[0][0x40]; }; @@ -7221,7 +7240,7 @@ struct mlx5_ifc_attach_to_mcg_out_bits { struct mlx5_ifc_attach_to_mcg_in_bits { u8 opcode[0x10]; - u8 reserved_at_10[0x10]; + u8 uid[0x10]; u8 reserved_at_20[0x10]; u8 op_mod[0x10]; @@ -7348,7 +7367,7 @@ struct mlx5_ifc_alloc_xrcd_out_bits { struct mlx5_ifc_alloc_xrcd_in_bits { u8 opcode[0x10]; - u8 reserved_at_10[0x10]; + u8 uid[0x10]; u8 reserved_at_20[0x10]; u8 op_mod[0x10]; @@ -7436,7 +7455,7 @@ struct mlx5_ifc_alloc_pd_out_bits { struct mlx5_ifc_alloc_pd_in_bits { u8 opcode[0x10]; - u8 reserved_at_10[0x10]; + u8 uid[0x10]; u8 reserved_at_20[0x10]; u8 op_mod[0x10]; -- cgit v1.2.3 From 6f99528e9797794b91b43321fbbc93fe772b0803 Mon Sep 17 00:00:00 2001 From: Vlad Buslov Date: Mon, 24 Sep 2018 19:22:49 +0300 Subject: net: core: netlink: add helper refcount dec and lock function Rtnl lock is encapsulated in netlink and cannot be accessed by other modules directly. This means that reference counted objects that rely on rtnl lock cannot use it with refcounter helper function that atomically releases decrements reference and obtains mutex. This patch implements simple wrapper function around refcount_dec_and_lock that obtains rtnl lock if reference counter value reached 0. Signed-off-by: Vlad Buslov Acked-by: Jiri Pirko Signed-off-by: David S. Miller --- include/linux/rtnetlink.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/rtnetlink.h b/include/linux/rtnetlink.h index 5225832bd6ff..9cdd76348d9a 100644 --- a/include/linux/rtnetlink.h +++ b/include/linux/rtnetlink.h @@ -6,6 +6,7 @@ #include #include #include +#include #include extern int rtnetlink_send(struct sk_buff *skb, struct net *net, u32 pid, u32 group, int echo); @@ -34,6 +35,7 @@ extern void rtnl_unlock(void); extern int rtnl_trylock(void); extern int rtnl_is_locked(void); extern int rtnl_lock_killable(void); +extern bool refcount_dec_and_rtnl_lock(refcount_t *r); extern wait_queue_head_t netdev_unregistering_wq; extern struct rw_semaphore pernet_ops_rwsem; -- cgit v1.2.3 From 3a7d0d07a386716b459b00783b11a8211cefcc0f Mon Sep 17 00:00:00 2001 From: Vlad Buslov Date: Mon, 24 Sep 2018 19:22:51 +0300 Subject: net: sched: extend Qdisc with rcu Currently, Qdisc API functions assume that users have rtnl lock taken. To implement rtnl unlocked classifiers update interface, Qdisc API must be extended with functions that do not require rtnl lock. Extend Qdisc structure with rcu. Implement special version of put function qdisc_put_unlocked() that is called without rtnl lock taken. This function only takes rtnl lock if Qdisc reference counter reached zero and is intended to be used as optimization. Signed-off-by: Vlad Buslov Acked-by: Jiri Pirko Signed-off-by: David S. Miller --- include/linux/rtnetlink.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/rtnetlink.h b/include/linux/rtnetlink.h index 9cdd76348d9a..bb9cb84114c1 100644 --- a/include/linux/rtnetlink.h +++ b/include/linux/rtnetlink.h @@ -85,6 +85,11 @@ static inline struct netdev_queue *dev_ingress_queue(struct net_device *dev) return rtnl_dereference(dev->ingress_queue); } +static inline struct netdev_queue *dev_ingress_queue_rcu(struct net_device *dev) +{ + return rcu_dereference(dev->ingress_queue); +} + struct netdev_queue *dev_ingress_queue_create(struct net_device *dev); #ifdef CONFIG_NET_INGRESS -- cgit v1.2.3 From 8bad74f9840f87661f20ced3dc80c84ab4fd55a1 Mon Sep 17 00:00:00 2001 From: Roman Gushchin Date: Fri, 28 Sep 2018 14:45:36 +0000 Subject: bpf: extend cgroup bpf core to allow multiple cgroup storage types In order to introduce per-cpu cgroup storage, let's generalize bpf cgroup core to support multiple cgroup storage types. Potentially, per-node cgroup storage can be added later. This commit is mostly a formal change that replaces cgroup_storage pointer with a array of cgroup_storage pointers. It doesn't actually introduce a new storage type, it will be done later. Each bpf program is now able to have one cgroup storage of each type. Signed-off-by: Roman Gushchin Acked-by: Song Liu Cc: Daniel Borkmann Cc: Alexei Starovoitov Signed-off-by: Daniel Borkmann --- include/linux/bpf-cgroup.h | 38 ++++++++++++++++++++++++++++---------- include/linux/bpf.h | 11 +++++++++-- 2 files changed, 37 insertions(+), 12 deletions(-) (limited to 'include/linux') diff --git a/include/linux/bpf-cgroup.h b/include/linux/bpf-cgroup.h index f91b0f8ff3a9..e9871b012dac 100644 --- a/include/linux/bpf-cgroup.h +++ b/include/linux/bpf-cgroup.h @@ -2,6 +2,7 @@ #ifndef _BPF_CGROUP_H #define _BPF_CGROUP_H +#include #include #include #include @@ -22,7 +23,10 @@ struct bpf_cgroup_storage; extern struct static_key_false cgroup_bpf_enabled_key; #define cgroup_bpf_enabled static_branch_unlikely(&cgroup_bpf_enabled_key) -DECLARE_PER_CPU(void*, bpf_cgroup_storage); +DECLARE_PER_CPU(void*, bpf_cgroup_storage[MAX_BPF_CGROUP_STORAGE_TYPE]); + +#define for_each_cgroup_storage_type(stype) \ + for (stype = 0; stype < MAX_BPF_CGROUP_STORAGE_TYPE; stype++) struct bpf_cgroup_storage_map; @@ -43,7 +47,7 @@ struct bpf_cgroup_storage { struct bpf_prog_list { struct list_head node; struct bpf_prog *prog; - struct bpf_cgroup_storage *storage; + struct bpf_cgroup_storage *storage[MAX_BPF_CGROUP_STORAGE_TYPE]; }; struct bpf_prog_array; @@ -101,18 +105,29 @@ int __cgroup_bpf_run_filter_sock_ops(struct sock *sk, int __cgroup_bpf_check_dev_permission(short dev_type, u32 major, u32 minor, short access, enum bpf_attach_type type); -static inline void bpf_cgroup_storage_set(struct bpf_cgroup_storage *storage) +static inline enum bpf_cgroup_storage_type cgroup_storage_type( + struct bpf_map *map) { + return BPF_CGROUP_STORAGE_SHARED; +} + +static inline void bpf_cgroup_storage_set(struct bpf_cgroup_storage + *storage[MAX_BPF_CGROUP_STORAGE_TYPE]) +{ + enum bpf_cgroup_storage_type stype; struct bpf_storage_buffer *buf; - if (!storage) - return; + for_each_cgroup_storage_type(stype) { + if (!storage[stype]) + continue; - buf = READ_ONCE(storage->buf); - this_cpu_write(bpf_cgroup_storage, &buf->data[0]); + buf = READ_ONCE(storage[stype]->buf); + this_cpu_write(bpf_cgroup_storage[stype], &buf->data[0]); + } } -struct bpf_cgroup_storage *bpf_cgroup_storage_alloc(struct bpf_prog *prog); +struct bpf_cgroup_storage *bpf_cgroup_storage_alloc(struct bpf_prog *prog, + enum bpf_cgroup_storage_type stype); void bpf_cgroup_storage_free(struct bpf_cgroup_storage *storage); void bpf_cgroup_storage_link(struct bpf_cgroup_storage *storage, struct cgroup *cgroup, @@ -265,13 +280,14 @@ static inline int cgroup_bpf_prog_query(const union bpf_attr *attr, return -EINVAL; } -static inline void bpf_cgroup_storage_set(struct bpf_cgroup_storage *storage) {} +static inline void bpf_cgroup_storage_set( + struct bpf_cgroup_storage *storage[MAX_BPF_CGROUP_STORAGE_TYPE]) {} static inline int bpf_cgroup_storage_assign(struct bpf_prog *prog, struct bpf_map *map) { return 0; } static inline void bpf_cgroup_storage_release(struct bpf_prog *prog, struct bpf_map *map) {} static inline struct bpf_cgroup_storage *bpf_cgroup_storage_alloc( - struct bpf_prog *prog) { return 0; } + struct bpf_prog *prog, enum bpf_cgroup_storage_type stype) { return 0; } static inline void bpf_cgroup_storage_free( struct bpf_cgroup_storage *storage) {} @@ -293,6 +309,8 @@ static inline void bpf_cgroup_storage_free( #define BPF_CGROUP_RUN_PROG_SOCK_OPS(sock_ops) ({ 0; }) #define BPF_CGROUP_RUN_PROG_DEVICE_CGROUP(type,major,minor,access) ({ 0; }) +#define for_each_cgroup_storage_type(stype) for (; false; ) + #endif /* CONFIG_CGROUP_BPF */ #endif /* _BPF_CGROUP_H */ diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 988a00797bcd..b457fbe7b70b 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -272,6 +272,13 @@ struct bpf_prog_offload { u32 jited_len; }; +enum bpf_cgroup_storage_type { + BPF_CGROUP_STORAGE_SHARED, + __BPF_CGROUP_STORAGE_MAX +}; + +#define MAX_BPF_CGROUP_STORAGE_TYPE __BPF_CGROUP_STORAGE_MAX + struct bpf_prog_aux { atomic_t refcnt; u32 used_map_cnt; @@ -289,7 +296,7 @@ struct bpf_prog_aux { struct bpf_prog *prog; struct user_struct *user; u64 load_time; /* ns since boottime */ - struct bpf_map *cgroup_storage; + struct bpf_map *cgroup_storage[MAX_BPF_CGROUP_STORAGE_TYPE]; char name[BPF_OBJ_NAME_LEN]; #ifdef CONFIG_SECURITY void *security; @@ -358,7 +365,7 @@ int bpf_prog_test_run_skb(struct bpf_prog *prog, const union bpf_attr *kattr, */ struct bpf_prog_array_item { struct bpf_prog *prog; - struct bpf_cgroup_storage *cgroup_storage; + struct bpf_cgroup_storage *cgroup_storage[MAX_BPF_CGROUP_STORAGE_TYPE]; }; struct bpf_prog_array { -- cgit v1.2.3 From f294b37ec7b24a574884cd157497a3748081c0f0 Mon Sep 17 00:00:00 2001 From: Roman Gushchin Date: Fri, 28 Sep 2018 14:45:40 +0000 Subject: bpf: rework cgroup storage pointer passing To simplify the following introduction of per-cpu cgroup storage, let's rework a bit a mechanism of passing a pointer to a cgroup storage into the bpf_get_local_storage(). Let's save a pointer to the corresponding bpf_cgroup_storage structure, instead of a pointer to the actual buffer. It will help us to handle per-cpu storage later, which has a different way of accessing to the actual data. Signed-off-by: Roman Gushchin Acked-by: Song Liu Cc: Daniel Borkmann Cc: Alexei Starovoitov Signed-off-by: Daniel Borkmann --- include/linux/bpf-cgroup.h | 13 ++++--------- 1 file changed, 4 insertions(+), 9 deletions(-) (limited to 'include/linux') diff --git a/include/linux/bpf-cgroup.h b/include/linux/bpf-cgroup.h index e9871b012dac..7e0c9a1d48b7 100644 --- a/include/linux/bpf-cgroup.h +++ b/include/linux/bpf-cgroup.h @@ -23,7 +23,8 @@ struct bpf_cgroup_storage; extern struct static_key_false cgroup_bpf_enabled_key; #define cgroup_bpf_enabled static_branch_unlikely(&cgroup_bpf_enabled_key) -DECLARE_PER_CPU(void*, bpf_cgroup_storage[MAX_BPF_CGROUP_STORAGE_TYPE]); +DECLARE_PER_CPU(struct bpf_cgroup_storage*, + bpf_cgroup_storage[MAX_BPF_CGROUP_STORAGE_TYPE]); #define for_each_cgroup_storage_type(stype) \ for (stype = 0; stype < MAX_BPF_CGROUP_STORAGE_TYPE; stype++) @@ -115,15 +116,9 @@ static inline void bpf_cgroup_storage_set(struct bpf_cgroup_storage *storage[MAX_BPF_CGROUP_STORAGE_TYPE]) { enum bpf_cgroup_storage_type stype; - struct bpf_storage_buffer *buf; - - for_each_cgroup_storage_type(stype) { - if (!storage[stype]) - continue; - buf = READ_ONCE(storage[stype]->buf); - this_cpu_write(bpf_cgroup_storage[stype], &buf->data[0]); - } + for_each_cgroup_storage_type(stype) + this_cpu_write(bpf_cgroup_storage[stype], storage[stype]); } struct bpf_cgroup_storage *bpf_cgroup_storage_alloc(struct bpf_prog *prog, -- cgit v1.2.3 From b741f1630346defcbc8cc60f1a2bdae8b3b0036f Mon Sep 17 00:00:00 2001 From: Roman Gushchin Date: Fri, 28 Sep 2018 14:45:43 +0000 Subject: bpf: introduce per-cpu cgroup local storage This commit introduced per-cpu cgroup local storage. Per-cpu cgroup local storage is very similar to simple cgroup storage (let's call it shared), except all the data is per-cpu. The main goal of per-cpu variant is to implement super fast counters (e.g. packet counters), which don't require neither lookups, neither atomic operations. >From userspace's point of view, accessing a per-cpu cgroup storage is similar to other per-cpu map types (e.g. per-cpu hashmaps and arrays). Writing to a per-cpu cgroup storage is not atomic, but is performed by copying longs, so some minimal atomicity is here, exactly as with other per-cpu maps. Signed-off-by: Roman Gushchin Cc: Daniel Borkmann Cc: Alexei Starovoitov Acked-by: Song Liu Signed-off-by: Daniel Borkmann --- include/linux/bpf-cgroup.h | 20 +++++++++++++++++++- include/linux/bpf.h | 1 + include/linux/bpf_types.h | 1 + 3 files changed, 21 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/bpf-cgroup.h b/include/linux/bpf-cgroup.h index 7e0c9a1d48b7..588dd5f0bd85 100644 --- a/include/linux/bpf-cgroup.h +++ b/include/linux/bpf-cgroup.h @@ -37,7 +37,10 @@ struct bpf_storage_buffer { }; struct bpf_cgroup_storage { - struct bpf_storage_buffer *buf; + union { + struct bpf_storage_buffer *buf; + void __percpu *percpu_buf; + }; struct bpf_cgroup_storage_map *map; struct bpf_cgroup_storage_key key; struct list_head list; @@ -109,6 +112,9 @@ int __cgroup_bpf_check_dev_permission(short dev_type, u32 major, u32 minor, static inline enum bpf_cgroup_storage_type cgroup_storage_type( struct bpf_map *map) { + if (map->map_type == BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE) + return BPF_CGROUP_STORAGE_PERCPU; + return BPF_CGROUP_STORAGE_SHARED; } @@ -131,6 +137,10 @@ void bpf_cgroup_storage_unlink(struct bpf_cgroup_storage *storage); int bpf_cgroup_storage_assign(struct bpf_prog *prog, struct bpf_map *map); void bpf_cgroup_storage_release(struct bpf_prog *prog, struct bpf_map *map); +int bpf_percpu_cgroup_storage_copy(struct bpf_map *map, void *key, void *value); +int bpf_percpu_cgroup_storage_update(struct bpf_map *map, void *key, + void *value, u64 flags); + /* Wrappers for __cgroup_bpf_run_filter_skb() guarded by cgroup_bpf_enabled. */ #define BPF_CGROUP_RUN_PROG_INET_INGRESS(sk, skb) \ ({ \ @@ -285,6 +295,14 @@ static inline struct bpf_cgroup_storage *bpf_cgroup_storage_alloc( struct bpf_prog *prog, enum bpf_cgroup_storage_type stype) { return 0; } static inline void bpf_cgroup_storage_free( struct bpf_cgroup_storage *storage) {} +static inline int bpf_percpu_cgroup_storage_copy(struct bpf_map *map, void *key, + void *value) { + return 0; +} +static inline int bpf_percpu_cgroup_storage_update(struct bpf_map *map, + void *key, void *value, u64 flags) { + return 0; +} #define cgroup_bpf_enabled (0) #define BPF_CGROUP_PRE_CONNECT_ENABLED(sk) (0) diff --git a/include/linux/bpf.h b/include/linux/bpf.h index b457fbe7b70b..018299a595c8 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -274,6 +274,7 @@ struct bpf_prog_offload { enum bpf_cgroup_storage_type { BPF_CGROUP_STORAGE_SHARED, + BPF_CGROUP_STORAGE_PERCPU, __BPF_CGROUP_STORAGE_MAX }; diff --git a/include/linux/bpf_types.h b/include/linux/bpf_types.h index c9bd6fb765b0..5432f4c9f50e 100644 --- a/include/linux/bpf_types.h +++ b/include/linux/bpf_types.h @@ -43,6 +43,7 @@ BPF_MAP_TYPE(BPF_MAP_TYPE_CGROUP_ARRAY, cgroup_array_map_ops) #endif #ifdef CONFIG_CGROUP_BPF BPF_MAP_TYPE(BPF_MAP_TYPE_CGROUP_STORAGE, cgroup_storage_map_ops) +BPF_MAP_TYPE(BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE, cgroup_storage_map_ops) #endif BPF_MAP_TYPE(BPF_MAP_TYPE_HASH, htab_map_ops) BPF_MAP_TYPE(BPF_MAP_TYPE_PERCPU_HASH, htab_percpu_map_ops) -- cgit v1.2.3 From 59c9d35ea9cd73c3a55642ec9a0097770baccb93 Mon Sep 17 00:00:00 2001 From: Alaa Hleihel Date: Wed, 5 Sep 2018 17:06:37 +0300 Subject: net/mlx5: Cache the system image guid The system image guid is a read-only field which is used by the TC offloads code to determine if two mlx5 devices belong to the same ASIC while adding flows. Read this once and save it on the core device rather than querying each time an offloaded flow is added. Signed-off-by: Alaa Hleihel Reviewed-by: Or Gerlitz Signed-off-by: Saeed Mahameed --- include/linux/mlx5/driver.h | 1 + include/linux/mlx5/vport.h | 2 ++ 2 files changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index ed73b51f6697..26a92462f4ce 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -838,6 +838,7 @@ struct mlx5_core_dev { u32 fpga[MLX5_ST_SZ_DW(fpga_cap)]; u32 qcam[MLX5_ST_SZ_DW(qcam_reg)]; } caps; + u64 sys_image_guid; phys_addr_t iseg_base; struct mlx5_init_seg __iomem *iseg; enum mlx5_device_state state; diff --git a/include/linux/mlx5/vport.h b/include/linux/mlx5/vport.h index 7e7c6dfcfb09..9c694808c212 100644 --- a/include/linux/mlx5/vport.h +++ b/include/linux/mlx5/vport.h @@ -121,4 +121,6 @@ int mlx5_nic_vport_query_local_lb(struct mlx5_core_dev *mdev, bool *status); int mlx5_nic_vport_affiliate_multiport(struct mlx5_core_dev *master_mdev, struct mlx5_core_dev *port_mdev); int mlx5_nic_vport_unaffiliate_multiport(struct mlx5_core_dev *port_mdev); + +u64 mlx5_query_nic_system_image_guid(struct mlx5_core_dev *mdev); #endif /* __MLX5_VPORT_H__ */ -- cgit v1.2.3 From b31cdffa2329fe330cd304ca26c250dd1520fb0a Mon Sep 17 00:00:00 2001 From: Andrew Lunn Date: Sat, 29 Sep 2018 23:04:09 +0200 Subject: net: phy: Move linkmode helpers to somewhere public phylink has some useful helpers to working with linkmode bitmaps. Move them to there own header so other code can use them. Signed-off-by: Andrew Lunn Acked-by: Florian Fainelli Reviewed-by: Maxime Chevallier Signed-off-by: David S. Miller --- include/linux/linkmode.h | 67 ++++++++++++++++++++++++++++++++++++++++++++++++ include/linux/mii.h | 1 + include/linux/phy.h | 1 + 3 files changed, 69 insertions(+) create mode 100644 include/linux/linkmode.h (limited to 'include/linux') diff --git a/include/linux/linkmode.h b/include/linux/linkmode.h new file mode 100644 index 000000000000..014fb86c7114 --- /dev/null +++ b/include/linux/linkmode.h @@ -0,0 +1,67 @@ +#ifndef __LINKMODE_H +#define __LINKMODE_H + +#include +#include +#include + +static inline void linkmode_zero(unsigned long *dst) +{ + bitmap_zero(dst, __ETHTOOL_LINK_MODE_MASK_NBITS); +} + +static inline void linkmode_copy(unsigned long *dst, const unsigned long *src) +{ + bitmap_copy(dst, src, __ETHTOOL_LINK_MODE_MASK_NBITS); +} + +static inline void linkmode_and(unsigned long *dst, const unsigned long *a, + const unsigned long *b) +{ + bitmap_and(dst, a, b, __ETHTOOL_LINK_MODE_MASK_NBITS); +} + +static inline void linkmode_or(unsigned long *dst, const unsigned long *a, + const unsigned long *b) +{ + bitmap_or(dst, a, b, __ETHTOOL_LINK_MODE_MASK_NBITS); +} + +static inline bool linkmode_empty(const unsigned long *src) +{ + return bitmap_empty(src, __ETHTOOL_LINK_MODE_MASK_NBITS); +} + +static inline int linkmode_andnot(unsigned long *dst, const unsigned long *src1, + const unsigned long *src2) +{ + return bitmap_andnot(dst, src1, src2, __ETHTOOL_LINK_MODE_MASK_NBITS); +} + +static inline void linkmode_set_bit(int nr, volatile unsigned long *addr) +{ + __set_bit(nr, addr); +} + +static inline void linkmode_clear_bit(int nr, volatile unsigned long *addr) +{ + __clear_bit(nr, addr); +} + +static inline void linkmode_change_bit(int nr, volatile unsigned long *addr) +{ + __change_bit(nr, addr); +} + +static inline int linkmode_test_bit(int nr, volatile unsigned long *addr) +{ + return test_bit(nr, addr); +} + +static inline int linkmode_equal(const unsigned long *src1, + const unsigned long *src2) +{ + return bitmap_equal(src1, src2, __ETHTOOL_LINK_MODE_MASK_NBITS); +} + +#endif /* __LINKMODE_H */ diff --git a/include/linux/mii.h b/include/linux/mii.h index 55000ee5c6ad..567047ef0309 100644 --- a/include/linux/mii.h +++ b/include/linux/mii.h @@ -10,6 +10,7 @@ #include +#include #include struct ethtool_cmd; diff --git a/include/linux/phy.h b/include/linux/phy.h index 192a1fa0c73b..d24cc46748e2 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -19,6 +19,7 @@ #include #include #include +#include #include #include #include -- cgit v1.2.3 From ab2a605fa621ecf4ec26603a237822f7772cfa28 Mon Sep 17 00:00:00 2001 From: Andrew Lunn Date: Sat, 29 Sep 2018 23:04:10 +0200 Subject: net: phy: Add phydev_warn() Not all new style LINK_MODE bits can be converted into old style SUPPORTED bits. We need to warn when such a conversion is attempted. Add a helper for this. Convert all pr_warn() calls to phydev_warn() where possible. Signed-off-by: Andrew Lunn Reviewed-by: Maxime Chevallier Signed-off-by: David S. Miller --- include/linux/phy.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/phy.h b/include/linux/phy.h index d24cc46748e2..0ab9f89773fd 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -968,6 +968,9 @@ static inline void phy_device_reset(struct phy_device *phydev, int value) #define phydev_err(_phydev, format, args...) \ dev_err(&_phydev->mdio.dev, format, ##args) +#define phydev_warn(_phydev, format, args...) \ + dev_warn(&_phydev->mdio.dev, format, ##args) + #define phydev_dbg(_phydev, format, args...) \ dev_dbg(&_phydev->mdio.dev, format, ##args) -- cgit v1.2.3 From c4fabb8b3c0d724eb93dabaf346b0dd8a8be7118 Mon Sep 17 00:00:00 2001 From: Andrew Lunn Date: Sat, 29 Sep 2018 23:04:11 +0200 Subject: net: phy: Add phydev_info() Add phydev_info() and make use of it within the phy drivers and core code. Signed-off-by: Andrew Lunn Signed-off-by: David S. Miller --- include/linux/phy.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/phy.h b/include/linux/phy.h index 0ab9f89773fd..0f6e7bf5e9c5 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -968,6 +968,9 @@ static inline void phy_device_reset(struct phy_device *phydev, int value) #define phydev_err(_phydev, format, args...) \ dev_err(&_phydev->mdio.dev, format, ##args) +#define phydev_info(_phydev, format, args...) \ + dev_info(&_phydev->mdio.dev, format, ##args) + #define phydev_warn(_phydev, format, args...) \ dev_warn(&_phydev->mdio.dev, format, ##args) -- cgit v1.2.3 From edc7ccbbcf32b97c7d26cd556f364eb4f22c4285 Mon Sep 17 00:00:00 2001 From: Andrew Lunn Date: Sat, 29 Sep 2018 23:04:12 +0200 Subject: net: phy: Add helper to convert MII ADV register to a linkmode The phy_mii_ioctl can be used to write a value into the MII_ADVERTISE register in the PHY. Since this changes the state of the PHY, we need to make the same change to phydev->advertising. Add a helper which can convert the register value to a linkmode. Signed-off-by: Andrew Lunn Reviewed-by: Florian Fainelli Reviewed-by: Maxime Chevallier Signed-off-by: David S. Miller --- include/linux/mii.h | 31 +++++++++++++++++++++++++++++++ 1 file changed, 31 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mii.h b/include/linux/mii.h index 567047ef0309..8c7da9473ad9 100644 --- a/include/linux/mii.h +++ b/include/linux/mii.h @@ -303,6 +303,37 @@ static inline u32 mii_lpa_to_ethtool_lpa_x(u32 lpa) return result | mii_adv_to_ethtool_adv_x(lpa); } +/** + * mii_adv_to_linkmode_adv_t + * @advertising:pointer to destination link mode. + * @adv: value of the MII_ADVERTISE register + * + * A small helper function that translates MII_ADVERTISE bits + * to linkmode advertisement settings. + */ +static inline void mii_adv_to_linkmode_adv_t(unsigned long *advertising, + u32 adv) +{ + linkmode_zero(advertising); + + if (adv & ADVERTISE_10HALF) + linkmode_set_bit(ETHTOOL_LINK_MODE_10baseT_Half_BIT, + advertising); + if (adv & ADVERTISE_10FULL) + linkmode_set_bit(ETHTOOL_LINK_MODE_10baseT_Full_BIT, + advertising); + if (adv & ADVERTISE_100HALF) + linkmode_set_bit(ETHTOOL_LINK_MODE_100baseT_Half_BIT, + advertising); + if (adv & ADVERTISE_100FULL) + linkmode_set_bit(ETHTOOL_LINK_MODE_100baseT_Full_BIT, + advertising); + if (adv & ADVERTISE_PAUSE_CAP) + linkmode_set_bit(ETHTOOL_LINK_MODE_Pause_BIT, advertising); + if (adv & ADVERTISE_PAUSE_ASYM) + linkmode_set_bit(ETHTOOL_LINK_MODE_Asym_Pause_BIT, advertising); +} + /** * mii_advertise_flowctrl - get flow control advertisement flags * @cap: Flow control capabilities (FLOW_CTRL_RX, FLOW_CTRL_TX or both) -- cgit v1.2.3 From 5f991f7bddc991ecc3c8a009ffd76fccff4661c7 Mon Sep 17 00:00:00 2001 From: Andrew Lunn Date: Sat, 29 Sep 2018 23:04:13 +0200 Subject: net: phy: Add helper for advertise to lcl value Add a helper to convert the local advertising to an LCL capabilities, which is then used to resolve pause flow control settings. Signed-off-by: Andrew Lunn Reviewed-by: Florian Fainelli Reviewed-by: Maxime Chevallier Signed-off-by: David S. Miller --- include/linux/mii.h | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mii.h b/include/linux/mii.h index 8c7da9473ad9..9ed49c8261d0 100644 --- a/include/linux/mii.h +++ b/include/linux/mii.h @@ -334,6 +334,25 @@ static inline void mii_adv_to_linkmode_adv_t(unsigned long *advertising, linkmode_set_bit(ETHTOOL_LINK_MODE_Asym_Pause_BIT, advertising); } +/** + * ethtool_adv_to_lcl_adv_t + * @advertising:pointer to ethtool advertising + * + * A small helper function that translates ethtool advertising to LVL + * pause capabilities. + */ +static inline u32 ethtool_adv_to_lcl_adv_t(u32 advertising) +{ + u32 lcl_adv = 0; + + if (advertising & ADVERTISED_Pause) + lcl_adv |= ADVERTISE_PAUSE_CAP; + if (advertising & ADVERTISED_Asym_Pause) + lcl_adv |= ADVERTISE_PAUSE_ASYM; + + return lcl_adv; +} + /** * mii_advertise_flowctrl - get flow control advertisement flags * @cap: Flow control capabilities (FLOW_CTRL_RX, FLOW_CTRL_TX or both) -- cgit v1.2.3 From f954a04ea18ebfcba1cd2756eaee59eb4978a20e Mon Sep 17 00:00:00 2001 From: Andrew Lunn Date: Sat, 29 Sep 2018 23:04:14 +0200 Subject: net: phy: Add limkmode equivalents to some of the MII ethtool helpers Add helpers which take a linkmode rather than a u32 ethtool for advertising settings. Signed-off-by: Andrew Lunn Reviewed-by: Florian Fainelli Reviewed-by: Maxime Chevallier Signed-off-by: David S. Miller --- include/linux/mii.h | 50 ++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 50 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mii.h b/include/linux/mii.h index 9ed49c8261d0..2da85b02e1c0 100644 --- a/include/linux/mii.h +++ b/include/linux/mii.h @@ -132,6 +132,34 @@ static inline u32 ethtool_adv_to_mii_adv_t(u32 ethadv) return result; } +/** + * linkmode_adv_to_mii_adv_t + * @advertising: the linkmode advertisement settings + * + * A small helper function that translates linkmode advertisement + * settings to phy autonegotiation advertisements for the + * MII_ADVERTISE register. + */ +static inline u32 linkmode_adv_to_mii_adv_t(unsigned long *advertising) +{ + u32 result = 0; + + if (linkmode_test_bit(ETHTOOL_LINK_MODE_10baseT_Half_BIT, advertising)) + result |= ADVERTISE_10HALF; + if (linkmode_test_bit(ETHTOOL_LINK_MODE_10baseT_Full_BIT, advertising)) + result |= ADVERTISE_10FULL; + if (linkmode_test_bit(ETHTOOL_LINK_MODE_100baseT_Half_BIT, advertising)) + result |= ADVERTISE_100HALF; + if (linkmode_test_bit(ETHTOOL_LINK_MODE_100baseT_Full_BIT, advertising)) + result |= ADVERTISE_100FULL; + if (linkmode_test_bit(ETHTOOL_LINK_MODE_Pause_BIT, advertising)) + result |= ADVERTISE_PAUSE_CAP; + if (linkmode_test_bit(ETHTOOL_LINK_MODE_Asym_Pause_BIT, advertising)) + result |= ADVERTISE_PAUSE_ASYM; + + return result; +} + /** * mii_adv_to_ethtool_adv_t * @adv: value of the MII_ADVERTISE register @@ -179,6 +207,28 @@ static inline u32 ethtool_adv_to_mii_ctrl1000_t(u32 ethadv) return result; } +/** + * linkmode_adv_to_mii_ctrl1000_t + * advertising: the linkmode advertisement settings + * + * A small helper function that translates linkmode advertisement + * settings to phy autonegotiation advertisements for the + * MII_CTRL1000 register when in 1000T mode. + */ +static inline u32 linkmode_adv_to_mii_ctrl1000_t(unsigned long *advertising) +{ + u32 result = 0; + + if (linkmode_test_bit(ETHTOOL_LINK_MODE_1000baseT_Half_BIT, + advertising)) + result |= ADVERTISE_1000HALF; + if (linkmode_test_bit(ETHTOOL_LINK_MODE_1000baseT_Full_BIT, + advertising)) + result |= ADVERTISE_1000FULL; + + return result; +} + /** * mii_ctrl1000_to_ethtool_adv_t * @adv: value of the MII_CTRL1000 register -- cgit v1.2.3 From 719655a149715f26fc4de904fe0aa83068bd5b9e Mon Sep 17 00:00:00 2001 From: Andrew Lunn Date: Sat, 29 Sep 2018 23:04:16 +0200 Subject: net: phy: Replace phy driver features u32 with link_mode bitmap This is one step in allowing phylib to make use of link_mode bitmaps, instead of u32 for supported and advertised features. Convert the phy drivers to use bitmaps to indicates the features they support. Build bitmap equivalents of the u32 values at runtime, and have the drivers point to the appropriate bitmap. These bitmaps are shared, and we don't want a driver to modify them. So mark them __ro_after_init. Within phylib, the features bitmap is currently turned back into a u32. This will be removed once the whole of phylib, and the drivers are converted to use bitmaps. Signed-off-by: Andrew Lunn Signed-off-by: David S. Miller --- include/linux/linkmode.h | 9 +++++++++ include/linux/phy.h | 24 ++++++++++++++++-------- 2 files changed, 25 insertions(+), 8 deletions(-) (limited to 'include/linux') diff --git a/include/linux/linkmode.h b/include/linux/linkmode.h index 014fb86c7114..22443d7fb5cd 100644 --- a/include/linux/linkmode.h +++ b/include/linux/linkmode.h @@ -43,6 +43,15 @@ static inline void linkmode_set_bit(int nr, volatile unsigned long *addr) __set_bit(nr, addr); } +static inline void linkmode_set_bit_array(const int *array, int array_size, + unsigned long *addr) +{ + int i; + + for (i = 0; i < array_size; i++) + linkmode_set_bit(array[i], addr); +} + static inline void linkmode_clear_bit(int nr, volatile unsigned long *addr) { __clear_bit(nr, addr); diff --git a/include/linux/phy.h b/include/linux/phy.h index 0f6e7bf5e9c5..dff51dd36e52 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -42,13 +42,21 @@ #define PHY_1000BT_FEATURES (SUPPORTED_1000baseT_Half | \ SUPPORTED_1000baseT_Full) -#define PHY_BASIC_FEATURES (PHY_10BT_FEATURES | \ - PHY_100BT_FEATURES | \ - PHY_DEFAULT_FEATURES) - -#define PHY_GBIT_FEATURES (PHY_BASIC_FEATURES | \ - PHY_1000BT_FEATURES) - +extern __ETHTOOL_DECLARE_LINK_MODE_MASK(phy_basic_features) __ro_after_init; +extern __ETHTOOL_DECLARE_LINK_MODE_MASK(phy_basic_t1_features) __ro_after_init; +extern __ETHTOOL_DECLARE_LINK_MODE_MASK(phy_gbit_features) __ro_after_init; +extern __ETHTOOL_DECLARE_LINK_MODE_MASK(phy_gbit_fibre_features) __ro_after_init; +extern __ETHTOOL_DECLARE_LINK_MODE_MASK(phy_gbit_all_ports_features) __ro_after_init; +extern __ETHTOOL_DECLARE_LINK_MODE_MASK(phy_10gbit_features) __ro_after_init; +extern __ETHTOOL_DECLARE_LINK_MODE_MASK(phy_10gbit_full_features) __ro_after_init; + +#define PHY_BASIC_FEATURES ((unsigned long *)&phy_basic_features) +#define PHY_BASIC_T1_FEATURES ((unsigned long *)&phy_basic_t1_features) +#define PHY_GBIT_FEATURES ((unsigned long *)&phy_gbit_features) +#define PHY_GBIT_FIBRE_FEATURES ((unsigned long *)&phy_gbit_fibre_features) +#define PHY_GBIT_ALL_PORTS_FEATURES ((unsigned long *)&phy_gbit_all_ports_features) +#define PHY_10GBIT_FEATURES ((unsigned long *)&phy_10gbit_features) +#define PHY_10GBIT_FULL_FEATURES ((unsigned long *)&phy_10gbit_full_features) /* * Set phydev->irq to PHY_POLL if interrupts are not supported, @@ -510,7 +518,7 @@ struct phy_driver { u32 phy_id; char *name; u32 phy_id_mask; - u32 features; + const unsigned long * const features; u32 flags; const void *driver_data; -- cgit v1.2.3 From 9f2959b6b52d43326b2f6a0e0d7ffe6f4fc3b5ca Mon Sep 17 00:00:00 2001 From: Heiner Kallweit Date: Fri, 28 Sep 2018 08:51:09 +0200 Subject: net: phy: improve handling delayed work Using mod_delayed_work() allows to simplify handling delayed work and removes the need for the sync parameter in phy_trigger_machine(). Also introduce a helper phy_queue_state_machine() to encapsulate the low-level delayed work calls. No functional change intended. Signed-off-by: Heiner Kallweit Signed-off-by: David S. Miller --- include/linux/phy.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/phy.h b/include/linux/phy.h index dff51dd36e52..3ea87f774a76 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -1054,7 +1054,7 @@ void phy_change_work(struct work_struct *work); void phy_mac_interrupt(struct phy_device *phydev); void phy_start_machine(struct phy_device *phydev); void phy_stop_machine(struct phy_device *phydev); -void phy_trigger_machine(struct phy_device *phydev, bool sync); +void phy_trigger_machine(struct phy_device *phydev); int phy_ethtool_sset(struct phy_device *phydev, struct ethtool_cmd *cmd); void phy_ethtool_ksettings_get(struct phy_device *phydev, struct ethtool_link_ksettings *cmd); -- cgit v1.2.3 From cc16567e5a8a7bb9439ef61ab80069acdd33f76f Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Tue, 2 Oct 2018 11:03:40 +0200 Subject: net: drop unused skb_append_datato_frags() This helper is unused since commit 988cf74deb45 ("inet: Stop generating UFO packets.") Signed-off-by: Paolo Abeni Signed-off-by: David S. Miller --- include/linux/skbuff.h | 5 ----- 1 file changed, 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 87e29710373f..119d092c6b13 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -1082,11 +1082,6 @@ static inline int skb_pad(struct sk_buff *skb, int pad) } #define dev_kfree_skb(a) consume_skb(a) -int skb_append_datato_frags(struct sock *sk, struct sk_buff *skb, - int getfrag(void *from, char *to, int offset, - int len, int odd, struct sk_buff *skb), - void *from, int length); - int skb_append_pagefrags(struct sk_buff *skb, struct page *page, int offset, size_t size); -- cgit v1.2.3 From 5bf0961cc6a180c077793f2615a8fd842c655876 Mon Sep 17 00:00:00 2001 From: Sudarsana Reddy Kalluru Date: Tue, 2 Oct 2018 06:16:11 -0700 Subject: qed: Add driver support for 20G link speed. Add driver support for configuring/reading the 20G link speed. Signed-off-by: Sudarsana Reddy Kalluru Signed-off-by: Michal Kalderon Signed-off-by: David S. Miller --- include/linux/qed/qed_if.h | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/qed/qed_if.h b/include/linux/qed/qed_if.h index 8cd34645e892..dee3c9c744f7 100644 --- a/include/linux/qed/qed_if.h +++ b/include/linux/qed/qed_if.h @@ -670,10 +670,11 @@ enum qed_link_mode_bits { QED_LM_1000baseT_Half_BIT = BIT(4), QED_LM_1000baseT_Full_BIT = BIT(5), QED_LM_10000baseKR_Full_BIT = BIT(6), - QED_LM_25000baseKR_Full_BIT = BIT(7), - QED_LM_40000baseLR4_Full_BIT = BIT(8), - QED_LM_50000baseKR2_Full_BIT = BIT(9), - QED_LM_100000baseKR4_Full_BIT = BIT(10), + QED_LM_20000baseKR2_Full_BIT = BIT(7), + QED_LM_25000baseKR_Full_BIT = BIT(8), + QED_LM_40000baseLR4_Full_BIT = BIT(9), + QED_LM_50000baseKR2_Full_BIT = BIT(10), + QED_LM_100000baseKR4_Full_BIT = BIT(11), QED_LM_COUNT = 11 }; -- cgit v1.2.3 From f3709f69b7c5cba6323cc03c29b64293b93be817 Mon Sep 17 00:00:00 2001 From: Joe Stringer Date: Tue, 2 Oct 2018 13:35:29 -0700 Subject: bpf: Add iterator for spilled registers Add this iterator for spilled registers, it concentrates the details of how to get the current frame's spilled registers into a single macro while clarifying the intention of the code which is calling the macro. Signed-off-by: Joe Stringer Acked-by: Alexei Starovoitov Signed-off-by: Daniel Borkmann --- include/linux/bpf_verifier.h | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'include/linux') diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h index b42b60a83e19..d0e7f97e8b60 100644 --- a/include/linux/bpf_verifier.h +++ b/include/linux/bpf_verifier.h @@ -131,6 +131,17 @@ struct bpf_verifier_state { u32 curframe; }; +#define bpf_get_spilled_reg(slot, frame) \ + (((slot < frame->allocated_stack / BPF_REG_SIZE) && \ + (frame->stack[slot].slot_type[0] == STACK_SPILL)) \ + ? &frame->stack[slot].spilled_ptr : NULL) + +/* Iterate over 'frame', setting 'reg' to either NULL or a spilled register. */ +#define bpf_for_each_spilled_reg(iter, frame, reg) \ + for (iter = 0, reg = bpf_get_spilled_reg(iter, frame); \ + iter < frame->allocated_stack / BPF_REG_SIZE; \ + iter++, reg = bpf_get_spilled_reg(iter, frame)) + /* linked list of verifier states used to prune search */ struct bpf_verifier_state_list { struct bpf_verifier_state state; -- cgit v1.2.3 From c64b7983288e636356f7f5f652de4813e1cfedac Mon Sep 17 00:00:00 2001 From: Joe Stringer Date: Tue, 2 Oct 2018 13:35:33 -0700 Subject: bpf: Add PTR_TO_SOCKET verifier type Teach the verifier a little bit about a new type of pointer, a PTR_TO_SOCKET. This pointer type is accessed from BPF through the 'struct bpf_sock' structure. Signed-off-by: Joe Stringer Acked-by: Alexei Starovoitov Signed-off-by: Daniel Borkmann --- include/linux/bpf.h | 34 ++++++++++++++++++++++++++++++++++ include/linux/bpf_verifier.h | 2 ++ 2 files changed, 36 insertions(+) (limited to 'include/linux') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 018299a595c8..027697b6a22f 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -154,6 +154,7 @@ enum bpf_arg_type { ARG_PTR_TO_CTX, /* pointer to context */ ARG_ANYTHING, /* any (initialized) argument is ok */ + ARG_PTR_TO_SOCKET, /* pointer to bpf_sock */ }; /* type of values returned from helper functions */ @@ -162,6 +163,7 @@ enum bpf_return_type { RET_VOID, /* function doesn't return anything */ RET_PTR_TO_MAP_VALUE, /* returns a pointer to map elem value */ RET_PTR_TO_MAP_VALUE_OR_NULL, /* returns a pointer to map elem value or NULL */ + RET_PTR_TO_SOCKET_OR_NULL, /* returns a pointer to a socket or NULL */ }; /* eBPF function prototype used by verifier to allow BPF_CALLs from eBPF programs @@ -213,6 +215,8 @@ enum bpf_reg_type { PTR_TO_PACKET, /* reg points to skb->data */ PTR_TO_PACKET_END, /* skb->data + headlen */ PTR_TO_FLOW_KEYS, /* reg points to bpf_flow_keys */ + PTR_TO_SOCKET, /* reg points to struct bpf_sock */ + PTR_TO_SOCKET_OR_NULL, /* reg points to struct bpf_sock or NULL */ }; /* The information passed from prog-specific *_is_valid_access @@ -343,6 +347,11 @@ const struct bpf_func_proto *bpf_get_trace_printk_proto(void); typedef unsigned long (*bpf_ctx_copy_t)(void *dst, const void *src, unsigned long off, unsigned long len); +typedef u32 (*bpf_convert_ctx_access_t)(enum bpf_access_type type, + const struct bpf_insn *src, + struct bpf_insn *dst, + struct bpf_prog *prog, + u32 *target_size); u64 bpf_event_output(struct bpf_map *map, u64 flags, void *meta, u64 meta_size, void *ctx, u64 ctx_size, bpf_ctx_copy_t ctx_copy); @@ -836,4 +845,29 @@ extern const struct bpf_func_proto bpf_get_local_storage_proto; void bpf_user_rnd_init_once(void); u64 bpf_user_rnd_u32(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5); +#if defined(CONFIG_NET) +bool bpf_sock_is_valid_access(int off, int size, enum bpf_access_type type, + struct bpf_insn_access_aux *info); +u32 bpf_sock_convert_ctx_access(enum bpf_access_type type, + const struct bpf_insn *si, + struct bpf_insn *insn_buf, + struct bpf_prog *prog, + u32 *target_size); +#else +static inline bool bpf_sock_is_valid_access(int off, int size, + enum bpf_access_type type, + struct bpf_insn_access_aux *info) +{ + return false; +} +static inline u32 bpf_sock_convert_ctx_access(enum bpf_access_type type, + const struct bpf_insn *si, + struct bpf_insn *insn_buf, + struct bpf_prog *prog, + u32 *target_size) +{ + return 0; +} +#endif + #endif /* _LINUX_BPF_H */ diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h index d0e7f97e8b60..a411363098a5 100644 --- a/include/linux/bpf_verifier.h +++ b/include/linux/bpf_verifier.h @@ -58,6 +58,8 @@ struct bpf_reg_state { * offset, so they can share range knowledge. * For PTR_TO_MAP_VALUE_OR_NULL this is used to share which map value we * came from, when one is tested for != NULL. + * For PTR_TO_SOCKET this is used to share which pointers retain the + * same reference to the socket, to determine proper reference freeing. */ u32 id; /* For scalar types (SCALAR_VALUE), this represents our knowledge of -- cgit v1.2.3 From fd978bf7fd312581a7ca454a991f0ffb34c4204b Mon Sep 17 00:00:00 2001 From: Joe Stringer Date: Tue, 2 Oct 2018 13:35:35 -0700 Subject: bpf: Add reference tracking to verifier Allow helper functions to acquire a reference and return it into a register. Specific pointer types such as the PTR_TO_SOCKET will implicitly represent such a reference. The verifier must ensure that these references are released exactly once in each path through the program. To achieve this, this commit assigns an id to the pointer and tracks it in the 'bpf_func_state', then when the function or program exits, verifies that all of the acquired references have been freed. When the pointer is passed to a function that frees the reference, it is removed from the 'bpf_func_state` and all existing copies of the pointer in registers are marked invalid. Signed-off-by: Joe Stringer Acked-by: Alexei Starovoitov Signed-off-by: Daniel Borkmann --- include/linux/bpf_verifier.h | 24 +++++++++++++++++++++--- 1 file changed, 21 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h index a411363098a5..7b6fd2ab3263 100644 --- a/include/linux/bpf_verifier.h +++ b/include/linux/bpf_verifier.h @@ -104,6 +104,17 @@ struct bpf_stack_state { u8 slot_type[BPF_REG_SIZE]; }; +struct bpf_reference_state { + /* Track each reference created with a unique id, even if the same + * instruction creates the reference multiple times (eg, via CALL). + */ + int id; + /* Instruction where the allocation of this reference occurred. This + * is used purely to inform the user of a reference leak. + */ + int insn_idx; +}; + /* state of the program: * type of all registers and stack info */ @@ -121,7 +132,9 @@ struct bpf_func_state { */ u32 subprogno; - /* should be second to last. See copy_func_state() */ + /* The following fields should be last. See copy_func_state() */ + int acquired_refs; + struct bpf_reference_state *refs; int allocated_stack; struct bpf_stack_state *stack; }; @@ -217,11 +230,16 @@ __printf(2, 0) void bpf_verifier_vlog(struct bpf_verifier_log *log, __printf(2, 3) void bpf_verifier_log_write(struct bpf_verifier_env *env, const char *fmt, ...); -static inline struct bpf_reg_state *cur_regs(struct bpf_verifier_env *env) +static inline struct bpf_func_state *cur_func(struct bpf_verifier_env *env) { struct bpf_verifier_state *cur = env->cur_state; - return cur->frame[cur->curframe]->regs; + return cur->frame[cur->curframe]; +} + +static inline struct bpf_reg_state *cur_regs(struct bpf_verifier_env *env) +{ + return cur_func(env)->regs; } int bpf_prog_offload_verifier_prep(struct bpf_verifier_env *env); -- cgit v1.2.3 From f3edc2dbe0ad0bbbd8450cd37328f99acf215fd8 Mon Sep 17 00:00:00 2001 From: Ben Dooks Date: Mon, 1 Oct 2018 17:02:43 +0100 Subject: net: usbnet: make driver_info const The driver_info field that is used for describing each of the usb-net drivers using the usbnet.c core all declare their information as const and the usbnet.c itself does not try and modify the struct. It is therefore a good idea to make this const in the usbnet.c structure in case anyone tries to modify it. Signed-off-by: Ben Dooks Signed-off-by: Ben Dooks Signed-off-by: David S. Miller --- include/linux/usb/usbnet.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/usb/usbnet.h b/include/linux/usb/usbnet.h index e2ec3582e549..d8860f2d0976 100644 --- a/include/linux/usb/usbnet.h +++ b/include/linux/usb/usbnet.h @@ -28,7 +28,7 @@ struct usbnet { /* housekeeping */ struct usb_device *udev; struct usb_interface *intf; - struct driver_info *driver_info; + const struct driver_info *driver_info; const char *driver_name; void *driver_priv; wait_queue_head_t wait; -- cgit v1.2.3 From 16fc087b9cb22c9a97307cc24a5413d0df68fe11 Mon Sep 17 00:00:00 2001 From: Yashaswini Raghuram Prathivadi Bhayankaram Date: Fri, 10 Aug 2018 00:17:44 -0700 Subject: virtchnl: Added support to exchange additional speed values Introduced a new virtchnl capability flag and a struct to support exchange of additional supported speeds. Signed-off-by: Yashaswini Raghuram Prathivadi Bhayankaram Signed-off-by: Anirudh Venkataramanan Signed-off-by: Jeff Kirsher --- include/linux/avf/virtchnl.h | 15 +++++++++++++++ 1 file changed, 15 insertions(+) (limited to 'include/linux') diff --git a/include/linux/avf/virtchnl.h b/include/linux/avf/virtchnl.h index b41f7bc958ef..2c9756bd9c4c 100644 --- a/include/linux/avf/virtchnl.h +++ b/include/linux/avf/virtchnl.h @@ -252,6 +252,8 @@ VIRTCHNL_CHECK_STRUCT_LEN(16, virtchnl_vsi_resource); #define VIRTCHNL_VF_OFFLOAD_RX_ENCAP_CSUM 0X00400000 #define VIRTCHNL_VF_OFFLOAD_ADQ 0X00800000 +/* Define below the capability flags that are not offloads */ +#define VIRTCHNL_VF_CAP_ADV_LINK_SPEED 0x00000080 #define VF_BASE_MODE_OFFLOADS (VIRTCHNL_VF_OFFLOAD_L2 | \ VIRTCHNL_VF_OFFLOAD_VLAN | \ VIRTCHNL_VF_OFFLOAD_RSS_PF) @@ -596,10 +598,23 @@ enum virtchnl_event_codes { struct virtchnl_pf_event { enum virtchnl_event_codes event; union { + /* If the PF driver does not support the new speed reporting + * capabilities then use link_event else use link_event_adv to + * get the speed and link information. The ability to understand + * new speeds is indicated by setting the capability flag + * VIRTCHNL_VF_CAP_ADV_LINK_SPEED in vf_cap_flags parameter + * in virtchnl_vf_resource struct and can be used to determine + * which link event struct to use below. + */ struct { enum virtchnl_link_speed link_speed; bool link_status; } link_event; + struct { + /* link_speed provided in Mbps */ + u32 link_speed; + u8 link_status; + } link_event_adv; } event_data; int severity; -- cgit v1.2.3 From fcd29ad17c6ff885dfae58f557e9323941e63ba2 Mon Sep 17 00:00:00 2001 From: Feras Daoud Date: Thu, 9 Aug 2018 09:55:21 +0300 Subject: net/mlx5: Add Fast teardown support Today mlx5 devices support two teardown modes: 1- Regular teardown 2- Force teardown This change introduces the enhanced version of the "Force teardown" that allows SW to perform teardown in a faster way without the need to reclaim all the pages. Fast teardown provides the following advantages: 1- Fix a FW race condition that could cause command timeout 2- Avoid moving to polling mode 3- Close the vport to prevent PCI ACK to be sent without been scatter to memory Signed-off-by: Feras Daoud Reviewed-by: Majd Dibbiny Signed-off-by: Saeed Mahameed --- include/linux/mlx5/device.h | 4 ++++ include/linux/mlx5/mlx5_ifc.h | 6 ++++-- 2 files changed, 8 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mlx5/device.h b/include/linux/mlx5/device.h index 11fa4e66afc5..e9b502d5bcc1 100644 --- a/include/linux/mlx5/device.h +++ b/include/linux/mlx5/device.h @@ -504,6 +504,10 @@ struct health_buffer { __be16 ext_synd; }; +enum mlx5_cmd_addr_l_sz_offset { + MLX5_NIC_IFC_OFFSET = 8, +}; + struct mlx5_init_seg { __be32 fw_rev; __be32 cmdif_rev_fw_sub; diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index f043d65b9bac..6e8a882052b1 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -896,7 +896,8 @@ struct mlx5_ifc_cmd_hca_cap_bits { u8 log_max_mkey[0x6]; u8 reserved_at_f0[0x8]; u8 dump_fill_mkey[0x1]; - u8 reserved_at_f9[0x3]; + u8 reserved_at_f9[0x2]; + u8 fast_teardown[0x1]; u8 log_max_eq[0x4]; u8 max_indirection[0x8]; @@ -3352,12 +3353,13 @@ struct mlx5_ifc_teardown_hca_out_bits { u8 reserved_at_40[0x3f]; - u8 force_state[0x1]; + u8 state[0x1]; }; enum { MLX5_TEARDOWN_HCA_IN_PROFILE_GRACEFUL_CLOSE = 0x0, MLX5_TEARDOWN_HCA_IN_PROFILE_FORCE_CLOSE = 0x1, + MLX5_TEARDOWN_HCA_IN_PROFILE_PREPARE_FAST_TEARDOWN = 0x2, }; struct mlx5_ifc_teardown_hca_in_bits { -- cgit v1.2.3 From bbb4c4323a4d9cb5ca04db904aa3050a7586839a Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 4 Oct 2018 14:27:55 +0100 Subject: dns: Allow the dns resolver to retrieve a server set Allow the DNS resolver to retrieve a set of servers and their associated addresses, ports, preference and weight ratings. In terms of communication with userspace, "srv=1" is added to the callout string (the '1' indicating the maximum data version supported by the kernel) to ask the userspace side for this. If the userspace side doesn't recognise it, it will ignore the option and return the usual text address list. If the userspace side does recognise it, it will return some binary data that begins with a zero byte that would cause the string parsers to give an error. The second byte contains the version of the data in the blob (this may be between 1 and the version specified in the callout data). The remainder of the payload is version-specific. In version 1, the payload looks like (note that this is packed): u8 Non-string marker (ie. 0) u8 Content (0 => Server list) u8 Version (ie. 1) u8 Source (eg. DNS_RECORD_FROM_DNS_SRV) u8 Status (eg. DNS_LOOKUP_GOOD) u8 Number of servers foreach-server { u16 Name length (LE) u16 Priority (as per SRV record) (LE) u16 Weight (as per SRV record) (LE) u16 Port (LE) u8 Source (eg. DNS_RECORD_FROM_NSS) u8 Status (eg. DNS_LOOKUP_GOT_NOT_FOUND) u8 Protocol (eg. DNS_SERVER_PROTOCOL_UDP) u8 Number of addresses char[] Name (not NUL-terminated) foreach-address { u8 Family (AF_INET{,6}) union { u8[4] ipv4_addr u8[16] ipv6_addr } } } This can then be used to fetch a whole cell's VL-server configuration for AFS, for example. Signed-off-by: David Howells Signed-off-by: David S. Miller --- include/linux/dns_resolver.h | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/dns_resolver.h b/include/linux/dns_resolver.h index 6ac3cad9aef1..34a744a1bafc 100644 --- a/include/linux/dns_resolver.h +++ b/include/linux/dns_resolver.h @@ -24,11 +24,9 @@ #ifndef _LINUX_DNS_RESOLVER_H #define _LINUX_DNS_RESOLVER_H -#ifdef __KERNEL__ +#include extern int dns_query(const char *type, const char *name, size_t namelen, const char *options, char **_result, time64_t *_expiry); -#endif /* KERNEL */ - #endif /* _LINUX_DNS_RESOLVER_H */ -- cgit v1.2.3 From 661b8d1b0e3a745e25f05adef2ebd00d830eeea7 Mon Sep 17 00:00:00 2001 From: Magnus Karlsson Date: Mon, 1 Oct 2018 14:51:33 +0200 Subject: net: add umem reference in netdev{_rx}_queue These references to the umem will be used to store information on what kind of AF_XDP umem that is bound to a queue id, if any. Signed-off-by: Magnus Karlsson Signed-off-by: Daniel Borkmann --- include/linux/netdevice.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 1cbbf77a685f..8318f79586c2 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -609,6 +609,9 @@ struct netdev_queue { /* Subordinate device that the queue has been assigned to */ struct net_device *sb_dev; +#ifdef CONFIG_XDP_SOCKETS + struct xdp_umem *umem; +#endif /* * write-mostly part */ @@ -738,6 +741,9 @@ struct netdev_rx_queue { struct kobject kobj; struct net_device *dev; struct xdp_rxq_info xdp_rxq; +#ifdef CONFIG_XDP_SOCKETS + struct xdp_umem *umem; +#endif } ____cacheline_aligned_in_smp; /* -- cgit v1.2.3 From c2a90025ad09d830c8d8ae69f485eac6aaaa2472 Mon Sep 17 00:00:00 2001 From: Quentin Schulz Date: Thu, 4 Oct 2018 14:22:03 +0200 Subject: phy: add QSGMII and PCIE modes Prepare for upcoming phys that'll handle QSGMII or PCIe. Reviewed-by: Florian Fainelli Signed-off-by: Quentin Schulz Signed-off-by: David S. Miller --- include/linux/phy/phy.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/phy/phy.h b/include/linux/phy/phy.h index 9713aebdd348..03b319f89a34 100644 --- a/include/linux/phy/phy.h +++ b/include/linux/phy/phy.h @@ -37,9 +37,11 @@ enum phy_mode { PHY_MODE_USB_OTG, PHY_MODE_SGMII, PHY_MODE_2500SGMII, + PHY_MODE_QSGMII, PHY_MODE_10GKR, PHY_MODE_UFS_HS_A, PHY_MODE_UFS_HS_B, + PHY_MODE_PCIE, }; /** -- cgit v1.2.3 From c941ce9c282cc606e6517356fcc186a9da2b4ab9 Mon Sep 17 00:00:00 2001 From: Quentin Monnet Date: Sun, 7 Oct 2018 12:56:47 +0100 Subject: bpf: add verifier callback to get stack usage info for offloaded progs In preparation for BPF-to-BPF calls in offloaded programs, add a new function attribute to the struct bpf_prog_offload_ops so that drivers supporting eBPF offload can hook at the end of program verification, and potentially extract information collected by the verifier. Implement a minimal callback (returning 0) in the drivers providing the structs, namely netdevsim and nfp. This will be useful in the nfp driver, in later commits, to extract the number of subprograms as well as the stack depth for those subprograms. Signed-off-by: Quentin Monnet Reviewed-by: Jiong Wang Reviewed-by: Jakub Kicinski Signed-off-by: Daniel Borkmann --- include/linux/bpf.h | 1 + include/linux/bpf_verifier.h | 1 + 2 files changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 027697b6a22f..9b558713447f 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -263,6 +263,7 @@ struct bpf_verifier_ops { struct bpf_prog_offload_ops { int (*insn_hook)(struct bpf_verifier_env *env, int insn_idx, int prev_insn_idx); + int (*finalize)(struct bpf_verifier_env *env); }; struct bpf_prog_offload { diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h index 7b6fd2ab3263..9e8056ec20fa 100644 --- a/include/linux/bpf_verifier.h +++ b/include/linux/bpf_verifier.h @@ -245,5 +245,6 @@ static inline struct bpf_reg_state *cur_regs(struct bpf_verifier_env *env) int bpf_prog_offload_verifier_prep(struct bpf_verifier_env *env); int bpf_prog_offload_verify_insn(struct bpf_verifier_env *env, int insn_idx, int prev_insn_idx); +int bpf_prog_offload_finalize(struct bpf_verifier_env *env); #endif /* _LINUX_BPF_VERIFIER_H */ -- cgit v1.2.3 From 4a19edb60d0203cd5bf95a8b46ea8f63fd41194c Mon Sep 17 00:00:00 2001 From: David Ahern Date: Sun, 7 Oct 2018 20:16:22 -0700 Subject: netlink: Pass extack to dump handlers Declare extack in netlink_dump and pass to dump handlers via netlink_callback. Add any extack message after the dump_done_errno allowing error messages to be returned. This will be useful when strict checking is done on dump requests, returning why the dump fails EINVAL. Signed-off-by: David Ahern Acked-by: Christian Brauner Signed-off-by: David S. Miller --- include/linux/netlink.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/netlink.h b/include/linux/netlink.h index 71f121b66ca8..88c8a2d83eb3 100644 --- a/include/linux/netlink.h +++ b/include/linux/netlink.h @@ -176,6 +176,7 @@ struct netlink_callback { void *data; /* the module that dump function belong to */ struct module *module; + struct netlink_ext_ack *extack; u16 family; u16 min_dump_alloc; unsigned int prev_seq, seq; -- cgit v1.2.3 From 89d35528d17d25819a755a2b52931e911baebc66 Mon Sep 17 00:00:00 2001 From: David Ahern Date: Sun, 7 Oct 2018 20:16:27 -0700 Subject: netlink: Add new socket option to enable strict checking on dumps Add a new socket option, NETLINK_DUMP_STRICT_CHK, that userspace can use via setsockopt to request strict checking of headers and attributes on dump requests. To get dump features such as kernel side filtering based on data in the header or attributes appended to the dump request, userspace must call setsockopt() for NETLINK_DUMP_STRICT_CHK and a non-zero value. Since the netlink sock and its flags are private to the af_netlink code, the strict checking flag is passed to dump handlers via a flag in the netlink_callback struct. For old userspace on new kernel there is no impact as all of the data checks in later patches are wrapped in a check on the new strict flag. For new userspace on old kernel, the setsockopt will fail and even if new userspace sets data in the headers and appended attributes the kernel will silently ignore it. Moving forward when the setsockopt succeeds, the new userspace on old kernel means the dump request can pass an attribute the kernel does not understand. The dump will then fail as the older kernel does not understand it. New userspace on new kernel setting the socket option gets the benefit of the improved data dump. Kernel side the NETLINK_DUMP_STRICT_CHK uapi is converted to a generic NETLINK_F_STRICT_CHK flag which can potentially be leveraged for tighter checking on the NEW, DEL, and SET commands. Signed-off-by: David Ahern Acked-by: Christian Brauner Signed-off-by: David S. Miller --- include/linux/netlink.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/netlink.h b/include/linux/netlink.h index 88c8a2d83eb3..72580f1a72a2 100644 --- a/include/linux/netlink.h +++ b/include/linux/netlink.h @@ -179,6 +179,7 @@ struct netlink_callback { struct netlink_ext_ack *extack; u16 family; u16 min_dump_alloc; + bool strict_check; unsigned int prev_seq, seq; long args[6]; }; -- cgit v1.2.3 From f6a8a19bb11b46d60250ddc4e3e1ba6aa166f488 Mon Sep 17 00:00:00 2001 From: Denis Drozdov Date: Tue, 14 Aug 2018 14:08:51 +0300 Subject: RDMA/netdev: Hoist alloc_netdev_mqs out of the driver netdev has several interfaces that expect to call alloc_netdev_mqs from the core code, with the driver only providing the arguments. This is incompatible with the rdma_netdev interface that returns the netdev directly. Thus re-organize the API used by ipoib so that the verbs core code calls alloc_netdev_mqs for the driver. This is done by allowing the drivers to provide the allocation parameters via a 'get_params' callback and then initializing an allocated netdev as a second step. Fixes: cd565b4b51e5 ("IB/IPoIB: Support acceleration options callbacks") Signed-off-by: Jason Gunthorpe Signed-off-by: Denis Drozdov Signed-off-by: Saeed Mahameed --- include/linux/mlx5/driver.h | 14 ++++---------- 1 file changed, 4 insertions(+), 10 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index 26a92462f4ce..4b75796cac23 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -1228,21 +1228,15 @@ int mlx5_lag_query_cong_counters(struct mlx5_core_dev *dev, struct mlx5_uars_page *mlx5_get_uars_page(struct mlx5_core_dev *mdev); void mlx5_put_uars_page(struct mlx5_core_dev *mdev, struct mlx5_uars_page *up); -#ifndef CONFIG_MLX5_CORE_IPOIB -static inline -struct net_device *mlx5_rdma_netdev_alloc(struct mlx5_core_dev *mdev, - struct ib_device *ibdev, - const char *name, - void (*setup)(struct net_device *)) -{ - return ERR_PTR(-EOPNOTSUPP); -} -#else +#ifdef CONFIG_MLX5_CORE_IPOIB struct net_device *mlx5_rdma_netdev_alloc(struct mlx5_core_dev *mdev, struct ib_device *ibdev, const char *name, void (*setup)(struct net_device *)); #endif /* CONFIG_MLX5_CORE_IPOIB */ +int mlx5_rdma_rn_get_params(struct mlx5_core_dev *mdev, + struct ib_device *device, + struct rdma_netdev_alloc_params *params); struct mlx5_profile { u64 mask; -- cgit v1.2.3 From f458e832ba510f843807fc2c2906a8fb59554c9f Mon Sep 17 00:00:00 2001 From: Chaitanya T K Date: Sat, 6 Oct 2018 19:34:59 +0200 Subject: mac80211: minstrel: Enable STBC and LDPC for VHT Rates If peer support reception of STBC and LDPC, enable them for better performance. Signed-off-by: Chaitanya TK Signed-off-by: Felix Fietkau Signed-off-by: Johannes Berg --- include/linux/ieee80211.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h index c4809ad8ab46..0ef67f837ae1 100644 --- a/include/linux/ieee80211.h +++ b/include/linux/ieee80211.h @@ -1670,6 +1670,7 @@ struct ieee80211_mu_edca_param_set { #define IEEE80211_VHT_CAP_RXSTBC_3 0x00000300 #define IEEE80211_VHT_CAP_RXSTBC_4 0x00000400 #define IEEE80211_VHT_CAP_RXSTBC_MASK 0x00000700 +#define IEEE80211_VHT_CAP_RXSTBC_SHIFT 8 #define IEEE80211_VHT_CAP_SU_BEAMFORMER_CAPABLE 0x00000800 #define IEEE80211_VHT_CAP_SU_BEAMFORMEE_CAPABLE 0x00001000 #define IEEE80211_VHT_CAP_BEAMFORMEE_STS_SHIFT 13 -- cgit v1.2.3 From cc53aabcc283c36274d3f3ce9adc4b40c21d4838 Mon Sep 17 00:00:00 2001 From: Govind Singh Date: Thu, 11 Oct 2018 13:16:01 +0300 Subject: firmware: qcom: scm: Add WLAN VMID for Qualcomm SCM interface Add WLAN related VMID's to support wlan driver to set up the remote's permissions call via TrustZone. Signed-off-by: Govind Singh Reviewed-by: Bjorn Andersson Acked-by: Niklas Cassel Reviewed-by: Brian Norris Signed-off-by: Kalle Valo --- include/linux/qcom_scm.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/qcom_scm.h b/include/linux/qcom_scm.h index 5d65521260b3..06996ad4f2bc 100644 --- a/include/linux/qcom_scm.h +++ b/include/linux/qcom_scm.h @@ -1,4 +1,4 @@ -/* Copyright (c) 2010-2015, The Linux Foundation. All rights reserved. +/* Copyright (c) 2010-2015, 2018, The Linux Foundation. All rights reserved. * Copyright (C) 2015 Linaro Ltd. * * This program is free software; you can redistribute it and/or modify @@ -33,6 +33,8 @@ struct qcom_scm_vmperm { #define QCOM_SCM_VMID_HLOS 0x3 #define QCOM_SCM_VMID_MSS_MSA 0xF +#define QCOM_SCM_VMID_WLAN 0x18 +#define QCOM_SCM_VMID_WLAN_CE 0x19 #define QCOM_SCM_PERM_READ 0x4 #define QCOM_SCM_PERM_WRITE 0x2 #define QCOM_SCM_PERM_EXEC 0x1 -- cgit v1.2.3 From 604326b41a6fb9b4a78b6179335decee0365cd8c Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Sat, 13 Oct 2018 02:45:58 +0200 Subject: bpf, sockmap: convert to generic sk_msg interface Add a generic sk_msg layer, and convert current sockmap and later kTLS over to make use of it. While sk_buff handles network packet representation from netdevice up to socket, sk_msg handles data representation from application to socket layer. This means that sk_msg framework spans across ULP users in the kernel, and enables features such as introspection or filtering of data with the help of BPF programs that operate on this data structure. Latter becomes in particular useful for kTLS where data encryption is deferred into the kernel, and as such enabling the kernel to perform L7 introspection and policy based on BPF for TLS connections where the record is being encrypted after BPF has run and came to a verdict. In order to get there, first step is to transform open coding of scatter-gather list handling into a common core framework that subsystems can use. The code itself has been split and refactored into three bigger pieces: i) the generic sk_msg API which deals with managing the scatter gather ring, providing helpers for walking and mangling, transferring application data from user space into it, and preparing it for BPF pre/post-processing, ii) the plain sock map itself where sockets can be attached to or detached from; these bits are independent of i) which can now be used also without sock map, and iii) the integration with plain TCP as one protocol to be used for processing L7 application data (later this could e.g. also be extended to other protocols like UDP). The semantics are the same with the old sock map code and therefore no change of user facing behavior or APIs. While pursuing this work it also helped finding a number of bugs in the old sockmap code that we've fixed already in earlier commits. The test_sockmap kselftest suite passes through fine as well. Joint work with John. Signed-off-by: Daniel Borkmann Signed-off-by: John Fastabend Signed-off-by: Alexei Starovoitov --- include/linux/bpf.h | 33 ++--- include/linux/bpf_types.h | 2 +- include/linux/filter.h | 21 --- include/linux/skmsg.h | 371 ++++++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 383 insertions(+), 44 deletions(-) create mode 100644 include/linux/skmsg.h (limited to 'include/linux') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 9b558713447f..e60fff48288b 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -737,33 +737,18 @@ static inline void bpf_map_offload_map_free(struct bpf_map *map) } #endif /* CONFIG_NET && CONFIG_BPF_SYSCALL */ -#if defined(CONFIG_STREAM_PARSER) && defined(CONFIG_BPF_SYSCALL) && defined(CONFIG_INET) -struct sock *__sock_map_lookup_elem(struct bpf_map *map, u32 key); -struct sock *__sock_hash_lookup_elem(struct bpf_map *map, void *key); -int sock_map_prog(struct bpf_map *map, struct bpf_prog *prog, u32 type); -int sockmap_get_from_fd(const union bpf_attr *attr, int type, - struct bpf_prog *prog); +#if defined(CONFIG_BPF_STREAM_PARSER) +int sock_map_prog_update(struct bpf_map *map, struct bpf_prog *prog, u32 which); +int sock_map_get_from_fd(const union bpf_attr *attr, struct bpf_prog *prog); #else -static inline struct sock *__sock_map_lookup_elem(struct bpf_map *map, u32 key) -{ - return NULL; -} - -static inline struct sock *__sock_hash_lookup_elem(struct bpf_map *map, - void *key) -{ - return NULL; -} - -static inline int sock_map_prog(struct bpf_map *map, - struct bpf_prog *prog, - u32 type) +static inline int sock_map_prog_update(struct bpf_map *map, + struct bpf_prog *prog, u32 which) { return -EOPNOTSUPP; } -static inline int sockmap_get_from_fd(const union bpf_attr *attr, int type, - struct bpf_prog *prog) +static inline int sock_map_get_from_fd(const union bpf_attr *attr, + struct bpf_prog *prog) { return -EINVAL; } @@ -839,6 +824,10 @@ extern const struct bpf_func_proto bpf_get_stack_proto; extern const struct bpf_func_proto bpf_sock_map_update_proto; extern const struct bpf_func_proto bpf_sock_hash_update_proto; extern const struct bpf_func_proto bpf_get_current_cgroup_id_proto; +extern const struct bpf_func_proto bpf_msg_redirect_hash_proto; +extern const struct bpf_func_proto bpf_msg_redirect_map_proto; +extern const struct bpf_func_proto bpf_sk_redirect_hash_proto; +extern const struct bpf_func_proto bpf_sk_redirect_map_proto; extern const struct bpf_func_proto bpf_get_local_storage_proto; diff --git a/include/linux/bpf_types.h b/include/linux/bpf_types.h index 5432f4c9f50e..fa48343a5ea1 100644 --- a/include/linux/bpf_types.h +++ b/include/linux/bpf_types.h @@ -57,7 +57,7 @@ BPF_MAP_TYPE(BPF_MAP_TYPE_ARRAY_OF_MAPS, array_of_maps_map_ops) BPF_MAP_TYPE(BPF_MAP_TYPE_HASH_OF_MAPS, htab_of_maps_map_ops) #ifdef CONFIG_NET BPF_MAP_TYPE(BPF_MAP_TYPE_DEVMAP, dev_map_ops) -#if defined(CONFIG_STREAM_PARSER) && defined(CONFIG_INET) +#if defined(CONFIG_BPF_STREAM_PARSER) BPF_MAP_TYPE(BPF_MAP_TYPE_SOCKMAP, sock_map_ops) BPF_MAP_TYPE(BPF_MAP_TYPE_SOCKHASH, sock_hash_ops) #endif diff --git a/include/linux/filter.h b/include/linux/filter.h index 6791a0ac0139..5771874bc01e 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -520,24 +520,6 @@ struct bpf_skb_data_end { void *data_end; }; -struct sk_msg_buff { - void *data; - void *data_end; - __u32 apply_bytes; - __u32 cork_bytes; - int sg_copybreak; - int sg_start; - int sg_curr; - int sg_end; - struct scatterlist sg_data[MAX_SKB_FRAGS]; - bool sg_copy[MAX_SKB_FRAGS]; - __u32 flags; - struct sock *sk_redir; - struct sock *sk; - struct sk_buff *skb; - struct list_head list; -}; - struct bpf_redirect_info { u32 ifindex; u32 flags; @@ -833,9 +815,6 @@ void xdp_do_flush_map(void); void bpf_warn_invalid_xdp_action(u32 act); -struct sock *do_sk_redirect_map(struct sk_buff *skb); -struct sock *do_msg_redirect_map(struct sk_msg_buff *md); - #ifdef CONFIG_INET struct sock *bpf_run_sk_reuseport(struct sock_reuseport *reuse, struct sock *sk, struct bpf_prog *prog, struct sk_buff *skb, diff --git a/include/linux/skmsg.h b/include/linux/skmsg.h new file mode 100644 index 000000000000..95678103c4a0 --- /dev/null +++ b/include/linux/skmsg.h @@ -0,0 +1,371 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* Copyright (c) 2017 - 2018 Covalent IO, Inc. http://covalent.io */ + +#ifndef _LINUX_SKMSG_H +#define _LINUX_SKMSG_H + +#include +#include +#include +#include + +#include +#include +#include + +#define MAX_MSG_FRAGS MAX_SKB_FRAGS + +enum __sk_action { + __SK_DROP = 0, + __SK_PASS, + __SK_REDIRECT, + __SK_NONE, +}; + +struct sk_msg_sg { + u32 start; + u32 curr; + u32 end; + u32 size; + u32 copybreak; + bool copy[MAX_MSG_FRAGS]; + struct scatterlist data[MAX_MSG_FRAGS]; +}; + +struct sk_msg { + struct sk_msg_sg sg; + void *data; + void *data_end; + u32 apply_bytes; + u32 cork_bytes; + u32 flags; + struct sk_buff *skb; + struct sock *sk_redir; + struct sock *sk; + struct list_head list; +}; + +struct sk_psock_progs { + struct bpf_prog *msg_parser; + struct bpf_prog *skb_parser; + struct bpf_prog *skb_verdict; +}; + +enum sk_psock_state_bits { + SK_PSOCK_TX_ENABLED, +}; + +struct sk_psock_link { + struct list_head list; + struct bpf_map *map; + void *link_raw; +}; + +struct sk_psock_parser { + struct strparser strp; + bool enabled; + void (*saved_data_ready)(struct sock *sk); +}; + +struct sk_psock_work_state { + struct sk_buff *skb; + u32 len; + u32 off; +}; + +struct sk_psock { + struct sock *sk; + struct sock *sk_redir; + u32 apply_bytes; + u32 cork_bytes; + u32 eval; + struct sk_msg *cork; + struct sk_psock_progs progs; + struct sk_psock_parser parser; + struct sk_buff_head ingress_skb; + struct list_head ingress_msg; + unsigned long state; + struct list_head link; + spinlock_t link_lock; + refcount_t refcnt; + void (*saved_unhash)(struct sock *sk); + void (*saved_close)(struct sock *sk, long timeout); + void (*saved_write_space)(struct sock *sk); + struct proto *sk_proto; + struct sk_psock_work_state work_state; + struct work_struct work; + union { + struct rcu_head rcu; + struct work_struct gc; + }; +}; + +int sk_msg_alloc(struct sock *sk, struct sk_msg *msg, int len, + int elem_first_coalesce); +void sk_msg_trim(struct sock *sk, struct sk_msg *msg, int len); +int sk_msg_free(struct sock *sk, struct sk_msg *msg); +int sk_msg_free_nocharge(struct sock *sk, struct sk_msg *msg); +void sk_msg_free_partial(struct sock *sk, struct sk_msg *msg, u32 bytes); +void sk_msg_free_partial_nocharge(struct sock *sk, struct sk_msg *msg, + u32 bytes); + +void sk_msg_return(struct sock *sk, struct sk_msg *msg, int bytes); + +int sk_msg_zerocopy_from_iter(struct sock *sk, struct iov_iter *from, + struct sk_msg *msg, u32 bytes); +int sk_msg_memcopy_from_iter(struct sock *sk, struct iov_iter *from, + struct sk_msg *msg, u32 bytes); + +static inline void sk_msg_check_to_free(struct sk_msg *msg, u32 i, u32 bytes) +{ + WARN_ON(i == msg->sg.end && bytes); +} + +static inline void sk_msg_apply_bytes(struct sk_psock *psock, u32 bytes) +{ + if (psock->apply_bytes) { + if (psock->apply_bytes < bytes) + psock->apply_bytes = 0; + else + psock->apply_bytes -= bytes; + } +} + +#define sk_msg_iter_var_prev(var) \ + do { \ + if (var == 0) \ + var = MAX_MSG_FRAGS - 1; \ + else \ + var--; \ + } while (0) + +#define sk_msg_iter_var_next(var) \ + do { \ + var++; \ + if (var == MAX_MSG_FRAGS) \ + var = 0; \ + } while (0) + +#define sk_msg_iter_prev(msg, which) \ + sk_msg_iter_var_prev(msg->sg.which) + +#define sk_msg_iter_next(msg, which) \ + sk_msg_iter_var_next(msg->sg.which) + +static inline void sk_msg_clear_meta(struct sk_msg *msg) +{ + memset(&msg->sg, 0, offsetofend(struct sk_msg_sg, copy)); +} + +static inline void sk_msg_init(struct sk_msg *msg) +{ + memset(msg, 0, sizeof(*msg)); + sg_init_marker(msg->sg.data, ARRAY_SIZE(msg->sg.data)); +} + +static inline void sk_msg_xfer(struct sk_msg *dst, struct sk_msg *src, + int which, u32 size) +{ + dst->sg.data[which] = src->sg.data[which]; + dst->sg.data[which].length = size; + src->sg.data[which].length -= size; + src->sg.data[which].offset += size; +} + +static inline u32 sk_msg_elem_used(const struct sk_msg *msg) +{ + return msg->sg.end >= msg->sg.start ? + msg->sg.end - msg->sg.start : + msg->sg.end + (MAX_MSG_FRAGS - msg->sg.start); +} + +static inline bool sk_msg_full(const struct sk_msg *msg) +{ + return (msg->sg.end == msg->sg.start) && msg->sg.size; +} + +static inline struct scatterlist *sk_msg_elem(struct sk_msg *msg, int which) +{ + return &msg->sg.data[which]; +} + +static inline struct page *sk_msg_page(struct sk_msg *msg, int which) +{ + return sg_page(sk_msg_elem(msg, which)); +} + +static inline bool sk_msg_to_ingress(const struct sk_msg *msg) +{ + return msg->flags & BPF_F_INGRESS; +} + +static inline void sk_msg_compute_data_pointers(struct sk_msg *msg) +{ + struct scatterlist *sge = sk_msg_elem(msg, msg->sg.start); + + if (msg->sg.copy[msg->sg.start]) { + msg->data = NULL; + msg->data_end = NULL; + } else { + msg->data = sg_virt(sge); + msg->data_end = msg->data + sge->length; + } +} + +static inline void sk_msg_page_add(struct sk_msg *msg, struct page *page, + u32 len, u32 offset) +{ + struct scatterlist *sge; + + get_page(page); + sge = sk_msg_elem(msg, msg->sg.end); + sg_set_page(sge, page, len, offset); + sg_unmark_end(sge); + + msg->sg.copy[msg->sg.end] = true; + msg->sg.size += len; + sk_msg_iter_next(msg, end); +} + +static inline struct sk_psock *sk_psock(const struct sock *sk) +{ + return rcu_dereference_sk_user_data(sk); +} + +static inline bool sk_has_psock(struct sock *sk) +{ + return sk_psock(sk) != NULL && sk->sk_prot->recvmsg == tcp_bpf_recvmsg; +} + +static inline void sk_psock_queue_msg(struct sk_psock *psock, + struct sk_msg *msg) +{ + list_add_tail(&msg->list, &psock->ingress_msg); +} + +static inline void sk_psock_report_error(struct sk_psock *psock, int err) +{ + struct sock *sk = psock->sk; + + sk->sk_err = err; + sk->sk_error_report(sk); +} + +struct sk_psock *sk_psock_init(struct sock *sk, int node); + +int sk_psock_init_strp(struct sock *sk, struct sk_psock *psock); +void sk_psock_start_strp(struct sock *sk, struct sk_psock *psock); +void sk_psock_stop_strp(struct sock *sk, struct sk_psock *psock); + +int sk_psock_msg_verdict(struct sock *sk, struct sk_psock *psock, + struct sk_msg *msg); + +static inline struct sk_psock_link *sk_psock_init_link(void) +{ + return kzalloc(sizeof(struct sk_psock_link), + GFP_ATOMIC | __GFP_NOWARN); +} + +static inline void sk_psock_free_link(struct sk_psock_link *link) +{ + kfree(link); +} + +struct sk_psock_link *sk_psock_link_pop(struct sk_psock *psock); +#if defined(CONFIG_BPF_STREAM_PARSER) +void sk_psock_unlink(struct sock *sk, struct sk_psock_link *link); +#else +static inline void sk_psock_unlink(struct sock *sk, + struct sk_psock_link *link) +{ +} +#endif + +void __sk_psock_purge_ingress_msg(struct sk_psock *psock); + +static inline void sk_psock_cork_free(struct sk_psock *psock) +{ + if (psock->cork) { + sk_msg_free(psock->sk, psock->cork); + kfree(psock->cork); + psock->cork = NULL; + } +} + +static inline void sk_psock_update_proto(struct sock *sk, + struct sk_psock *psock, + struct proto *ops) +{ + psock->saved_unhash = sk->sk_prot->unhash; + psock->saved_close = sk->sk_prot->close; + psock->saved_write_space = sk->sk_write_space; + + psock->sk_proto = sk->sk_prot; + sk->sk_prot = ops; +} + +static inline void sk_psock_restore_proto(struct sock *sk, + struct sk_psock *psock) +{ + if (psock->sk_proto) { + sk->sk_prot = psock->sk_proto; + psock->sk_proto = NULL; + } +} + +static inline void sk_psock_set_state(struct sk_psock *psock, + enum sk_psock_state_bits bit) +{ + set_bit(bit, &psock->state); +} + +static inline void sk_psock_clear_state(struct sk_psock *psock, + enum sk_psock_state_bits bit) +{ + clear_bit(bit, &psock->state); +} + +static inline bool sk_psock_test_state(const struct sk_psock *psock, + enum sk_psock_state_bits bit) +{ + return test_bit(bit, &psock->state); +} + +static inline struct sk_psock *sk_psock_get(struct sock *sk) +{ + struct sk_psock *psock; + + rcu_read_lock(); + psock = sk_psock(sk); + if (psock && !refcount_inc_not_zero(&psock->refcnt)) + psock = NULL; + rcu_read_unlock(); + return psock; +} + +void sk_psock_stop(struct sock *sk, struct sk_psock *psock); +void sk_psock_destroy(struct rcu_head *rcu); +void sk_psock_drop(struct sock *sk, struct sk_psock *psock); + +static inline void sk_psock_put(struct sock *sk, struct sk_psock *psock) +{ + if (refcount_dec_and_test(&psock->refcnt)) + sk_psock_drop(sk, psock); +} + +static inline void psock_set_prog(struct bpf_prog **pprog, + struct bpf_prog *prog) +{ + prog = xchg(pprog, prog); + if (prog) + bpf_prog_put(prog); +} + +static inline void psock_progs_drop(struct sk_psock_progs *progs) +{ + psock_set_prog(&progs->msg_parser, NULL); + psock_set_prog(&progs->skb_parser, NULL); + psock_set_prog(&progs->skb_verdict, NULL); +} + +#endif /* _LINUX_SKMSG_H */ -- cgit v1.2.3 From d829e9c4112b52f4f00195900fd4c685f61365ab Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Sat, 13 Oct 2018 02:45:59 +0200 Subject: tls: convert to generic sk_msg interface Convert kTLS over to make use of sk_msg interface for plaintext and encrypted scattergather data, so it reuses all the sk_msg helpers and data structure which later on in a second step enables to glue this to BPF. This also allows to remove quite a bit of open coded helpers which are covered by the sk_msg API. Recent changes in kTLs 80ece6a03aaf ("tls: Remove redundant vars from tls record structure") and 4e6d47206c32 ("tls: Add support for inplace records encryption") changed the data path handling a bit; while we've kept the latter optimization intact, we had to undo the former change to better fit the sk_msg model, hence the sg_aead_in and sg_aead_out have been brought back and are linked into the sk_msg sgs. Now the kTLS record contains a msg_plaintext and msg_encrypted sk_msg each. In the original code, the zerocopy_from_iter() has been used out of TX but also RX path. For the strparser skb-based RX path, we've left the zerocopy_from_iter() in decrypt_internal() mostly untouched, meaning it has been moved into tls_setup_from_iter() with charging logic removed (as not used from RX). Given RX path is not based on sk_msg objects, we haven't pursued setting up a dummy sk_msg to call into sk_msg_zerocopy_from_iter(), but it could be an option to prusue in a later step. Joint work with John. Signed-off-by: Daniel Borkmann Signed-off-by: John Fastabend Signed-off-by: Alexei Starovoitov --- include/linux/skmsg.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/skmsg.h b/include/linux/skmsg.h index 95678103c4a0..4e84b3c2eff8 100644 --- a/include/linux/skmsg.h +++ b/include/linux/skmsg.h @@ -102,6 +102,8 @@ struct sk_psock { int sk_msg_alloc(struct sock *sk, struct sk_msg *msg, int len, int elem_first_coalesce); +int sk_msg_clone(struct sock *sk, struct sk_msg *dst, struct sk_msg *src, + u32 off, u32 len); void sk_msg_trim(struct sock *sk, struct sk_msg *msg, int len); int sk_msg_free(struct sock *sk, struct sk_msg *msg); int sk_msg_free_nocharge(struct sock *sk, struct sk_msg *msg); -- cgit v1.2.3 From d3b18ad31f93d0b6bae105c679018a1ba7daa9ca Mon Sep 17 00:00:00 2001 From: John Fastabend Date: Sat, 13 Oct 2018 02:46:01 +0200 Subject: tls: add bpf support to sk_msg handling This work adds BPF sk_msg verdict program support to kTLS allowing BPF and kTLS to be combined together. Previously kTLS and sk_msg verdict programs were mutually exclusive in the ULP layer which created challenges for the orchestrator when trying to apply TCP based policy, for example. To resolve this, leveraging the work from previous patches that consolidates the use of sk_msg, we can finally enable BPF sk_msg verdict programs so they continue to run after the kTLS socket is created. No change in behavior when kTLS is not used in combination with BPF, the kselftest suite for kTLS also runs successfully. Joint work with Daniel. Signed-off-by: John Fastabend Signed-off-by: Daniel Borkmann Signed-off-by: Alexei Starovoitov --- include/linux/skmsg.h | 41 +++++++++++++++++++++++++++++++++++++++-- 1 file changed, 39 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/skmsg.h b/include/linux/skmsg.h index 4e84b3c2eff8..0b919f0bc6d6 100644 --- a/include/linux/skmsg.h +++ b/include/linux/skmsg.h @@ -29,7 +29,11 @@ struct sk_msg_sg { u32 size; u32 copybreak; bool copy[MAX_MSG_FRAGS]; - struct scatterlist data[MAX_MSG_FRAGS]; + /* The extra element is used for chaining the front and sections when + * the list becomes partitioned (e.g. end < start). The crypto APIs + * require the chaining. + */ + struct scatterlist data[MAX_MSG_FRAGS + 1]; }; struct sk_msg { @@ -112,6 +116,7 @@ void sk_msg_free_partial_nocharge(struct sock *sk, struct sk_msg *msg, u32 bytes); void sk_msg_return(struct sock *sk, struct sk_msg *msg, int bytes); +void sk_msg_return_zero(struct sock *sk, struct sk_msg *msg, int bytes); int sk_msg_zerocopy_from_iter(struct sock *sk, struct iov_iter *from, struct sk_msg *msg, u32 bytes); @@ -161,8 +166,9 @@ static inline void sk_msg_clear_meta(struct sk_msg *msg) static inline void sk_msg_init(struct sk_msg *msg) { + BUILD_BUG_ON(ARRAY_SIZE(msg->sg.data) - 1 != MAX_MSG_FRAGS); memset(msg, 0, sizeof(*msg)); - sg_init_marker(msg->sg.data, ARRAY_SIZE(msg->sg.data)); + sg_init_marker(msg->sg.data, MAX_MSG_FRAGS); } static inline void sk_msg_xfer(struct sk_msg *dst, struct sk_msg *src, @@ -174,6 +180,12 @@ static inline void sk_msg_xfer(struct sk_msg *dst, struct sk_msg *src, src->sg.data[which].offset += size; } +static inline void sk_msg_xfer_full(struct sk_msg *dst, struct sk_msg *src) +{ + memcpy(dst, src, sizeof(*src)); + sk_msg_init(src); +} + static inline u32 sk_msg_elem_used(const struct sk_msg *msg) { return msg->sg.end >= msg->sg.start ? @@ -229,6 +241,26 @@ static inline void sk_msg_page_add(struct sk_msg *msg, struct page *page, sk_msg_iter_next(msg, end); } +static inline void sk_msg_sg_copy(struct sk_msg *msg, u32 i, bool copy_state) +{ + do { + msg->sg.copy[i] = copy_state; + sk_msg_iter_var_next(i); + if (i == msg->sg.end) + break; + } while (1); +} + +static inline void sk_msg_sg_copy_set(struct sk_msg *msg, u32 start) +{ + sk_msg_sg_copy(msg, start, true); +} + +static inline void sk_msg_sg_copy_clear(struct sk_msg *msg, u32 start) +{ + sk_msg_sg_copy(msg, start, false); +} + static inline struct sk_psock *sk_psock(const struct sock *sk) { return rcu_dereference_sk_user_data(sk); @@ -245,6 +277,11 @@ static inline void sk_psock_queue_msg(struct sk_psock *psock, list_add_tail(&msg->list, &psock->ingress_msg); } +static inline bool sk_psock_queue_empty(const struct sk_psock *psock) +{ + return psock ? list_empty(&psock->ingress_msg) : true; +} + static inline void sk_psock_report_error(struct sk_psock *psock, int err) { struct sock *sk = psock->sk; -- cgit v1.2.3 From 9f9a742db40f95f4dc20fc7293de4ea6ddb24e47 Mon Sep 17 00:00:00 2001 From: "Maciej W. Rozycki" Date: Tue, 9 Oct 2018 23:57:49 +0100 Subject: FDDI: defza: Support capturing outgoing SMT traffic DEC FDDIcontroller 700 (DEFZA) uses a Tx/Rx queue pair to communicate SMT frames with adapter's firmware. Any SMT frame received from the RMC via the Rx queue is queued back by the driver to the SMT Rx queue for the firmware to process. Similarly the firmware uses the SMT Tx queue to supply the driver with SMT frames which are queued back to the Tx queue for the RMC to send to the ring. When a network tap is attached to an FDDI interface handled by `defza' any incoming SMT frames captured are queued to our usual processing of network data received, which in turn delivers them to any listening taps. However the outgoing SMT frames produced by the firmware bypass our network protocol stack and are therefore not delivered to taps. This in turn means that taps are missing a part of network traffic sent by the adapter, which may make it more difficult to track down network problems or do general traffic analysis. Call `dev_queue_xmit_nit' then in the SMT Tx path, having checked that a network tap is attached, with a newly-created `dev_nit_active' helper wrapping the usual condition used in the transmit path. Signed-off-by: Maciej W. Rozycki Signed-off-by: David S. Miller --- include/linux/netdevice.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 22e4ef7bb701..dc1d9ed33b31 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -3645,6 +3645,7 @@ static __always_inline int ____dev_forward_skb(struct net_device *dev, return 0; } +bool dev_nit_active(struct net_device *dev); void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev); extern int netdev_budget; -- cgit v1.2.3 From 5f6188a8003d080e3753b8f14f4a5a2325ae1ff6 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 15 Oct 2018 09:37:52 -0700 Subject: tcp: do not change tcp_wstamp_ns in tcp_mstamp_refresh In EDT design, I made the mistake of using tcp_wstamp_ns to store the last tcp_clock_ns() sample and to store the pacing virtual timer. This causes major regressions at high speed flows. Introduce tcp_clock_cache to store last tcp_clock_ns(). This is needed because some arches have slow high-resolution kernel time service. tcp_wstamp_ns is only updated when a packet is sent. Note that we can remove tcp_mstamp in the future since tcp_mstamp is essentially tcp_clock_cache/1000, so the apparent socket size increase is temporary. Fixes: 9799ccb0e984 ("tcp: add tcp_wstamp_ns socket field") Signed-off-by: Eric Dumazet Acked-by: Soheil Hassas Yeganeh Signed-off-by: David S. Miller --- include/linux/tcp.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/tcp.h b/include/linux/tcp.h index 848f5b25e178..8ed77bb4ed86 100644 --- a/include/linux/tcp.h +++ b/include/linux/tcp.h @@ -249,6 +249,7 @@ struct tcp_sock { u32 tlp_high_seq; /* snd_nxt at the time of TLP retransmit. */ u64 tcp_wstamp_ns; /* departure time for next sent data packet */ + u64 tcp_clock_cache; /* cache last tcp_clock_ns() (see tcp_mstamp_refresh()) */ /* RTT measurement */ u64 tcp_mstamp; /* most recent packet received/sent */ -- cgit v1.2.3 From 22e6c58b8c2843337ec4e8464b1ce6e869ca5bf4 Mon Sep 17 00:00:00 2001 From: David Ahern Date: Mon, 15 Oct 2018 18:56:41 -0700 Subject: netlink: Add answer_flags to netlink_callback With dump filtering we need a way to ensure the NLM_F_DUMP_FILTERED flag is set on a message back to the user if the data returned is influenced by some input attributes. Normally this can be done as messages are added to the skb, but if the filter results in no data being returned, the user could be confused as to why. This patch adds answer_flags to the netlink_callback allowing dump handlers to set the NLM_F_DUMP_FILTERED at a minimum in the NLMSG_DONE message ensuring the flag gets back to the user. The netlink_callback space is initialized to 0 via a memset in __netlink_dump_start, so init of the new answer_flags is covered. Signed-off-by: David Ahern Signed-off-by: David S. Miller --- include/linux/netlink.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/netlink.h b/include/linux/netlink.h index 72580f1a72a2..4da90a6ab536 100644 --- a/include/linux/netlink.h +++ b/include/linux/netlink.h @@ -180,6 +180,7 @@ struct netlink_callback { u16 family; u16 min_dump_alloc; bool strict_check; + u16 answer_flags; unsigned int prev_seq, seq; long args[6]; }; -- cgit v1.2.3 From e1cedae1ba6b09ae8376c1486712bf91ea0dfc41 Mon Sep 17 00:00:00 2001 From: David Ahern Date: Mon, 15 Oct 2018 18:56:46 -0700 Subject: ipmr: Refactor mr_rtm_dumproute Move per-table loops from mr_rtm_dumproute to mr_table_dump and export mr_table_dump for dumps by specific table id. Signed-off-by: David Ahern Signed-off-by: David S. Miller --- include/linux/mroute_base.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mroute_base.h b/include/linux/mroute_base.h index 6675b9f81979..db85373c8d15 100644 --- a/include/linux/mroute_base.h +++ b/include/linux/mroute_base.h @@ -283,6 +283,12 @@ void *mr_mfc_find_any(struct mr_table *mrt, int vifi, void *hasharg); int mr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb, struct mr_mfc *c, struct rtmsg *rtm); +int mr_table_dump(struct mr_table *mrt, struct sk_buff *skb, + struct netlink_callback *cb, + int (*fill)(struct mr_table *mrt, struct sk_buff *skb, + u32 portid, u32 seq, struct mr_mfc *c, + int cmd, int flags), + spinlock_t *lock); int mr_rtm_dumproute(struct sk_buff *skb, struct netlink_callback *cb, struct mr_table *(*iter)(struct net *net, struct mr_table *mrt), -- cgit v1.2.3 From cb167893f41e21e6bd283d78e53489289dc0592d Mon Sep 17 00:00:00 2001 From: David Ahern Date: Mon, 15 Oct 2018 18:56:47 -0700 Subject: net: Plumb support for filtering ipv4 and ipv6 multicast route dumps Implement kernel side filtering of routes by egress device index and table id. If the table id is given in the filter, lookup table and call mr_table_dump directly for it. Signed-off-by: David Ahern Signed-off-by: David S. Miller --- include/linux/mroute_base.h | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mroute_base.h b/include/linux/mroute_base.h index db85373c8d15..34de06b426ef 100644 --- a/include/linux/mroute_base.h +++ b/include/linux/mroute_base.h @@ -7,6 +7,7 @@ #include #include #include +#include /** * struct vif_device - interface representor for multicast routing @@ -288,7 +289,7 @@ int mr_table_dump(struct mr_table *mrt, struct sk_buff *skb, int (*fill)(struct mr_table *mrt, struct sk_buff *skb, u32 portid, u32 seq, struct mr_mfc *c, int cmd, int flags), - spinlock_t *lock); + spinlock_t *lock, struct fib_dump_filter *filter); int mr_rtm_dumproute(struct sk_buff *skb, struct netlink_callback *cb, struct mr_table *(*iter)(struct net *net, struct mr_table *mrt), @@ -296,7 +297,7 @@ int mr_rtm_dumproute(struct sk_buff *skb, struct netlink_callback *cb, struct sk_buff *skb, u32 portid, u32 seq, struct mr_mfc *c, int cmd, int flags), - spinlock_t *lock); + spinlock_t *lock, struct fib_dump_filter *filter); int mr_dump(struct net *net, struct notifier_block *nb, unsigned short family, int (*rules_dump)(struct net *net, @@ -346,7 +347,7 @@ mr_rtm_dumproute(struct sk_buff *skb, struct netlink_callback *cb, struct sk_buff *skb, u32 portid, u32 seq, struct mr_mfc *c, int cmd, int flags), - spinlock_t *lock) + spinlock_t *lock, struct fib_dump_filter *filter) { return -EINVAL; } -- cgit v1.2.3 From a218dc82f0b5c6c8ad3d58c9870ed69e26c08b3e Mon Sep 17 00:00:00 2001 From: Fernando Fernandez Mancera Date: Wed, 10 Oct 2018 09:57:13 +0200 Subject: netfilter: nft_osf: Add ttl option support Add ttl option support to the nftables "osf" expression. Signed-off-by: Fernando Fernandez Mancera Signed-off-by: Pablo Neira Ayuso --- include/linux/netfilter/nfnetlink_osf.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/netfilter/nfnetlink_osf.h b/include/linux/netfilter/nfnetlink_osf.h index ecf7dab81e9e..c6000046c966 100644 --- a/include/linux/netfilter/nfnetlink_osf.h +++ b/include/linux/netfilter/nfnetlink_osf.h @@ -27,6 +27,7 @@ bool nf_osf_match(const struct sk_buff *skb, u_int8_t family, const struct list_head *nf_osf_fingers); const char *nf_osf_find(const struct sk_buff *skb, - const struct list_head *nf_osf_fingers); + const struct list_head *nf_osf_fingers, + const int ttl_check); #endif /* _NFOSF_H */ -- cgit v1.2.3 From c56a8be7e7aa855ebcccf0e9d9eba2216514d399 Mon Sep 17 00:00:00 2001 From: Rahul Verma Date: Tue, 16 Oct 2018 03:59:20 -0700 Subject: qed: Add supported link and advertise link to display in ethtool. Added transceiver type, speed capability and board types in HSI, are utilizing to display the accurate link information in ethtool. Signed-off-by: Rahul Verma Signed-off-by: Ariel Elior Signed-off-by: David S. Miller --- include/linux/qed/qed_if.h | 26 +++++++++++++++++++++++--- 1 file changed, 23 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/qed/qed_if.h b/include/linux/qed/qed_if.h index dee3c9c744f7..a47321a0d572 100644 --- a/include/linux/qed/qed_if.h +++ b/include/linux/qed/qed_if.h @@ -667,15 +667,35 @@ enum qed_link_mode_bits { QED_LM_Autoneg_BIT = BIT(1), QED_LM_Asym_Pause_BIT = BIT(2), QED_LM_Pause_BIT = BIT(3), - QED_LM_1000baseT_Half_BIT = BIT(4), - QED_LM_1000baseT_Full_BIT = BIT(5), + QED_LM_1000baseT_Full_BIT = BIT(4), + QED_LM_10000baseT_Full_BIT = BIT(5), QED_LM_10000baseKR_Full_BIT = BIT(6), QED_LM_20000baseKR2_Full_BIT = BIT(7), QED_LM_25000baseKR_Full_BIT = BIT(8), QED_LM_40000baseLR4_Full_BIT = BIT(9), QED_LM_50000baseKR2_Full_BIT = BIT(10), QED_LM_100000baseKR4_Full_BIT = BIT(11), - QED_LM_COUNT = 11 + QED_LM_2500baseX_Full_BIT = BIT(12), + QED_LM_Backplane_BIT = BIT(13), + QED_LM_1000baseKX_Full_BIT = BIT(14), + QED_LM_10000baseKX4_Full_BIT = BIT(15), + QED_LM_10000baseR_FEC_BIT = BIT(16), + QED_LM_40000baseKR4_Full_BIT = BIT(17), + QED_LM_40000baseCR4_Full_BIT = BIT(18), + QED_LM_40000baseSR4_Full_BIT = BIT(19), + QED_LM_25000baseCR_Full_BIT = BIT(20), + QED_LM_25000baseSR_Full_BIT = BIT(21), + QED_LM_50000baseCR2_Full_BIT = BIT(22), + QED_LM_100000baseSR4_Full_BIT = BIT(23), + QED_LM_100000baseCR4_Full_BIT = BIT(24), + QED_LM_100000baseLR4_ER4_Full_BIT = BIT(25), + QED_LM_50000baseSR2_Full_BIT = BIT(26), + QED_LM_1000baseX_Full_BIT = BIT(27), + QED_LM_10000baseCR_Full_BIT = BIT(28), + QED_LM_10000baseSR_Full_BIT = BIT(29), + QED_LM_10000baseLR_Full_BIT = BIT(30), + QED_LM_10000baseLRM_Full_BIT = BIT(31), + QED_LM_COUNT = 32 }; struct qed_link_params { -- cgit v1.2.3 From 94a04d1d3d3681adde1a3e022b25dbac7b345b7e Mon Sep 17 00:00:00 2001 From: Yonatan Cohen Date: Tue, 9 Oct 2018 12:05:12 +0300 Subject: net/mlx5: Expose DC scatter to CQE capability bit dc_req_scat_data_cqe capability bit determines if requester scatter to cqe is available for 64 bytes CQE over DC transport type. Signed-off-by: Yonatan Cohen Reviewed-by: Guy Levi Signed-off-by: Leon Romanovsky --- include/linux/mlx5/mlx5_ifc.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index 68f4d5f9d929..0f460fb22c31 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -1005,7 +1005,8 @@ struct mlx5_ifc_cmd_hca_cap_bits { u8 umr_modify_atomic_disabled[0x1]; u8 umr_indirect_mkey_disabled[0x1]; u8 umr_fence[0x2]; - u8 reserved_at_20c[0x3]; + u8 dc_req_scat_data_cqe[0x1]; + u8 reserved_at_20d[0x2]; u8 drain_sigerr[0x1]; u8 cmdif_checksum[0x2]; u8 sigerr_cqe[0x1]; -- cgit v1.2.3 From 3f4c3127d332000530349db4843deece27fe5e0c Mon Sep 17 00:00:00 2001 From: John Fastabend Date: Tue, 16 Oct 2018 10:36:01 -0700 Subject: bpf: sockmap, fix skmsg recvmsg handler to track size correctly When converting sockmap to new skmsg generic data structures we missed that the recvmsg handler did not correctly use sg.size and instead was using individual elements length. The result is if a sock is closed with outstanding data we omit the call to sk_mem_uncharge() and can get the warning below. [ 66.728282] WARNING: CPU: 6 PID: 5783 at net/core/stream.c:206 sk_stream_kill_queues+0x1fa/0x210 To fix this correct the redirect handler to xfer the size along with the scatterlist and also decrement the size from the recvmsg handler. Now when a sock is closed the remaining 'size' will be decremented with sk_mem_uncharge(). Signed-off-by: John Fastabend Acked-by: Alexei Starovoitov Signed-off-by: Daniel Borkmann --- include/linux/skmsg.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/skmsg.h b/include/linux/skmsg.h index 0b919f0bc6d6..31df0d9fa536 100644 --- a/include/linux/skmsg.h +++ b/include/linux/skmsg.h @@ -176,6 +176,7 @@ static inline void sk_msg_xfer(struct sk_msg *dst, struct sk_msg *src, { dst->sg.data[which] = src->sg.data[which]; dst->sg.data[which].length = size; + dst->sg.size += size; src->sg.data[which].length -= size; src->sg.data[which].offset += size; } -- cgit v1.2.3 From 8734a162c13b1a893e7dff8de0df81fed04c51a6 Mon Sep 17 00:00:00 2001 From: John Fastabend Date: Tue, 16 Oct 2018 11:07:59 -0700 Subject: bpf: skmsg, improve sk_msg_used_element to work in cork context Currently sk_msg_used_element is only called in zerocopy context where cork is not possible and if this case happens we fallback to copy mode. However the helper is more useful if it works in all contexts. This patch resolved the case where if end == head indicating a full or empty ring the helper always reports an empty ring. To fix this add a test for the full ring case to avoid reporting a full ring has 0 elements. This additional functionality will be used in the next patches from recvmsg context where end = head with a full ring is a valid case. Signed-off-by: John Fastabend Signed-off-by: Daniel Borkmann --- include/linux/skmsg.h | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/skmsg.h b/include/linux/skmsg.h index 31df0d9fa536..22347b08e1f8 100644 --- a/include/linux/skmsg.h +++ b/include/linux/skmsg.h @@ -187,18 +187,21 @@ static inline void sk_msg_xfer_full(struct sk_msg *dst, struct sk_msg *src) sk_msg_init(src); } +static inline bool sk_msg_full(const struct sk_msg *msg) +{ + return (msg->sg.end == msg->sg.start) && msg->sg.size; +} + static inline u32 sk_msg_elem_used(const struct sk_msg *msg) { + if (sk_msg_full(msg)) + return MAX_MSG_FRAGS; + return msg->sg.end >= msg->sg.start ? msg->sg.end - msg->sg.start : msg->sg.end + (MAX_MSG_FRAGS - msg->sg.start); } -static inline bool sk_msg_full(const struct sk_msg *msg) -{ - return (msg->sg.end == msg->sg.start) && msg->sg.size; -} - static inline struct scatterlist *sk_msg_elem(struct sk_msg *msg, int which) { return &msg->sg.data[which]; -- cgit v1.2.3 From b8aee82250b7d90a32b11ba208656f52dbaca342 Mon Sep 17 00:00:00 2001 From: Mark Bloch Date: Tue, 2 Oct 2018 22:57:24 +0000 Subject: net/mlx5: E-Switch, Get counters for offloaded flows from callers There's no real reason for the e-switch logic to manage the creation of counters for offloaded flows. The API already has the directive for the caller to denote they want to attach a counter to the created flow. As such, we go and move the management of flow counters to the mlx5e tc offload logic. This also lets us remove an inelegant interface where the FS layer had to provide a way to retrieve a counter from a flow rule. Signed-off-by: Mark Bloch Reviewed-by: Or Gerlitz Signed-off-by: Saeed Mahameed --- include/linux/mlx5/fs.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mlx5/fs.h b/include/linux/mlx5/fs.h index b1c026f1c8ba..74d0ea146c9a 100644 --- a/include/linux/mlx5/fs.h +++ b/include/linux/mlx5/fs.h @@ -186,7 +186,6 @@ int mlx5_modify_rule_destination(struct mlx5_flow_handle *handler, struct mlx5_flow_destination *new_dest, struct mlx5_flow_destination *old_dest); -struct mlx5_fc *mlx5_flow_rule_counter(struct mlx5_flow_handle *handler); struct mlx5_fc *mlx5_fc_create(struct mlx5_core_dev *dev, bool aging); void mlx5_fc_destroy(struct mlx5_core_dev *dev, struct mlx5_fc *counter); void mlx5_fc_query_cached(struct mlx5_fc *counter, -- cgit v1.2.3 From 171c7625bef999848ee6032c6dde96e7330c4d15 Mon Sep 17 00:00:00 2001 From: Mark Bloch Date: Wed, 3 Oct 2018 00:03:35 +0000 Subject: net/mlx5: Use flow counter IDs and not the wrapping cache object Currently, when a flow rule is created using the FS core layer, the caller has to pass the entire flow counter object and not just the counter HW handle (ID). This requires both the FS core and the caller to have knowledge about the inner implementation of the FS layer flow counters cache and limits the possible users. Move to use the counter ID across the place when dealing with flows. Doing this decoupling, now can we privatize the inner implementation of the flow counters. Signed-off-by: Mark Bloch Reviewed-by: Or Gerlitz Signed-off-by: Saeed Mahameed --- include/linux/mlx5/fs.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mlx5/fs.h b/include/linux/mlx5/fs.h index 74d0ea146c9a..a5fc62184195 100644 --- a/include/linux/mlx5/fs.h +++ b/include/linux/mlx5/fs.h @@ -92,7 +92,7 @@ struct mlx5_flow_destination { u32 tir_num; u32 ft_num; struct mlx5_flow_table *ft; - struct mlx5_fc *counter; + u32 counter_id; struct { u16 num; u16 vhca_id; @@ -192,6 +192,7 @@ void mlx5_fc_query_cached(struct mlx5_fc *counter, u64 *bytes, u64 *packets, u64 *lastuse); int mlx5_fc_query(struct mlx5_core_dev *dev, struct mlx5_fc *counter, u64 *packets, u64 *bytes); +u32 mlx5_fc_id(struct mlx5_fc *counter); int mlx5_fs_add_rx_underlay_qpn(struct mlx5_core_dev *dev, u32 underlay_qpn); int mlx5_fs_remove_rx_underlay_qpn(struct mlx5_core_dev *dev, u32 underlay_qpn); -- cgit v1.2.3 From b9aa0ba17af5afa13605eb6ea91f1974da97a2e2 Mon Sep 17 00:00:00 2001 From: Paul Blakey Date: Thu, 31 May 2018 11:50:23 +0300 Subject: net/mlx5: Add cap bits for multi fdb encap If set, the firmware supports creating of flow tables with encap enabled while VFs are configured, if we already created one (restriction still applies on the first creation). Signed-off-by: Paul Blakey Reviewed-by: Or Gerlitz Signed-off-by: Saeed Mahameed --- include/linux/mlx5/mlx5_ifc.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index 15e36198f85f..963611820006 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -584,7 +584,9 @@ struct mlx5_ifc_flow_table_nic_cap_bits { struct mlx5_ifc_flow_table_eswitch_cap_bits { u8 reserved_at_0[0x1c]; u8 fdb_multi_path_to_table[0x1]; - u8 reserved_at_1d[0x1e3]; + u8 reserved_at_1d[0x1]; + u8 multi_fdb_encap[0x1]; + u8 reserved_at_1e[0x1e1]; struct mlx5_ifc_flow_table_prop_layout_bits flow_table_properties_nic_esw_fdb; -- cgit v1.2.3 From 328edb499f99126946845ece477c9c1afe8631af Mon Sep 17 00:00:00 2001 From: Paul Blakey Date: Tue, 3 Jul 2018 11:13:00 +0300 Subject: net/mlx5: Split FDB fast path prio to multiple namespaces Towards supporting multi-chains and priorities, split the FDB fast path to multiple namespaces (sub namespaces), each with multiple priorities. This patch adds a new flow steering type, FS_TYPE_PRIO_CHAINS, which is like current FS_TYPE_PRIO, but may contain only namespaces, and those will be in parallel to one another in terms of managing of the flow tables connections inside them. Meaning, while searching for the next or previous flow table to connect for a new table inside such namespace we skip the parallel namespaces in the same level under the FS_TYPE_PRIO_CHAINS prio we originated from. We use this new type for splitting the fast path prio into multiple parallel namespaces, each containing normal prios. The prios inside them (and their tables) will be connected to one another, but not from one parallel namespace to another, instead the last prio in each namespace will be connected to the next prio in the containing FDB namespace, which is the slow path prio. Signed-off-by: Paul Blakey Acked-by: Or Gerlitz Signed-off-by: Saeed Mahameed --- include/linux/mlx5/fs.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mlx5/fs.h b/include/linux/mlx5/fs.h index a5fc62184195..f8d00872c7d3 100644 --- a/include/linux/mlx5/fs.h +++ b/include/linux/mlx5/fs.h @@ -101,6 +101,8 @@ struct mlx5_flow_destination { }; }; +struct mlx5_flow_namespace * +mlx5_get_fdb_sub_ns(struct mlx5_core_dev *dev, int n); struct mlx5_flow_namespace * mlx5_get_flow_namespace(struct mlx5_core_dev *dev, enum mlx5_flow_namespace_type type); -- cgit v1.2.3 From d5634fee245f9e92787e3a34ef621fc12b2cbf16 Mon Sep 17 00:00:00 2001 From: Paul Blakey Date: Thu, 20 Sep 2018 12:17:48 +0200 Subject: net/mlx5: Add a no-append flow insertion mode If no-append flag is set, we will add a new FTE, instead of appending the actions of the inserted rule when the same match already exists. While here, move the has_flow_tag boolean indicator to be a flag too. This patch doesn't change any functionality. Signed-off-by: Paul Blakey Reviewed-by: Or Gerlitz Reviewed-by: Mark Bloch Signed-off-by: Saeed Mahameed --- include/linux/mlx5/fs.h | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mlx5/fs.h b/include/linux/mlx5/fs.h index f8d00872c7d3..5660f07d3be0 100644 --- a/include/linux/mlx5/fs.h +++ b/include/linux/mlx5/fs.h @@ -158,20 +158,28 @@ struct mlx5_fs_vlan { #define MLX5_FS_VLAN_DEPTH 2 +enum { + FLOW_ACT_HAS_TAG = BIT(0), + FLOW_ACT_NO_APPEND = BIT(1), +}; + struct mlx5_flow_act { u32 action; - bool has_flow_tag; u32 flow_tag; u32 reformat_id; u32 modify_id; uintptr_t esp_id; + u32 flags; struct mlx5_fs_vlan vlan[MLX5_FS_VLAN_DEPTH]; struct ib_counters *counters; }; #define MLX5_DECLARE_FLOW_ACT(name) \ - struct mlx5_flow_act name = {MLX5_FLOW_CONTEXT_ACTION_FWD_DEST,\ - MLX5_FS_DEFAULT_FLOW_TAG, 0, 0} + struct mlx5_flow_act name = { .action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST,\ + .flow_tag = MLX5_FS_DEFAULT_FLOW_TAG, \ + .reformat_id = 0, \ + .modify_id = 0, \ + .flags = 0, } /* Single destination per rule. * Group ID is implied by the match criteria. -- cgit v1.2.3 From 82385b0d2d2504aee51aa3fb40ebfb03603f64c3 Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Wed, 17 Oct 2018 15:01:37 +0200 Subject: net: skbuff.h: Mark expected switch fall-throughs In preparation to enabling -Wimplicit-fallthrough, mark switch cases where we are expecting to fall through. Acked-by: Kees Cook Signed-off-by: Gustavo A. R. Silva Signed-off-by: David S. Miller --- include/linux/skbuff.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 119d092c6b13..0ba687454267 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -3505,13 +3505,19 @@ static inline bool __skb_metadata_differs(const struct sk_buff *skb_a, #define __it(x, op) (x -= sizeof(u##op)) #define __it_diff(a, b, op) (*(u##op *)__it(a, op)) ^ (*(u##op *)__it(b, op)) case 32: diffs |= __it_diff(a, b, 64); + /* fall through */ case 24: diffs |= __it_diff(a, b, 64); + /* fall through */ case 16: diffs |= __it_diff(a, b, 64); + /* fall through */ case 8: diffs |= __it_diff(a, b, 64); break; case 28: diffs |= __it_diff(a, b, 64); + /* fall through */ case 20: diffs |= __it_diff(a, b, 64); + /* fall through */ case 12: diffs |= __it_diff(a, b, 64); + /* fall through */ case 4: diffs |= __it_diff(a, b, 32); break; } -- cgit v1.2.3 From 4972e6fa3a04032830bc3d6bb343d08ab3546773 Mon Sep 17 00:00:00 2001 From: Tariq Toukan Date: Wed, 12 Sep 2018 15:36:41 +0300 Subject: net/mlx5: Refactor fragmented buffer struct fields and init flow Take struct mlx5_frag_buf out of mlx5_frag_buf_ctrl, as it is not needed to manage and control the datapath of the fragmented buffers API. struct mlx5_frag_buf contains control info to manage the allocation and de-allocation of the fragmented buffer. Its fields are not relevant for datapath, so here I take them out of the struct mlx5_frag_buf_ctrl, except for the fragments array itself. In addition, modified mlx5_fill_fbc to initialise the frags pointers as well. This implies that the buffer must be allocated before the function is called. A set of type-specific *_get_byte_size() functions are replaced by a generic one. Signed-off-by: Tariq Toukan Signed-off-by: Saeed Mahameed --- include/linux/mlx5/driver.h | 22 ++++++++-------------- 1 file changed, 8 insertions(+), 14 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index 94ffd02af7cd..e10f61a1f77d 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -357,7 +357,7 @@ struct mlx5_frag_buf { }; struct mlx5_frag_buf_ctrl { - struct mlx5_frag_buf frag_buf; + struct mlx5_buf_list *frags; u32 sz_m1; u16 frag_sz_m1; u16 strides_offset; @@ -994,10 +994,12 @@ static inline u32 mlx5_base_mkey(const u32 key) return key & 0xffffff00u; } -static inline void mlx5_fill_fbc_offset(u8 log_stride, u8 log_sz, +static inline void mlx5_init_fbc_offset(struct mlx5_buf_list *frags, + u8 log_stride, u8 log_sz, u16 strides_offset, struct mlx5_frag_buf_ctrl *fbc) { + fbc->frags = frags; fbc->log_stride = log_stride; fbc->log_sz = log_sz; fbc->sz_m1 = (1 << fbc->log_sz) - 1; @@ -1006,18 +1008,11 @@ static inline void mlx5_fill_fbc_offset(u8 log_stride, u8 log_sz, fbc->strides_offset = strides_offset; } -static inline void mlx5_fill_fbc(u8 log_stride, u8 log_sz, +static inline void mlx5_init_fbc(struct mlx5_buf_list *frags, + u8 log_stride, u8 log_sz, struct mlx5_frag_buf_ctrl *fbc) { - mlx5_fill_fbc_offset(log_stride, log_sz, 0, fbc); -} - -static inline void mlx5_core_init_cq_frag_buf(struct mlx5_frag_buf_ctrl *fbc, - void *cqc) -{ - mlx5_fill_fbc(6 + MLX5_GET(cqc, cqc, cqe_sz), - MLX5_GET(cqc, cqc, log_cq_size), - fbc); + mlx5_init_fbc_offset(frags, log_stride, log_sz, 0, fbc); } static inline void *mlx5_frag_buf_get_wqe(struct mlx5_frag_buf_ctrl *fbc, @@ -1028,8 +1023,7 @@ static inline void *mlx5_frag_buf_get_wqe(struct mlx5_frag_buf_ctrl *fbc, ix += fbc->strides_offset; frag = ix >> fbc->log_frag_strides; - return fbc->frag_buf.frags[frag].buf + - ((fbc->frag_sz_m1 & ix) << fbc->log_stride); + return fbc->frags[frag].buf + ((fbc->frag_sz_m1 & ix) << fbc->log_stride); } int mlx5_cmd_init(struct mlx5_core_dev *dev); -- cgit v1.2.3 From 4b5b9c7d972e8a7b1e7691c7c921ec0d6dec33b9 Mon Sep 17 00:00:00 2001 From: Shay Agroskin Date: Tue, 9 Oct 2018 14:16:43 +0300 Subject: net/mlx5: Add FEC fields to Port Phy Link Mode (PPLM) reg Added FEC related fields to PPLM layout. These fields are needed to set and query FEC policy for different link speeds. Signed-off-by: Shay Agroskin Reviewed-by: Eran Ben Elisha Signed-off-by: Saeed Mahameed --- include/linux/mlx5/driver.h | 1 + include/linux/mlx5/mlx5_ifc.h | 39 ++++++++++++++++++++++++++++----------- 2 files changed, 29 insertions(+), 11 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index e10f61a1f77d..696ed3f7f894 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -133,6 +133,7 @@ enum { MLX5_REG_PVLC = 0x500f, MLX5_REG_PCMR = 0x5041, MLX5_REG_PMLP = 0x5002, + MLX5_REG_PPLM = 0x5023, MLX5_REG_PCAM = 0x507f, MLX5_REG_NODE_DESC = 0x6001, MLX5_REG_HOST_ENDIANNESS = 0x7004, diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index 963611820006..47b09a742ae5 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -7828,20 +7828,34 @@ struct mlx5_ifc_pplr_reg_bits { struct mlx5_ifc_pplm_reg_bits { u8 reserved_at_0[0x8]; - u8 local_port[0x8]; - u8 reserved_at_10[0x10]; + u8 local_port[0x8]; + u8 reserved_at_10[0x10]; - u8 reserved_at_20[0x20]; + u8 reserved_at_20[0x20]; - u8 port_profile_mode[0x8]; - u8 static_port_profile[0x8]; - u8 active_port_profile[0x8]; - u8 reserved_at_58[0x8]; + u8 port_profile_mode[0x8]; + u8 static_port_profile[0x8]; + u8 active_port_profile[0x8]; + u8 reserved_at_58[0x8]; - u8 retransmission_active[0x8]; - u8 fec_mode_active[0x18]; + u8 retransmission_active[0x8]; + u8 fec_mode_active[0x18]; - u8 reserved_at_80[0x20]; + u8 rs_fec_correction_bypass_cap[0x4]; + u8 reserved_at_84[0x8]; + u8 fec_override_cap_56g[0x4]; + u8 fec_override_cap_100g[0x4]; + u8 fec_override_cap_50g[0x4]; + u8 fec_override_cap_25g[0x4]; + u8 fec_override_cap_10g_40g[0x4]; + + u8 rs_fec_correction_bypass_admin[0x4]; + u8 reserved_at_a4[0x8]; + u8 fec_override_admin_56g[0x4]; + u8 fec_override_admin_100g[0x4]; + u8 fec_override_admin_50g[0x4]; + u8 fec_override_admin_25g[0x4]; + u8 fec_override_admin_10g_40g[0x4]; }; struct mlx5_ifc_ppcnt_reg_bits { @@ -8137,7 +8151,10 @@ struct mlx5_ifc_pcam_enhanced_features_bits { struct mlx5_ifc_pcam_regs_5000_to_507f_bits { u8 port_access_reg_cap_mask_127_to_96[0x20]; u8 port_access_reg_cap_mask_95_to_64[0x20]; - u8 port_access_reg_cap_mask_63_to_32[0x20]; + + u8 port_access_reg_cap_mask_63_to_36[0x1c]; + u8 pplm[0x1]; + u8 port_access_reg_cap_mask_34_to_32[0x3]; u8 port_access_reg_cap_mask_31_to_13[0x13]; u8 pbmc[0x1]; -- cgit v1.2.3 From 67daf1186086ad4b2ec09b8078b835936977d06a Mon Sep 17 00:00:00 2001 From: Shay Agroskin Date: Sun, 30 Sep 2018 09:58:08 +0300 Subject: net/mlx5: Added "per_lane_error_counters" cap bit to PCAM Added "Per lane raw errors" capability bit in Ports Capabilities Mask (PCAM) enhanced features layout. This bit determines if the fields "phy_raw_errors_laneX" in "Physical Layer statistical" counters group are supported. Signed-off-by: Shay Agroskin Reviewed-by: Eran Ben Elisha Signed-off-by: Saeed Mahameed --- include/linux/mlx5/mlx5_ifc.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index 47b09a742ae5..dbff9ff28f2c 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -8140,7 +8140,8 @@ struct mlx5_ifc_pcam_enhanced_features_bits { u8 rx_icrc_encapsulated_counter[0x1]; u8 reserved_at_6e[0x8]; u8 pfcc_mask[0x1]; - u8 reserved_at_77[0x4]; + u8 reserved_at_77[0x3]; + u8 per_lane_error_counters[0x1]; u8 rx_buffer_fullness_counters[0x1]; u8 ptys_connector_type[0x1]; u8 reserved_at_7d[0x1]; -- cgit v1.2.3 From 144991602e6a14d667b295f1b099e609ce857772 Mon Sep 17 00:00:00 2001 From: Mauricio Vasquez B Date: Thu, 18 Oct 2018 15:16:09 +0200 Subject: bpf: rename stack trace map operations In the following patches queue and stack maps (FIFO and LIFO datastructures) will be implemented. In order to avoid confusion and a possible name clash rename stack_map_ops to stack_trace_map_ops Signed-off-by: Mauricio Vasquez B Acked-by: Song Liu Signed-off-by: Alexei Starovoitov --- include/linux/bpf_types.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/bpf_types.h b/include/linux/bpf_types.h index fa48343a5ea1..7bad4e1947ed 100644 --- a/include/linux/bpf_types.h +++ b/include/linux/bpf_types.h @@ -51,7 +51,7 @@ BPF_MAP_TYPE(BPF_MAP_TYPE_LRU_HASH, htab_lru_map_ops) BPF_MAP_TYPE(BPF_MAP_TYPE_LRU_PERCPU_HASH, htab_lru_percpu_map_ops) BPF_MAP_TYPE(BPF_MAP_TYPE_LPM_TRIE, trie_map_ops) #ifdef CONFIG_PERF_EVENTS -BPF_MAP_TYPE(BPF_MAP_TYPE_STACK_TRACE, stack_map_ops) +BPF_MAP_TYPE(BPF_MAP_TYPE_STACK_TRACE, stack_trace_map_ops) #endif BPF_MAP_TYPE(BPF_MAP_TYPE_ARRAY_OF_MAPS, array_of_maps_map_ops) BPF_MAP_TYPE(BPF_MAP_TYPE_HASH_OF_MAPS, htab_of_maps_map_ops) -- cgit v1.2.3 From 2ea864c58f19bf70a0e2415f9f1c53814e07f1b4 Mon Sep 17 00:00:00 2001 From: Mauricio Vasquez B Date: Thu, 18 Oct 2018 15:16:20 +0200 Subject: bpf/verifier: add ARG_PTR_TO_UNINIT_MAP_VALUE ARG_PTR_TO_UNINIT_MAP_VALUE argument is a pointer to a memory zone used to save the value of a map. Basically the same as ARG_PTR_TO_UNINIT_MEM, but the size has not be passed as an extra argument. This will be used in the following patch that implements some new helpers that receive a pointer to be filled with a map value. Signed-off-by: Mauricio Vasquez B Acked-by: Song Liu Signed-off-by: Alexei Starovoitov --- include/linux/bpf.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index e60fff48288b..0f8b863e0229 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -138,6 +138,7 @@ enum bpf_arg_type { ARG_CONST_MAP_PTR, /* const argument used as pointer to bpf_map */ ARG_PTR_TO_MAP_KEY, /* pointer to stack used as map key */ ARG_PTR_TO_MAP_VALUE, /* pointer to stack used as map value */ + ARG_PTR_TO_UNINIT_MAP_VALUE, /* pointer to valid memory used to store a map value */ /* the following constraints used to prototype bpf_memcmp() and other * functions that access data on eBPF program stack -- cgit v1.2.3 From f1a2e44a3aeccb3ff18d3ccc0b0203e70b95bd92 Mon Sep 17 00:00:00 2001 From: Mauricio Vasquez B Date: Thu, 18 Oct 2018 15:16:25 +0200 Subject: bpf: add queue and stack maps Queue/stack maps implement a FIFO/LIFO data storage for ebpf programs. These maps support peek, pop and push operations that are exposed to eBPF programs through the new bpf_map[peek/pop/push] helpers. Those operations are exposed to userspace applications through the already existing syscalls in the following way: BPF_MAP_LOOKUP_ELEM -> peek BPF_MAP_LOOKUP_AND_DELETE_ELEM -> pop BPF_MAP_UPDATE_ELEM -> push Queue/stack maps are implemented using a buffer, tail and head indexes, hence BPF_F_NO_PREALLOC is not supported. As opposite to other maps, queue and stack do not use RCU for protecting maps values, the bpf_map[peek/pop] have a ARG_PTR_TO_UNINIT_MAP_VALUE argument that is a pointer to a memory zone where to save the value of a map. Basically the same as ARG_PTR_TO_UNINIT_MEM, but the size has not be passed as an extra argument. Our main motivation for implementing queue/stack maps was to keep track of a pool of elements, like network ports in a SNAT, however we forsee other use cases, like for exampling saving last N kernel events in a map and then analysing from userspace. Signed-off-by: Mauricio Vasquez B Acked-by: Song Liu Signed-off-by: Alexei Starovoitov --- include/linux/bpf.h | 6 ++++++ include/linux/bpf_types.h | 2 ++ 2 files changed, 8 insertions(+) (limited to 'include/linux') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 0f8b863e0229..33014ae73103 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -39,6 +39,9 @@ struct bpf_map_ops { void *(*map_lookup_elem)(struct bpf_map *map, void *key); int (*map_update_elem)(struct bpf_map *map, void *key, void *value, u64 flags); int (*map_delete_elem)(struct bpf_map *map, void *key); + int (*map_push_elem)(struct bpf_map *map, void *value, u64 flags); + int (*map_pop_elem)(struct bpf_map *map, void *value); + int (*map_peek_elem)(struct bpf_map *map, void *value); /* funcs called by prog_array and perf_event_array map */ void *(*map_fd_get_ptr)(struct bpf_map *map, struct file *map_file, @@ -811,6 +814,9 @@ static inline int bpf_fd_reuseport_array_update_elem(struct bpf_map *map, extern const struct bpf_func_proto bpf_map_lookup_elem_proto; extern const struct bpf_func_proto bpf_map_update_elem_proto; extern const struct bpf_func_proto bpf_map_delete_elem_proto; +extern const struct bpf_func_proto bpf_map_push_elem_proto; +extern const struct bpf_func_proto bpf_map_pop_elem_proto; +extern const struct bpf_func_proto bpf_map_peek_elem_proto; extern const struct bpf_func_proto bpf_get_prandom_u32_proto; extern const struct bpf_func_proto bpf_get_smp_processor_id_proto; diff --git a/include/linux/bpf_types.h b/include/linux/bpf_types.h index 7bad4e1947ed..44d9ab4809bd 100644 --- a/include/linux/bpf_types.h +++ b/include/linux/bpf_types.h @@ -69,3 +69,5 @@ BPF_MAP_TYPE(BPF_MAP_TYPE_XSKMAP, xsk_map_ops) BPF_MAP_TYPE(BPF_MAP_TYPE_REUSEPORT_SOCKARRAY, reuseport_array_ops) #endif #endif +BPF_MAP_TYPE(BPF_MAP_TYPE_QUEUE, queue_map_ops) +BPF_MAP_TYPE(BPF_MAP_TYPE_STACK, stack_map_ops) -- cgit v1.2.3 From b39b5f411dcfce28ff954e5d6acb2c11be3cb0ec Mon Sep 17 00:00:00 2001 From: Song Liu Date: Fri, 19 Oct 2018 09:57:57 -0700 Subject: bpf: add cg_skb_is_valid_access for BPF_PROG_TYPE_CGROUP_SKB BPF programs of BPF_PROG_TYPE_CGROUP_SKB need to access headers in the skb. This patch enables direct access of skb for these programs. Two helper functions bpf_compute_and_save_data_end() and bpf_restore_data_end() are introduced. There are used in __cgroup_bpf_run_filter_skb(), to compute proper data_end for the BPF program, and restore original data afterwards. Signed-off-by: Song Liu Signed-off-by: Alexei Starovoitov --- include/linux/filter.h | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) (limited to 'include/linux') diff --git a/include/linux/filter.h b/include/linux/filter.h index 5771874bc01e..91b4c934f02e 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -548,6 +548,27 @@ static inline void bpf_compute_data_pointers(struct sk_buff *skb) cb->data_end = skb->data + skb_headlen(skb); } +/* Similar to bpf_compute_data_pointers(), except that save orginal + * data in cb->data and cb->meta_data for restore. + */ +static inline void bpf_compute_and_save_data_end( + struct sk_buff *skb, void **saved_data_end) +{ + struct bpf_skb_data_end *cb = (struct bpf_skb_data_end *)skb->cb; + + *saved_data_end = cb->data_end; + cb->data_end = skb->data + skb_headlen(skb); +} + +/* Restore data saved by bpf_compute_data_pointers(). */ +static inline void bpf_restore_data_end( + struct sk_buff *skb, void *saved_data_end) +{ + struct bpf_skb_data_end *cb = (struct bpf_skb_data_end *)skb->cb; + + cb->data_end = saved_data_end; +} + static inline u8 *bpf_skb_cb(struct sk_buff *skb) { /* eBPF programs may read/write skb->cb[] area to transfer meta -- cgit v1.2.3 From 5032d079909d1ac5c2535acc32d5f01cd245d8ea Mon Sep 17 00:00:00 2001 From: John Fastabend Date: Thu, 18 Oct 2018 13:58:35 -0700 Subject: bpf: skmsg, fix psock create on existing kcm/tls port Before using the psock returned by sk_psock_get() when adding it to a sockmap we need to ensure it is actually a sockmap based psock. Previously we were only checking this after incrementing the reference counter which was an error. This resulted in a slab-out-of-bounds error when the psock was not actually a sockmap type. This moves the check up so the reference counter is only used if it is a sockmap psock. Eric reported the following KASAN BUG, BUG: KASAN: slab-out-of-bounds in atomic_read include/asm-generic/atomic-instrumented.h:21 [inline] BUG: KASAN: slab-out-of-bounds in refcount_inc_not_zero_checked+0x97/0x2f0 lib/refcount.c:120 Read of size 4 at addr ffff88019548be58 by task syz-executor4/22387 CPU: 1 PID: 22387 Comm: syz-executor4 Not tainted 4.19.0-rc7+ #264 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 Call Trace: __dump_stack lib/dump_stack.c:77 [inline] dump_stack+0x1c4/0x2b4 lib/dump_stack.c:113 print_address_description.cold.8+0x9/0x1ff mm/kasan/report.c:256 kasan_report_error mm/kasan/report.c:354 [inline] kasan_report.cold.9+0x242/0x309 mm/kasan/report.c:412 check_memory_region_inline mm/kasan/kasan.c:260 [inline] check_memory_region+0x13e/0x1b0 mm/kasan/kasan.c:267 kasan_check_read+0x11/0x20 mm/kasan/kasan.c:272 atomic_read include/asm-generic/atomic-instrumented.h:21 [inline] refcount_inc_not_zero_checked+0x97/0x2f0 lib/refcount.c:120 sk_psock_get include/linux/skmsg.h:379 [inline] sock_map_link.isra.6+0x41f/0xe30 net/core/sock_map.c:178 sock_hash_update_common+0x19b/0x11e0 net/core/sock_map.c:669 sock_hash_update_elem+0x306/0x470 net/core/sock_map.c:738 map_update_elem+0x819/0xdf0 kernel/bpf/syscall.c:818 Signed-off-by: John Fastabend Reported-by: Eric Dumazet Fixes: 604326b41a6f ("bpf, sockmap: convert to generic sk_msg interface") Signed-off-by: Daniel Borkmann --- include/linux/skmsg.h | 25 ++++++++++++++++++++----- 1 file changed, 20 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/skmsg.h b/include/linux/skmsg.h index 22347b08e1f8..84e18863f6a4 100644 --- a/include/linux/skmsg.h +++ b/include/linux/skmsg.h @@ -270,11 +270,6 @@ static inline struct sk_psock *sk_psock(const struct sock *sk) return rcu_dereference_sk_user_data(sk); } -static inline bool sk_has_psock(struct sock *sk) -{ - return sk_psock(sk) != NULL && sk->sk_prot->recvmsg == tcp_bpf_recvmsg; -} - static inline void sk_psock_queue_msg(struct sk_psock *psock, struct sk_msg *msg) { @@ -374,6 +369,26 @@ static inline bool sk_psock_test_state(const struct sk_psock *psock, return test_bit(bit, &psock->state); } +static inline struct sk_psock *sk_psock_get_checked(struct sock *sk) +{ + struct sk_psock *psock; + + rcu_read_lock(); + psock = sk_psock(sk); + if (psock) { + if (sk->sk_prot->recvmsg != tcp_bpf_recvmsg) { + psock = ERR_PTR(-EBUSY); + goto out; + } + + if (!refcount_inc_not_zero(&psock->refcnt)) + psock = ERR_PTR(-EBUSY); + } +out: + rcu_read_unlock(); + return psock; +} + static inline struct sk_psock *sk_psock_get(struct sock *sk) { struct sk_psock *psock; -- cgit v1.2.3 From c9fbd71f73094311b31ee703a918e9e0df502cef Mon Sep 17 00:00:00 2001 From: Debabrata Banerjee Date: Thu, 18 Oct 2018 11:18:26 -0400 Subject: netpoll: allow cleanup to be synchronous This fixes a problem introduced by: commit 2cde6acd49da ("netpoll: Fix __netpoll_rcu_free so that it can hold the rtnl lock") When using netconsole on a bond, __netpoll_cleanup can asynchronously recurse multiple times, each __netpoll_free_async call can result in more __netpoll_free_async's. This means there is now a race between cleanup_work queues on multiple netpoll_info's on multiple devices and the configuration of a new netpoll. For example if a netconsole is set to enable 0, reconfigured, and enable 1 immediately, this netconsole will likely not work. Given the reason for __netpoll_free_async is it can be called when rtnl is not locked, if it is locked, we should be able to execute synchronously. It appears to be locked everywhere it's called from. Generalize the design pattern from the teaming driver for current callers of __netpoll_free_async. CC: Neil Horman CC: "David S. Miller" Signed-off-by: Debabrata Banerjee Signed-off-by: David S. Miller --- include/linux/netpoll.h | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netpoll.h b/include/linux/netpoll.h index 3ef82d3a78db..676f1ff161a9 100644 --- a/include/linux/netpoll.h +++ b/include/linux/netpoll.h @@ -31,8 +31,6 @@ struct netpoll { bool ipv6; u16 local_port, remote_port; u8 remote_mac[ETH_ALEN]; - - struct work_struct cleanup_work; }; struct netpoll_info { @@ -63,7 +61,7 @@ int netpoll_parse_options(struct netpoll *np, char *opt); int __netpoll_setup(struct netpoll *np, struct net_device *ndev); int netpoll_setup(struct netpoll *np); void __netpoll_cleanup(struct netpoll *np); -void __netpoll_free_async(struct netpoll *np); +void __netpoll_free(struct netpoll *np); void netpoll_cleanup(struct netpoll *np); void netpoll_send_skb_on_dev(struct netpoll *np, struct sk_buff *skb, struct net_device *dev); -- cgit v1.2.3 From bff5b4b3737219195ca0caef4ff7884303cb5dc1 Mon Sep 17 00:00:00 2001 From: Yuiko Oshino Date: Thu, 18 Oct 2018 15:06:01 -0400 Subject: net: phy: micrel: add Microchip KSZ9131 initial driver Add support for Microchip Technology KSZ9131 10/100/1000 Ethernet PHY Signed-off-by: Yuiko Oshino Signed-off-by: David S. Miller --- include/linux/micrel_phy.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/micrel_phy.h b/include/linux/micrel_phy.h index 472fa4d4ea62..7361cd3fddc1 100644 --- a/include/linux/micrel_phy.h +++ b/include/linux/micrel_phy.h @@ -31,6 +31,7 @@ #define PHY_ID_KSZ8081 0x00221560 #define PHY_ID_KSZ8061 0x00221570 #define PHY_ID_KSZ9031 0x00221620 +#define PHY_ID_KSZ9131 0x00221640 #define PHY_ID_KSZ886X 0x00221430 #define PHY_ID_KSZ8863 0x00221435 -- cgit v1.2.3 From 6fff607e2f14bd7c63c06c464a6f93b8efbabe28 Mon Sep 17 00:00:00 2001 From: John Fastabend Date: Fri, 19 Oct 2018 19:56:49 -0700 Subject: bpf: sk_msg program helper bpf_msg_push_data This allows user to push data into a msg using sk_msg program types. The format is as follows, bpf_msg_push_data(msg, offset, len, flags) this will insert 'len' bytes at offset 'offset'. For example to prepend 10 bytes at the front of the message the user can, bpf_msg_push_data(msg, 0, 10, 0); This will invalidate data bounds so BPF user will have to then recheck data bounds after calling this. After this the msg size will have been updated and the user is free to write into the added bytes. We allow any offset/len as long as it is within the (data, data_end) range. However, a copy will be required if the ring is full and its possible for the helper to fail with ENOMEM or EINVAL errors which need to be handled by the BPF program. This can be used similar to XDP metadata to pass data between sk_msg layer and lower layers. Signed-off-by: John Fastabend Signed-off-by: Daniel Borkmann --- include/linux/skmsg.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/skmsg.h b/include/linux/skmsg.h index 84e18863f6a4..2a11e9d91dfa 100644 --- a/include/linux/skmsg.h +++ b/include/linux/skmsg.h @@ -207,6 +207,11 @@ static inline struct scatterlist *sk_msg_elem(struct sk_msg *msg, int which) return &msg->sg.data[which]; } +static inline struct scatterlist sk_msg_elem_cpy(struct sk_msg *msg, int which) +{ + return msg->sg.data[which]; +} + static inline struct page *sk_msg_page(struct sk_msg *msg, int which) { return sg_page(sk_msg_elem(msg, which)); -- cgit v1.2.3 From 876dcf2f3aaa0f68d437b368b93a4c4b81521191 Mon Sep 17 00:00:00 2001 From: Olivier Brunel Date: Sat, 20 Oct 2018 19:39:56 +0200 Subject: umh: Add command line to user mode helpers User mode helpers were spawned without a command line, and because an empty command line is used by many tools to identify processes as kernel threads, this could cause some issues. Notably during killing spree on shutdown, since such helper would then be skipped (i.e. not killed) which would result in the process remaining alive, and thus preventing unmouting of the rootfs (as experienced with the bpfilter umh). Fixes: 449325b52b7a ("umh: introduce fork_usermode_blob() helper") Signed-off-by: Olivier Brunel Signed-off-by: David S. Miller --- include/linux/umh.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/umh.h b/include/linux/umh.h index 5c812acbb80a..235f51b62c71 100644 --- a/include/linux/umh.h +++ b/include/linux/umh.h @@ -44,6 +44,7 @@ struct subprocess_info *call_usermodehelper_setup_file(struct file *file, int (*init)(struct subprocess_info *info, struct cred *new), void (*cleanup)(struct subprocess_info *), void *data); struct umh_info { + const char *cmdline; struct file *pipe_to_umh; struct file *pipe_from_umh; pid_t pid; -- cgit v1.2.3