From 6c5c9581044dd6e0cd284ab653502fb9264f08b6 Mon Sep 17 00:00:00 2001
From: Magnus Karlsson <magnus.karlsson@intel.com>
Date: Tue, 28 Aug 2018 14:44:28 +0200
Subject: net: add napi_if_scheduled_mark_missed

The function napi_if_scheduled_mark_missed is used to check if the
NAPI context is scheduled, if so set NAPIF_STATE_MISSED and return
true. Used by the AF_XDP zero-copy i40e Tx code implementation in
order to make sure that irq affinity is honored by the napi context.

Signed-off-by: Magnus Karlsson <magnus.karlsson@intel.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 include/linux/netdevice.h | 26 ++++++++++++++++++++++++++
 1 file changed, 26 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index ca5ab98053c8..4271f6b4e419 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -535,6 +535,32 @@ static inline void napi_synchronize(const struct napi_struct *n)
 		barrier();
 }
 
+/**
+ *	napi_if_scheduled_mark_missed - if napi is running, set the
+ *	NAPIF_STATE_MISSED
+ *	@n: NAPI context
+ *
+ * If napi is running, set the NAPIF_STATE_MISSED, and return true if
+ * NAPI is scheduled.
+ **/
+static inline bool napi_if_scheduled_mark_missed(struct napi_struct *n)
+{
+	unsigned long val, new;
+
+	do {
+		val = READ_ONCE(n->state);
+		if (val & NAPIF_STATE_DISABLE)
+			return true;
+
+		if (!(val & NAPIF_STATE_SCHED))
+			return false;
+
+		new = val | NAPIF_STATE_MISSED;
+	} while (cmpxchg(&n->state, val, new) != val);
+
+	return true;
+}
+
 enum netdev_queue_state_t {
 	__QUEUE_STATE_DRV_XOFF,
 	__QUEUE_STATE_STACK_XOFF,
-- 
cgit v1.2.3


From 679c782de14bd48c19dd74cd1af20a2bc05dd936 Mon Sep 17 00:00:00 2001
From: Edward Cree <ecree@solarflare.com>
Date: Wed, 22 Aug 2018 20:02:19 +0100
Subject: bpf/verifier: per-register parent pointers

By giving each register its own liveness chain, we elide the skip_callee()
 logic.  Instead, each register's parent is the state it inherits from;
 both check_func_call() and prepare_func_exit() automatically connect
 reg states to the correct chain since when they copy the reg state across
 (r1-r5 into the callee as args, and r0 out as the return value) they also
 copy the parent pointer.

Signed-off-by: Edward Cree <ecree@solarflare.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 include/linux/bpf_verifier.h | 8 +++-----
 1 file changed, 3 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h
index 38b04f559ad3..b42b60a83e19 100644
--- a/include/linux/bpf_verifier.h
+++ b/include/linux/bpf_verifier.h
@@ -41,6 +41,7 @@ enum bpf_reg_liveness {
 };
 
 struct bpf_reg_state {
+	/* Ordering of fields matters.  See states_equal() */
 	enum bpf_reg_type type;
 	union {
 		/* valid when type == PTR_TO_PACKET */
@@ -59,7 +60,6 @@ struct bpf_reg_state {
 	 * came from, when one is tested for != NULL.
 	 */
 	u32 id;
-	/* Ordering of fields matters.  See states_equal() */
 	/* For scalar types (SCALAR_VALUE), this represents our knowledge of
 	 * the actual value.
 	 * For pointer types, this represents the variable part of the offset
@@ -76,15 +76,15 @@ struct bpf_reg_state {
 	s64 smax_value; /* maximum possible (s64)value */
 	u64 umin_value; /* minimum possible (u64)value */
 	u64 umax_value; /* maximum possible (u64)value */
+	/* parentage chain for liveness checking */
+	struct bpf_reg_state *parent;
 	/* Inside the callee two registers can be both PTR_TO_STACK like
 	 * R1=fp-8 and R2=fp-8, but one of them points to this function stack
 	 * while another to the caller's stack. To differentiate them 'frameno'
 	 * is used which is an index in bpf_verifier_state->frame[] array
 	 * pointing to bpf_func_state.
-	 * This field must be second to last, for states_equal() reasons.
 	 */
 	u32 frameno;
-	/* This field must be last, for states_equal() reasons. */
 	enum bpf_reg_liveness live;
 };
 
@@ -107,7 +107,6 @@ struct bpf_stack_state {
  */
 struct bpf_func_state {
 	struct bpf_reg_state regs[MAX_BPF_REG];
-	struct bpf_verifier_state *parent;
 	/* index of call instruction that called into this func */
 	int callsite;
 	/* stack frame number of this function state from pov of
@@ -129,7 +128,6 @@ struct bpf_func_state {
 struct bpf_verifier_state {
 	/* call stack tracking */
 	struct bpf_func_state *frame[MAX_CALL_FRAMES];
-	struct bpf_verifier_state *parent;
 	u32 curframe;
 };
 
-- 
cgit v1.2.3


From 9b3004953503462a4fab31b85e44ae446d48f0bd Mon Sep 17 00:00:00 2001
From: Michal Kubecek <mkubecek@suse.cz>
Date: Tue, 28 Aug 2018 19:56:58 +0200
Subject: ethtool: drop get_settings and set_settings callbacks

Since [gs]et_settings ethtool_ops callbacks have been deprecated in
February 2016, all in tree NIC drivers have been converted to provide
[gs]et_link_ksettings() and out of tree drivers have had enough time to do
the same.

Drop get_settings() and set_settings() and implement both ETHTOOL_[GS]SET
and ETHTOOL_[GS]LINKSETTINGS only using [gs]et_link_ksettings().

Signed-off-by: Michal Kubecek <mkubecek@suse.cz>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/ethtool.h | 33 ++++++++++-----------------------
 1 file changed, 10 insertions(+), 23 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/ethtool.h b/include/linux/ethtool.h
index f8a2245b70ac..afd9596ce636 100644
--- a/include/linux/ethtool.h
+++ b/include/linux/ethtool.h
@@ -183,14 +183,6 @@ bool ethtool_convert_link_mode_to_legacy_u32(u32 *legacy_u32,
 
 /**
  * struct ethtool_ops - optional netdev operations
- * @get_settings: DEPRECATED, use %get_link_ksettings/%set_link_ksettings
- *	API. Get various device settings including Ethernet link
- *	settings. The @cmd parameter is expected to have been cleared
- *	before get_settings is called. Returns a negative error code
- *	or zero.
- * @set_settings: DEPRECATED, use %get_link_ksettings/%set_link_ksettings
- *	API. Set various device settings including Ethernet link
- *	settings.  Returns a negative error code or zero.
  * @get_drvinfo: Report driver/device information.  Should only set the
  *	@driver, @version, @fw_version and @bus_info fields.  If not
  *	implemented, the @driver and @bus_info fields will be filled in
@@ -297,19 +289,16 @@ bool ethtool_convert_link_mode_to_legacy_u32(u32 *legacy_u32,
  *	a TX queue has this number, return -EINVAL. If only a RX queue or a TX
  *	queue has this number, ignore the inapplicable fields.
  *	Returns a negative error code or zero.
- * @get_link_ksettings: When defined, takes precedence over the
- *	%get_settings method. Get various device settings
- *	including Ethernet link settings. The %cmd and
- *	%link_mode_masks_nwords fields should be ignored (use
- *	%__ETHTOOL_LINK_MODE_MASK_NBITS instead of the latter), any
- *	change to them will be overwritten by kernel. Returns a
- *	negative error code or zero.
- * @set_link_ksettings: When defined, takes precedence over the
- *	%set_settings method. Set various device settings including
- *	Ethernet link settings. The %cmd and %link_mode_masks_nwords
- *	fields should be ignored (use %__ETHTOOL_LINK_MODE_MASK_NBITS
- *	instead of the latter), any change to them will be overwritten
- *	by kernel. Returns a negative error code or zero.
+ * @get_link_ksettings: Get various device settings including Ethernet link
+ *	settings. The %cmd and %link_mode_masks_nwords fields should be
+ *	ignored (use %__ETHTOOL_LINK_MODE_MASK_NBITS instead of the latter),
+ *	any change to them will be overwritten by kernel. Returns a negative
+ *	error code or zero.
+ * @set_link_ksettings: Set various device settings including Ethernet link
+ *	settings. The %cmd and %link_mode_masks_nwords fields should be
+ *	ignored (use %__ETHTOOL_LINK_MODE_MASK_NBITS instead of the latter),
+ *	any change to them will be overwritten by kernel. Returns a negative
+ *	error code or zero.
  * @get_fecparam: Get the network device Forward Error Correction parameters.
  * @set_fecparam: Set the network device Forward Error Correction parameters.
  * @get_ethtool_phy_stats: Return extended statistics about the PHY device.
@@ -329,8 +318,6 @@ bool ethtool_convert_link_mode_to_legacy_u32(u32 *legacy_u32,
  * of the generic netdev features interface.
  */
 struct ethtool_ops {
-	int	(*get_settings)(struct net_device *, struct ethtool_cmd *);
-	int	(*set_settings)(struct net_device *, struct ethtool_cmd *);
 	void	(*get_drvinfo)(struct net_device *, struct ethtool_drvinfo *);
 	int	(*get_regs_len)(struct net_device *);
 	void	(*get_regs)(struct net_device *, struct ethtool_regs *, void *);
-- 
cgit v1.2.3


From a4e0109a19c554e44c832958e426303781e1ad30 Mon Sep 17 00:00:00 2001
From: Harshitha Ramamurthy <harshitha.ramamurthy@intel.com>
Date: Mon, 20 Aug 2018 08:12:32 -0700
Subject: virtchnl: use u8 type for a field in the virtchnl_filter struct

The virtchnl_filter struct has a field called field_flags. A previous
commit mistakenly had the type to be a __u8. What we want is for the
field to be an unsigned 8 bit value, so let's just use the existing
kernel type u8 for that.

Signed-off-by: Harshitha Ramamurthy <harshitha.ramamurthy@intel.com>
Tested-by: Andrew Bowers <andrewx.bowers@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 include/linux/avf/virtchnl.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/avf/virtchnl.h b/include/linux/avf/virtchnl.h
index 212b3822d180..b41f7bc958ef 100644
--- a/include/linux/avf/virtchnl.h
+++ b/include/linux/avf/virtchnl.h
@@ -573,7 +573,7 @@ struct virtchnl_filter {
 	enum	virtchnl_flow_type flow_type;
 	enum	virtchnl_action action;
 	u32	action_meta;
-	__u8	field_flags;
+	u8	field_flags;
 };
 
 VIRTCHNL_CHECK_STRUCT_LEN(272, virtchnl_filter);
-- 
cgit v1.2.3


From aa7e80b220f3a543eefbe4b7e2c5d2b73e2e2ef7 Mon Sep 17 00:00:00 2001
From: Moni Shoua <monis@mellanox.com>
Date: Mon, 3 Sep 2018 20:19:28 +0300
Subject: net/mlx5: Fix atomic_mode enum values

The field atomic_mode is 4 bits wide and therefore can hold values
from 0x0 to 0xf. Remove the unnecessary 20 bit shift that made the values
be incorrect. While that, remove unused enum values.

Fixes: 57cda166bbe0 ("net/mlx5: Add DCT command interface")
Signed-off-by: Moni Shoua <monis@mellanox.com>
Reviewed-by: Artemy Kovalyov <artemyko@mellanox.com>
Signed-off-by: Leon Romanovsky <leonro@mellanox.com>
---
 include/linux/mlx5/driver.h | 5 +----
 1 file changed, 1 insertion(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h
index 7a452716de4b..d885e9f0e054 100644
--- a/include/linux/mlx5/driver.h
+++ b/include/linux/mlx5/driver.h
@@ -163,10 +163,7 @@ enum mlx5_dcbx_oper_mode {
 };
 
 enum mlx5_dct_atomic_mode {
-	MLX5_ATOMIC_MODE_DCT_OFF        = 20,
-	MLX5_ATOMIC_MODE_DCT_NONE       = 0 << MLX5_ATOMIC_MODE_DCT_OFF,
-	MLX5_ATOMIC_MODE_DCT_IB_COMP    = 1 << MLX5_ATOMIC_MODE_DCT_OFF,
-	MLX5_ATOMIC_MODE_DCT_CX         = 2 << MLX5_ATOMIC_MODE_DCT_OFF,
+	MLX5_ATOMIC_MODE_DCT_CX         = 2,
 };
 
 enum {
-- 
cgit v1.2.3


From 8ce78257965e6cd49720e653867e766ecd38883f Mon Sep 17 00:00:00 2001
From: Mark Bloch <markb@mellanox.com>
Date: Tue, 28 Aug 2018 14:18:41 +0300
Subject: net/mlx5: Add proper NIC TX steering flow tables support

Extend the ability to add steering rules to NIC TX flow tables.
For now, we are only adding TX bypass (egress) which is used by the RDMA
side. This will allow to shape outgoing traffic and tweak it if needed, for
example performing encapsulation or rewriting headers.

Signed-off-by: Mark Bloch <markb@mellanox.com>
Reviewed-by: Saeed Mahameed <saeedm@mellanox.com>
Signed-off-by: Leon Romanovsky <leonro@mellanox.com>
---
 include/linux/mlx5/device.h | 6 ++++++
 1 file changed, 6 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/mlx5/device.h b/include/linux/mlx5/device.h
index 11fa4e66afc5..f2281e69ab39 100644
--- a/include/linux/mlx5/device.h
+++ b/include/linux/mlx5/device.h
@@ -1120,6 +1120,12 @@ enum mlx5_qcam_feature_groups {
 #define MLX5_CAP_FLOWTABLE_NIC_RX_MAX(mdev, cap) \
 	MLX5_CAP_FLOWTABLE_MAX(mdev, flow_table_properties_nic_receive.cap)
 
+#define MLX5_CAP_FLOWTABLE_NIC_TX(mdev, cap) \
+		MLX5_CAP_FLOWTABLE(mdev, flow_table_properties_nic_transmit.cap)
+
+#define MLX5_CAP_FLOWTABLE_NIC_TX_MAX(mdev, cap) \
+	MLX5_CAP_FLOWTABLE_MAX(mdev, flow_table_properties_nic_transmit.cap)
+
 #define MLX5_CAP_FLOWTABLE_SNIFFER_RX(mdev, cap) \
 	MLX5_CAP_FLOWTABLE(mdev, flow_table_properties_nic_receive_sniffer.cap)
 
-- 
cgit v1.2.3


From 90c1d1b8da67330b09893d749401a45328b51704 Mon Sep 17 00:00:00 2001
From: Mark Bloch <markb@mellanox.com>
Date: Tue, 28 Aug 2018 14:18:42 +0300
Subject: net/mlx5: Export modify header alloc/dealloc functions

Those functions will be used by the RDMA side to create modify header
actions to be attached to flow steering rules via verbs.

Signed-off-by: Mark Bloch <markb@mellanox.com>
Reviewed-by: Saeed Mahameed <saeedm@mellanox.com>
Signed-off-by: Leon Romanovsky <leonro@mellanox.com>
---
 include/linux/mlx5/fs.h | 6 ++++++
 1 file changed, 6 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/mlx5/fs.h b/include/linux/mlx5/fs.h
index 804516e4f483..0cbf4d5cb1ab 100644
--- a/include/linux/mlx5/fs.h
+++ b/include/linux/mlx5/fs.h
@@ -196,4 +196,10 @@ int mlx5_fc_query(struct mlx5_core_dev *dev, struct mlx5_fc *counter,
 int mlx5_fs_add_rx_underlay_qpn(struct mlx5_core_dev *dev, u32 underlay_qpn);
 int mlx5_fs_remove_rx_underlay_qpn(struct mlx5_core_dev *dev, u32 underlay_qpn);
 
+int mlx5_modify_header_alloc(struct mlx5_core_dev *dev,
+			     u8 namespace, u8 num_actions,
+			     void *modify_actions, u32 *modify_header_id);
+void mlx5_modify_header_dealloc(struct mlx5_core_dev *dev,
+				u32 modify_header_id);
+
 #endif
-- 
cgit v1.2.3


From 61444b458b01c95e55003d6f0b4d4c936fde51cb Mon Sep 17 00:00:00 2001
From: Mark Bloch <markb@mellanox.com>
Date: Tue, 28 Aug 2018 14:18:44 +0300
Subject: net/mlx5: Break encap/decap into two separated flow table creation
 flags

Today we are able to attach encap and decap actions only to the FDB. In
preparation to enable those actions on the NIC flow tables, break the
single flag into two. Those flags control whatever a decap or encap
operations can be attached to the flow table created. For FDB, if
encapsulation is required, we set both of them.

Signed-off-by: Mark Bloch <markb@mellanox.com>
Reviewed-by: Saeed Mahameed <saeedm@mellanox.com>
Reviewed-by: Or Gerlitz <ogerlitz@mellanox.com>
Signed-off-by: Leon Romanovsky <leonro@mellanox.com>
---
 include/linux/mlx5/fs.h | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/mlx5/fs.h b/include/linux/mlx5/fs.h
index 0cbf4d5cb1ab..0194e62ad66a 100644
--- a/include/linux/mlx5/fs.h
+++ b/include/linux/mlx5/fs.h
@@ -45,7 +45,8 @@ enum {
 };
 
 enum {
-	MLX5_FLOW_TABLE_TUNNEL_EN = BIT(0),
+	MLX5_FLOW_TABLE_TUNNEL_EN_ENCAP = BIT(0),
+	MLX5_FLOW_TABLE_TUNNEL_EN_DECAP = BIT(1),
 };
 
 #define LEFTOVERS_RULE_NUM	 2
-- 
cgit v1.2.3


From e0e7a3861b6c6b673dc93e291ef11cf5e746b0c2 Mon Sep 17 00:00:00 2001
From: Mark Bloch <markb@mellanox.com>
Date: Tue, 28 Aug 2018 14:18:45 +0300
Subject: net/mlx5: Move header encap type to IFC header file

Those bits are hardware specification and should be defined in the
IFC header file.

Signed-off-by: Mark Bloch <markb@mellanox.com>
Reviewed-by: Or Gerlitz <ogerlitz@mellanox.com>
Reviewed-by: Saeed Mahameed <saeedm@mellanox.com>
Signed-off-by: Leon Romanovsky <leonro@mellanox.com>
---
 include/linux/mlx5/mlx5_ifc.h | 5 +++++
 1 file changed, 5 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h
index f043d65b9bac..bd725e0924e5 100644
--- a/include/linux/mlx5/mlx5_ifc.h
+++ b/include/linux/mlx5/mlx5_ifc.h
@@ -4848,6 +4848,11 @@ struct mlx5_ifc_alloc_encap_header_out_bits {
 	u8         reserved_at_60[0x20];
 };
 
+enum {
+	MLX5_HEADER_TYPE_VXLAN = 0x0,
+	MLX5_HEADER_TYPE_NVGRE = 0x1,
+};
+
 struct mlx5_ifc_alloc_encap_header_in_bits {
 	u8         opcode[0x10];
 	u8         reserved_at_10[0x10];
-- 
cgit v1.2.3


From 60786f0987c0d9354e5330ee11615b16cdb448fe Mon Sep 17 00:00:00 2001
From: Mark Bloch <markb@mellanox.com>
Date: Tue, 28 Aug 2018 14:18:46 +0300
Subject: {net, RDMA}/mlx5: Rename encap to reformat packet

Renames all encap mlx5_{core,ib} code to use the new naming of packet
reformat. This change doesn't introduce any function change and is
needed to properly reflect the operation being done by this action.
For example not only can we encapsulate a packet, but also decapsulate it.

Signed-off-by: Mark Bloch <markb@mellanox.com>
Reviewed-by: Saeed Mahameed <saeedm@mellanox.com>
Signed-off-by: Leon Romanovsky <leonro@mellanox.com>
---
 include/linux/mlx5/fs.h       |  4 ++--
 include/linux/mlx5/mlx5_ifc.h | 50 +++++++++++++++++++++----------------------
 2 files changed, 27 insertions(+), 27 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/mlx5/fs.h b/include/linux/mlx5/fs.h
index 0194e62ad66a..37d0c08d0966 100644
--- a/include/linux/mlx5/fs.h
+++ b/include/linux/mlx5/fs.h
@@ -45,7 +45,7 @@ enum {
 };
 
 enum {
-	MLX5_FLOW_TABLE_TUNNEL_EN_ENCAP = BIT(0),
+	MLX5_FLOW_TABLE_TUNNEL_EN_REFORMAT = BIT(0),
 	MLX5_FLOW_TABLE_TUNNEL_EN_DECAP = BIT(1),
 };
 
@@ -160,7 +160,7 @@ struct mlx5_flow_act {
 	u32 action;
 	bool has_flow_tag;
 	u32 flow_tag;
-	u32 encap_id;
+	u32 reformat_id;
 	u32 modify_id;
 	uintptr_t esp_id;
 	struct mlx5_fs_vlan vlan[MLX5_FS_VLAN_DEPTH];
diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h
index bd725e0924e5..c79eaae28e59 100644
--- a/include/linux/mlx5/mlx5_ifc.h
+++ b/include/linux/mlx5/mlx5_ifc.h
@@ -243,8 +243,8 @@ enum {
 	MLX5_CMD_OP_DEALLOC_FLOW_COUNTER          = 0x93a,
 	MLX5_CMD_OP_QUERY_FLOW_COUNTER            = 0x93b,
 	MLX5_CMD_OP_MODIFY_FLOW_TABLE             = 0x93c,
-	MLX5_CMD_OP_ALLOC_ENCAP_HEADER            = 0x93d,
-	MLX5_CMD_OP_DEALLOC_ENCAP_HEADER          = 0x93e,
+	MLX5_CMD_OP_ALLOC_PACKET_REFORMAT_CONTEXT = 0x93d,
+	MLX5_CMD_OP_DEALLOC_PACKET_REFORMAT_CONTEXT = 0x93e,
 	MLX5_CMD_OP_ALLOC_MODIFY_HEADER_CONTEXT   = 0x940,
 	MLX5_CMD_OP_DEALLOC_MODIFY_HEADER_CONTEXT = 0x941,
 	MLX5_CMD_OP_QUERY_MODIFY_HEADER_CONTEXT   = 0x942,
@@ -336,7 +336,7 @@ struct mlx5_ifc_flow_table_prop_layout_bits {
 	u8         modify_root[0x1];
 	u8         identified_miss_table_mode[0x1];
 	u8         flow_table_modify[0x1];
-	u8         encap[0x1];
+	u8         reformat[0x1];
 	u8         decap[0x1];
 	u8         reserved_at_9[0x1];
 	u8         pop_vlan[0x1];
@@ -599,7 +599,7 @@ struct mlx5_ifc_e_switch_cap_bits {
 	u8         vxlan_encap_decap[0x1];
 	u8         nvgre_encap_decap[0x1];
 	u8         reserved_at_22[0x9];
-	u8         log_max_encap_headers[0x5];
+	u8         log_max_packet_reformat_context[0x5];
 	u8         reserved_2b[0x6];
 	u8         max_encap_header_size[0xa];
 
@@ -2394,7 +2394,7 @@ enum {
 	MLX5_FLOW_CONTEXT_ACTION_DROP      = 0x2,
 	MLX5_FLOW_CONTEXT_ACTION_FWD_DEST  = 0x4,
 	MLX5_FLOW_CONTEXT_ACTION_COUNT     = 0x8,
-	MLX5_FLOW_CONTEXT_ACTION_ENCAP     = 0x10,
+	MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT = 0x10,
 	MLX5_FLOW_CONTEXT_ACTION_DECAP     = 0x20,
 	MLX5_FLOW_CONTEXT_ACTION_MOD_HDR   = 0x40,
 	MLX5_FLOW_CONTEXT_ACTION_VLAN_POP  = 0x80,
@@ -2427,7 +2427,7 @@ struct mlx5_ifc_flow_context_bits {
 	u8         reserved_at_a0[0x8];
 	u8         flow_counter_list_size[0x18];
 
-	u8         encap_id[0x20];
+	u8         packet_reformat_id[0x20];
 
 	u8         modify_header_id[0x20];
 
@@ -4802,19 +4802,19 @@ struct mlx5_ifc_query_eq_in_bits {
 	u8         reserved_at_60[0x20];
 };
 
-struct mlx5_ifc_encap_header_in_bits {
+struct mlx5_ifc_packet_reformat_context_in_bits {
 	u8         reserved_at_0[0x5];
-	u8         header_type[0x3];
+	u8         reformat_type[0x3];
 	u8         reserved_at_8[0xe];
-	u8         encap_header_size[0xa];
+	u8         reformat_data_size[0xa];
 
 	u8         reserved_at_20[0x10];
-	u8         encap_header[2][0x8];
+	u8         reformat_data[2][0x8];
 
-	u8         more_encap_header[0][0x8];
+	u8         more_reformat_data[0][0x8];
 };
 
-struct mlx5_ifc_query_encap_header_out_bits {
+struct mlx5_ifc_query_packet_reformat_context_out_bits {
 	u8         status[0x8];
 	u8         reserved_at_8[0x18];
 
@@ -4822,38 +4822,38 @@ struct mlx5_ifc_query_encap_header_out_bits {
 
 	u8         reserved_at_40[0xa0];
 
-	struct mlx5_ifc_encap_header_in_bits encap_header[0];
+	struct mlx5_ifc_packet_reformat_context_in_bits packet_reformat_context[0];
 };
 
-struct mlx5_ifc_query_encap_header_in_bits {
+struct mlx5_ifc_query_packet_reformat_context_in_bits {
 	u8         opcode[0x10];
 	u8         reserved_at_10[0x10];
 
 	u8         reserved_at_20[0x10];
 	u8         op_mod[0x10];
 
-	u8         encap_id[0x20];
+	u8         packet_reformat_id[0x20];
 
 	u8         reserved_at_60[0xa0];
 };
 
-struct mlx5_ifc_alloc_encap_header_out_bits {
+struct mlx5_ifc_alloc_packet_reformat_context_out_bits {
 	u8         status[0x8];
 	u8         reserved_at_8[0x18];
 
 	u8         syndrome[0x20];
 
-	u8         encap_id[0x20];
+	u8         packet_reformat_id[0x20];
 
 	u8         reserved_at_60[0x20];
 };
 
 enum {
-	MLX5_HEADER_TYPE_VXLAN = 0x0,
-	MLX5_HEADER_TYPE_NVGRE = 0x1,
+	MLX5_REFORMAT_TYPE_L2_TO_VXLAN = 0x0,
+	MLX5_REFORMAT_TYPE_L2_TO_NVGRE = 0x1,
 };
 
-struct mlx5_ifc_alloc_encap_header_in_bits {
+struct mlx5_ifc_alloc_packet_reformat_context_in_bits {
 	u8         opcode[0x10];
 	u8         reserved_at_10[0x10];
 
@@ -4862,10 +4862,10 @@ struct mlx5_ifc_alloc_encap_header_in_bits {
 
 	u8         reserved_at_40[0xa0];
 
-	struct mlx5_ifc_encap_header_in_bits encap_header;
+	struct mlx5_ifc_packet_reformat_context_in_bits packet_reformat_context;
 };
 
-struct mlx5_ifc_dealloc_encap_header_out_bits {
+struct mlx5_ifc_dealloc_packet_reformat_context_out_bits {
 	u8         status[0x8];
 	u8         reserved_at_8[0x18];
 
@@ -4874,14 +4874,14 @@ struct mlx5_ifc_dealloc_encap_header_out_bits {
 	u8         reserved_at_40[0x40];
 };
 
-struct mlx5_ifc_dealloc_encap_header_in_bits {
+struct mlx5_ifc_dealloc_packet_reformat_context_in_bits {
 	u8         opcode[0x10];
 	u8         reserved_at_10[0x10];
 
 	u8         reserved_20[0x10];
 	u8         op_mod[0x10];
 
-	u8         encap_id[0x20];
+	u8         packet_reformat_id[0x20];
 
 	u8         reserved_60[0x20];
 };
@@ -6983,7 +6983,7 @@ struct mlx5_ifc_create_flow_table_out_bits {
 };
 
 struct mlx5_ifc_flow_table_context_bits {
-	u8         encap_en[0x1];
+	u8         reformat_en[0x1];
 	u8         decap_en[0x1];
 	u8         reserved_at_2[0x2];
 	u8         table_miss_action[0x4];
-- 
cgit v1.2.3


From bea4e1f6c6c5744d467ebf8b0699f5e391835130 Mon Sep 17 00:00:00 2001
From: Mark Bloch <markb@mellanox.com>
Date: Tue, 28 Aug 2018 14:18:47 +0300
Subject: net/mlx5: Expose new packet reformat capabilities

Expose new abilities when creating a packet reformat context.

The new types which can be created are:
MLX5_REFORMAT_TYPE_L2_TO_L2_TUNNEL: Ability to create generic encap
operation to be done by the HW.

MLX5_REFORMAT_TYPE_L3_TUNNEL_TO_L2: Ability to create generic decap
operation where the inner packet doesn't contain L2.

MLX5_REFORMAT_TYPE_L2_TO_L3_TUNNEL: Ability to create generic encap
operation to be done by the HW. The L2 of the original packet
is dropped.

Signed-off-by: Mark Bloch <markb@mellanox.com>
Reviewed-by: Saeed Mahameed <saeedm@mellanox.com>
Signed-off-by: Leon Romanovsky <leonro@mellanox.com>
---
 include/linux/mlx5/mlx5_ifc.h | 19 ++++++++++++++++---
 1 file changed, 16 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h
index c79eaae28e59..3a4a2e0567e9 100644
--- a/include/linux/mlx5/mlx5_ifc.h
+++ b/include/linux/mlx5/mlx5_ifc.h
@@ -344,8 +344,12 @@ struct mlx5_ifc_flow_table_prop_layout_bits {
 	u8         reserved_at_c[0x1];
 	u8         pop_vlan_2[0x1];
 	u8         push_vlan_2[0x1];
-	u8         reserved_at_f[0x11];
-
+	u8	   reformat_and_vlan_action[0x1];
+	u8	   reserved_at_10[0x2];
+	u8	   reformat_l3_tunnel_to_l2[0x1];
+	u8	   reformat_l2_to_l3_tunnel[0x1];
+	u8	   reformat_and_modify_action[0x1];
+	u8         reserved_at_14[0xb];
 	u8         reserved_at_20[0x2];
 	u8         log_max_ft_size[0x6];
 	u8         log_max_modify_header_context[0x8];
@@ -554,7 +558,13 @@ struct mlx5_ifc_flow_table_nic_cap_bits {
 	u8         nic_rx_multi_path_tirs[0x1];
 	u8         nic_rx_multi_path_tirs_fts[0x1];
 	u8         allow_sniffer_and_nic_rx_shared_tir[0x1];
-	u8         reserved_at_3[0x1fd];
+	u8	   reserved_at_3[0x1d];
+	u8	   encap_general_header[0x1];
+	u8	   reserved_at_21[0xa];
+	u8	   log_max_packet_reformat_context[0x5];
+	u8	   reserved_at_30[0x6];
+	u8	   max_encap_header_size[0xa];
+	u8	   reserved_at_40[0x1c0];
 
 	struct mlx5_ifc_flow_table_prop_layout_bits flow_table_properties_nic_receive;
 
@@ -4851,6 +4861,9 @@ struct mlx5_ifc_alloc_packet_reformat_context_out_bits {
 enum {
 	MLX5_REFORMAT_TYPE_L2_TO_VXLAN = 0x0,
 	MLX5_REFORMAT_TYPE_L2_TO_NVGRE = 0x1,
+	MLX5_REFORMAT_TYPE_L2_TO_L2_TUNNEL = 0x2,
+	MLX5_REFORMAT_TYPE_L3_TUNNEL_TO_L2 = 0x3,
+	MLX5_REFORMAT_TYPE_L2_TO_L3_TUNNEL = 0x4,
 };
 
 struct mlx5_ifc_alloc_packet_reformat_context_in_bits {
-- 
cgit v1.2.3


From 50acec06f3928fc29647aecf1270e54cae583afb Mon Sep 17 00:00:00 2001
From: Mark Bloch <markb@mellanox.com>
Date: Tue, 28 Aug 2018 14:18:49 +0300
Subject: net/mlx5: Export packet reformat alloc/dealloc functions

This will allow for the RDMA side to allocate packet reformat context.

Signed-off-by: Mark Bloch <markb@mellanox.com>
Reviewed-by: Saeed Mahameed <saeedm@mellanox.com>
Signed-off-by: Leon Romanovsky <leonro@mellanox.com>
---
 include/linux/mlx5/fs.h | 9 +++++++++
 1 file changed, 9 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/mlx5/fs.h b/include/linux/mlx5/fs.h
index 37d0c08d0966..b1c026f1c8ba 100644
--- a/include/linux/mlx5/fs.h
+++ b/include/linux/mlx5/fs.h
@@ -203,4 +203,13 @@ int mlx5_modify_header_alloc(struct mlx5_core_dev *dev,
 void mlx5_modify_header_dealloc(struct mlx5_core_dev *dev,
 				u32 modify_header_id);
 
+int mlx5_packet_reformat_alloc(struct mlx5_core_dev *dev,
+			       int reformat_type,
+			       size_t size,
+			       void *reformat_data,
+			       enum mlx5_flow_namespace_type namespace,
+			       u32 *packet_reformat_id);
+void mlx5_packet_reformat_dealloc(struct mlx5_core_dev *dev,
+				  u32 packet_reformat_id);
+
 #endif
-- 
cgit v1.2.3


From 03512ceb60ae4be71ed3129dabb8625224c8ec40 Mon Sep 17 00:00:00 2001
From: Sara Sharon <sara.sharon@intel.com>
Date: Fri, 31 Aug 2018 11:31:09 +0300
Subject: ieee80211: remove redundant leading zeroes

The defines of IEEE80211_HE_OPERATION_VHT_OPER_INFO and
IEEE80211_HE_OPERATION_MULTI_BSSID_AP have leading zeroes
that makes the number look like it is bigger than 32 bit.
This is misleading, remove it.

Signed-off-by: Sara Sharon <sara.sharon@intel.com>
Signed-off-by: Luca Coelho <luciano.coelho@intel.com>
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/linux/ieee80211.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h
index 9c03a7d5e400..17ea51d088ae 100644
--- a/include/linux/ieee80211.h
+++ b/include/linux/ieee80211.h
@@ -1963,8 +1963,8 @@ ieee80211_he_ppe_size(u8 ppe_thres_hdr, const u8 *phy_cap_info)
 #define IEEE80211_HE_OPERATION_TWT_REQUIRED			0x00000200
 #define IEEE80211_HE_OPERATION_RTS_THRESHOLD_MASK		0x000ffc00
 #define IEEE80211_HE_OPERATION_RTS_THRESHOLD_OFFSET		10
-#define IEEE80211_HE_OPERATION_PARTIAL_BSS_COLOR		0x000100000
-#define IEEE80211_HE_OPERATION_VHT_OPER_INFO			0x000200000
+#define IEEE80211_HE_OPERATION_PARTIAL_BSS_COLOR		0x00100000
+#define IEEE80211_HE_OPERATION_VHT_OPER_INFO			0x00200000
 #define IEEE80211_HE_OPERATION_MULTI_BSSID_AP			0x10000000
 #define IEEE80211_HE_OPERATION_TX_BSSID_INDICATOR		0x20000000
 #define IEEE80211_HE_OPERATION_BSS_COLOR_DISABLED		0x40000000
-- 
cgit v1.2.3


From b0aa75f0b1b2e6bc77128fab36c8ed87e84917cc Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes.berg@intel.com>
Date: Fri, 31 Aug 2018 11:31:16 +0300
Subject: ieee80211: add new VHT capability fields/parsing

IEEE 802.11-2016 extended the VHT capability fields to allow
indicating the number of spatial streams depending on the
actually used bandwidth, add support for decoding this.

Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/linux/ieee80211.h | 35 +++++++++++++++++++++++++++++++++--
 1 file changed, 33 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h
index 17ea51d088ae..280600a10111 100644
--- a/include/linux/ieee80211.h
+++ b/include/linux/ieee80211.h
@@ -1460,13 +1460,16 @@ struct ieee80211_ht_operation {
  *	STA can receive. Rate expressed in units of 1 Mbps.
  *	If this field is 0 this value should not be used to
  *	consider the highest RX data rate supported.
- *	The top 3 bits of this field are reserved.
+ *	The top 3 bits of this field indicate the Maximum NSTS,total
+ *	(a beamformee capability.)
  * @tx_mcs_map: TX MCS map 2 bits for each stream, total 8 streams
  * @tx_highest: Indicates highest long GI VHT PPDU data rate
  *	STA can transmit. Rate expressed in units of 1 Mbps.
  *	If this field is 0 this value should not be used to
  *	consider the highest TX data rate supported.
- *	The top 3 bits of this field are reserved.
+ *	The top 2 bits of this field are reserved, the
+ *	3rd bit from the top indiciates VHT Extended NSS BW
+ *	Capability.
  */
 struct ieee80211_vht_mcs_info {
 	__le16 rx_mcs_map;
@@ -1475,6 +1478,13 @@ struct ieee80211_vht_mcs_info {
 	__le16 tx_highest;
 } __packed;
 
+/* for rx_highest */
+#define IEEE80211_VHT_MAX_NSTS_TOTAL_SHIFT	13
+#define IEEE80211_VHT_MAX_NSTS_TOTAL_MASK	(7 << IEEE80211_VHT_MAX_NSTS_TOTAL_SHIFT)
+
+/* for tx_highest */
+#define IEEE80211_VHT_EXT_NSS_BW_CAPABLE	(1 << 13)
+
 /**
  * enum ieee80211_vht_mcs_support - VHT MCS support definitions
  * @IEEE80211_VHT_MCS_SUPPORT_0_7: MCSes 0-7 are supported for the
@@ -1650,6 +1660,7 @@ struct ieee80211_mu_edca_param_set {
 #define IEEE80211_VHT_CAP_SUPP_CHAN_WIDTH_160MHZ		0x00000004
 #define IEEE80211_VHT_CAP_SUPP_CHAN_WIDTH_160_80PLUS80MHZ	0x00000008
 #define IEEE80211_VHT_CAP_SUPP_CHAN_WIDTH_MASK			0x0000000C
+#define IEEE80211_VHT_CAP_SUPP_CHAN_WIDTH_SHIFT			2
 #define IEEE80211_VHT_CAP_RXLDPC				0x00000010
 #define IEEE80211_VHT_CAP_SHORT_GI_80				0x00000020
 #define IEEE80211_VHT_CAP_SHORT_GI_160				0x00000040
@@ -1678,6 +1689,26 @@ struct ieee80211_mu_edca_param_set {
 #define IEEE80211_VHT_CAP_VHT_LINK_ADAPTATION_VHT_MRQ_MFB	0x0c000000
 #define IEEE80211_VHT_CAP_RX_ANTENNA_PATTERN			0x10000000
 #define IEEE80211_VHT_CAP_TX_ANTENNA_PATTERN			0x20000000
+#define IEEE80211_VHT_CAP_EXT_NSS_BW_SHIFT			30
+#define IEEE80211_VHT_CAP_EXT_NSS_BW_MASK			0xc0000000
+
+/**
+ * ieee80211_get_vht_max_nss - return max NSS for a given bandwidth/MCS
+ * @cap: VHT capabilities of the peer
+ * @bw: bandwidth to use
+ * @mcs: MCS index to use
+ * @ext_nss_bw_capable: indicates whether or not the local transmitter
+ *	(rate scaling algorithm) can deal with the new logic
+ *	(dot11VHTExtendedNSSBWCapable)
+ *
+ * Due to the VHT Extended NSS Bandwidth Support, the maximum NSS can
+ * vary for a given BW/MCS. This function parses the data.
+ *
+ * Note: This function is exported by cfg80211.
+ */
+int ieee80211_get_vht_max_nss(struct ieee80211_vht_cap *cap,
+			      enum ieee80211_vht_chanwidth bw,
+			      int mcs, bool ext_nss_bw_capable);
 
 /* 802.11ax HE MAC capabilities */
 #define IEEE80211_HE_MAC_CAP0_HTC_HE				0x01
-- 
cgit v1.2.3


From add7453ad62f05c8f1a48675bb4dfed52e6ac878 Mon Sep 17 00:00:00 2001
From: Shaul Triebitz <shaul.triebitz@intel.com>
Date: Wed, 5 Sep 2018 08:06:08 +0300
Subject: wireless: align to draft 11ax D3.0

Align to new 11ax draft D3.0.  Change/add new MAC and PHY capabilities
and update drivers' 11ax capabilities and mac80211's debugfs
accordingly.

Signed-off-by: Shaul Triebitz <shaul.triebitz@intel.com>
Signed-off-by: Luca Coelho <luciano.coelho@intel.com>
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/linux/ieee80211.h | 72 ++++++++++++++++++++++++++++++-----------------
 1 file changed, 46 insertions(+), 26 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h
index 280600a10111..c4809ad8ab46 100644
--- a/include/linux/ieee80211.h
+++ b/include/linux/ieee80211.h
@@ -1555,11 +1555,11 @@ struct ieee80211_vht_operation {
  * struct ieee80211_he_cap_elem - HE capabilities element
  *
  * This structure is the "HE capabilities element" fixed fields as
- * described in P802.11ax_D2.0 section 9.4.2.237.2 and 9.4.2.237.3
+ * described in P802.11ax_D3.0 section 9.4.2.237.2 and 9.4.2.237.3
  */
 struct ieee80211_he_cap_elem {
-	u8 mac_cap_info[5];
-	u8 phy_cap_info[9];
+	u8 mac_cap_info[6];
+	u8 phy_cap_info[11];
 } __packed;
 
 #define IEEE80211_TX_RX_MCS_NSS_DESC_MAX_LEN	5
@@ -1738,15 +1738,15 @@ int ieee80211_get_vht_max_nss(struct ieee80211_vht_cap *cap,
 #define IEEE80211_HE_MAC_CAP1_TF_MAC_PAD_DUR_8US		0x04
 #define IEEE80211_HE_MAC_CAP1_TF_MAC_PAD_DUR_16US		0x08
 #define IEEE80211_HE_MAC_CAP1_TF_MAC_PAD_DUR_MASK		0x0c
-#define IEEE80211_HE_MAC_CAP1_MULTI_TID_AGG_QOS_1		0x00
-#define IEEE80211_HE_MAC_CAP1_MULTI_TID_AGG_QOS_2		0x10
-#define IEEE80211_HE_MAC_CAP1_MULTI_TID_AGG_QOS_3		0x20
-#define IEEE80211_HE_MAC_CAP1_MULTI_TID_AGG_QOS_4		0x30
-#define IEEE80211_HE_MAC_CAP1_MULTI_TID_AGG_QOS_5		0x40
-#define IEEE80211_HE_MAC_CAP1_MULTI_TID_AGG_QOS_6		0x50
-#define IEEE80211_HE_MAC_CAP1_MULTI_TID_AGG_QOS_7		0x60
-#define IEEE80211_HE_MAC_CAP1_MULTI_TID_AGG_QOS_8		0x70
-#define IEEE80211_HE_MAC_CAP1_MULTI_TID_AGG_QOS_MASK		0x70
+#define IEEE80211_HE_MAC_CAP1_MULTI_TID_AGG_RX_QOS_1		0x00
+#define IEEE80211_HE_MAC_CAP1_MULTI_TID_AGG_RX_QOS_2		0x10
+#define IEEE80211_HE_MAC_CAP1_MULTI_TID_AGG_RX_QOS_3		0x20
+#define IEEE80211_HE_MAC_CAP1_MULTI_TID_AGG_RX_QOS_4		0x30
+#define IEEE80211_HE_MAC_CAP1_MULTI_TID_AGG_RX_QOS_5		0x40
+#define IEEE80211_HE_MAC_CAP1_MULTI_TID_AGG_RX_QOS_6		0x50
+#define IEEE80211_HE_MAC_CAP1_MULTI_TID_AGG_RX_QOS_7		0x60
+#define IEEE80211_HE_MAC_CAP1_MULTI_TID_AGG_RX_QOS_8		0x70
+#define IEEE80211_HE_MAC_CAP1_MULTI_TID_AGG_RX_QOS_MASK		0x70
 
 /* Link adaptation is split between byte HE_MAC_CAP1 and
  * HE_MAC_CAP2. It should be set only if IEEE80211_HE_MAC_CAP0_HTC_HE
@@ -1760,14 +1760,13 @@ int ieee80211_get_vht_max_nss(struct ieee80211_vht_cap *cap,
 
 #define IEEE80211_HE_MAC_CAP2_LINK_ADAPTATION			0x01
 #define IEEE80211_HE_MAC_CAP2_ALL_ACK				0x02
-#define IEEE80211_HE_MAC_CAP2_UL_MU_RESP_SCHED			0x04
+#define IEEE80211_HE_MAC_CAP2_TRS				0x04
 #define IEEE80211_HE_MAC_CAP2_BSR				0x08
 #define IEEE80211_HE_MAC_CAP2_BCAST_TWT				0x10
 #define IEEE80211_HE_MAC_CAP2_32BIT_BA_BITMAP			0x20
 #define IEEE80211_HE_MAC_CAP2_MU_CASCADING			0x40
 #define IEEE80211_HE_MAC_CAP2_ACK_EN				0x80
 
-#define IEEE80211_HE_MAC_CAP3_GRP_ADDR_MULTI_STA_BA_DL_MU	0x01
 #define IEEE80211_HE_MAC_CAP3_OMI_CONTROL			0x02
 #define IEEE80211_HE_MAC_CAP3_OFDMA_RA				0x04
 
@@ -1775,25 +1774,34 @@ int ieee80211_get_vht_max_nss(struct ieee80211_vht_cap *cap,
  * A-MDPU Length Exponent field in the HT capabilities, VHT capabilities and the
  * same field in the HE capabilities.
  */
-#define IEEE80211_HE_MAC_CAP3_MAX_A_AMPDU_LEN_EXP_USE_VHT	0x00
-#define IEEE80211_HE_MAC_CAP3_MAX_A_AMPDU_LEN_EXP_VHT_1		0x08
-#define IEEE80211_HE_MAC_CAP3_MAX_A_AMPDU_LEN_EXP_VHT_2		0x10
-#define IEEE80211_HE_MAC_CAP3_MAX_A_AMPDU_LEN_EXP_RESERVED	0x18
-#define IEEE80211_HE_MAC_CAP3_MAX_A_AMPDU_LEN_EXP_MASK		0x18
-#define IEEE80211_HE_MAC_CAP3_A_AMSDU_FRAG			0x20
+#define IEEE80211_HE_MAC_CAP3_MAX_AMPDU_LEN_EXP_USE_VHT	0x00
+#define IEEE80211_HE_MAC_CAP3_MAX_AMPDU_LEN_EXP_VHT_1		0x08
+#define IEEE80211_HE_MAC_CAP3_MAX_AMPDU_LEN_EXP_VHT_2		0x10
+#define IEEE80211_HE_MAC_CAP3_MAX_AMPDU_LEN_EXP_RESERVED	0x18
+#define IEEE80211_HE_MAC_CAP3_MAX_AMPDU_LEN_EXP_MASK		0x18
+#define IEEE80211_HE_MAC_CAP3_AMSDU_FRAG			0x20
 #define IEEE80211_HE_MAC_CAP3_FLEX_TWT_SCHED			0x40
 #define IEEE80211_HE_MAC_CAP3_RX_CTRL_FRAME_TO_MULTIBSS		0x80
 
 #define IEEE80211_HE_MAC_CAP4_BSRP_BQRP_A_MPDU_AGG		0x01
 #define IEEE80211_HE_MAC_CAP4_QTP				0x02
 #define IEEE80211_HE_MAC_CAP4_BQR				0x04
-#define IEEE80211_HE_MAC_CAP4_SR_RESP				0x08
+#define IEEE80211_HE_MAC_CAP4_SRP_RESP				0x08
 #define IEEE80211_HE_MAC_CAP4_NDP_FB_REP			0x10
 #define IEEE80211_HE_MAC_CAP4_OPS				0x20
 #define IEEE80211_HE_MAC_CAP4_AMDSU_IN_AMPDU			0x40
+/* Multi TID agg TX is split between byte #4 and #5
+ * The value is a combination of B39,B40,B41
+ */
+#define IEEE80211_HE_MAC_CAP4_MULTI_TID_AGG_TX_QOS_B39		0x80
+
+#define IEEE80211_HE_MAC_CAP5_MULTI_TID_AGG_TX_QOS_B40		0x01
+#define IEEE80211_HE_MAC_CAP5_MULTI_TID_AGG_TX_QOS_B41		0x02
+#define IEEE80211_HE_MAC_CAP5_SUBCHAN_SELECVITE_TRANSMISSION	0x04
+#define IEEE80211_HE_MAC_CAP5_UL_2x996_TONE_RU			0x08
+#define IEEE80211_HE_MAC_CAP5_OM_CTRL_UL_MU_DATA_DIS_RX		0x10
 
 /* 802.11ax HE PHY capabilities */
-#define IEEE80211_HE_PHY_CAP0_DUAL_BAND					0x01
 #define IEEE80211_HE_PHY_CAP0_CHANNEL_WIDTH_SET_40MHZ_IN_2G		0x02
 #define IEEE80211_HE_PHY_CAP0_CHANNEL_WIDTH_SET_40MHZ_80MHZ_IN_5G	0x04
 #define IEEE80211_HE_PHY_CAP0_CHANNEL_WIDTH_SET_160MHZ_IN_5G		0x08
@@ -1810,10 +1818,10 @@ int ieee80211_get_vht_max_nss(struct ieee80211_vht_cap *cap,
 #define IEEE80211_HE_PHY_CAP1_DEVICE_CLASS_A				0x10
 #define IEEE80211_HE_PHY_CAP1_LDPC_CODING_IN_PAYLOAD			0x20
 #define IEEE80211_HE_PHY_CAP1_HE_LTF_AND_GI_FOR_HE_PPDUS_0_8US		0x40
-/* Midamble RX Max NSTS is split between byte #2 and byte #3 */
-#define IEEE80211_HE_PHY_CAP1_MIDAMBLE_RX_MAX_NSTS			0x80
+/* Midamble RX/TX Max NSTS is split between byte #2 and byte #3 */
+#define IEEE80211_HE_PHY_CAP1_MIDAMBLE_RX_TX_MAX_NSTS			0x80
 
-#define IEEE80211_HE_PHY_CAP2_MIDAMBLE_RX_MAX_NSTS			0x01
+#define IEEE80211_HE_PHY_CAP2_MIDAMBLE_RX_TX_MAX_NSTS			0x01
 #define IEEE80211_HE_PHY_CAP2_NDP_4x_LTF_AND_3_2US			0x02
 #define IEEE80211_HE_PHY_CAP2_STBC_TX_UNDER_80MHZ			0x04
 #define IEEE80211_HE_PHY_CAP2_STBC_RX_UNDER_80MHZ			0x08
@@ -1914,7 +1922,19 @@ int ieee80211_get_vht_max_nss(struct ieee80211_vht_cap *cap,
 #define IEEE80211_HE_PHY_CAP8_20MHZ_IN_160MHZ_HE_PPDU			0x04
 #define IEEE80211_HE_PHY_CAP8_80MHZ_IN_160MHZ_HE_PPDU			0x08
 #define IEEE80211_HE_PHY_CAP8_HE_ER_SU_1XLTF_AND_08_US_GI		0x10
-#define IEEE80211_HE_PHY_CAP8_MIDAMBLE_RX_2X_AND_1XLTF			0x20
+#define IEEE80211_HE_PHY_CAP8_MIDAMBLE_RX_TX_2X_AND_1XLTF		0x20
+#define IEEE80211_HE_PHY_CAP8_DCM_MAX_BW_20MHZ				0x00
+#define IEEE80211_HE_PHY_CAP8_DCM_MAX_BW_40MHZ				0x40
+#define IEEE80211_HE_PHY_CAP8_DCM_MAX_BW_80MHZ				0x80
+#define IEEE80211_HE_PHY_CAP8_DCM_MAX_BW_160_OR_80P80_MHZ		0xc0
+#define IEEE80211_HE_PHY_CAP8_DCM_MAX_BW_MASK				0xc0
+
+#define IEEE80211_HE_PHY_CAP9_LONGER_THAN_16_SIGB_OFDM_SYM		0x01
+#define IEEE80211_HE_PHY_CAP9_NON_TRIGGERED_CQI_FEEDBACK		0x02
+#define IEEE80211_HE_PHY_CAP9_TX_1024_QAM_LESS_THAN_242_TONE_RU		0x04
+#define IEEE80211_HE_PHY_CAP9_RX_1024_QAM_LESS_THAN_242_TONE_RU		0x08
+#define IEEE80211_HE_PHY_CAP9_RX_FULL_BW_SU_USING_MU_WITH_COMP_SIGB	0x10
+#define IEEE80211_HE_PHY_CAP9_RX_FULL_BW_SU_USING_MU_WITH_NON_COMP_SIGB	0x20
 
 /* 802.11ax HE TX/RX MCS NSS Support  */
 #define IEEE80211_TX_RX_MCS_NSS_SUPP_HIGHEST_MCS_POS			(3)
-- 
cgit v1.2.3


From 83033688b7ade18d2dbbcefa810f02ff66ba549d Mon Sep 17 00:00:00 2001
From: Vlad Buslov <vladbu@mellanox.com>
Date: Mon, 23 Jul 2018 10:55:39 +0300
Subject: net/mlx5: Change flow counters addlist type to single linked list

In order to prevent flow counters stats work function from traversing whole
flow counters tree while searching for deleted flow counters, new list to
store deleted flow counters will be added to struct mlx5_fc_stats. However,
the flow counter structure itself has no space left to store any more data
in first cache line. To free space that is needed to store additional list
node, convert current addlist double linked list (two pointers per node) to
atomic single linked list (one pointer per node).

Lockless NULL-terminated single linked list data type doesn't require any
additional external synchronization for operations used by flow counters
module (add single new element, remove all elements from list and traverse
them). Remove addlist_lock that is no longer needed.

Signed-off-by: Vlad Buslov <vladbu@mellanox.com>
Acked-by: Amir Vadai <amir@vadai.me>
Reviewed-by: Paul Blakey <paulb@mellanox.com>
Reviewed-by: Roi Dayan <roid@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
 include/linux/mlx5/driver.h | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h
index 7a452716de4b..c00549293982 100644
--- a/include/linux/mlx5/driver.h
+++ b/include/linux/mlx5/driver.h
@@ -584,9 +584,7 @@ struct mlx5_irq_info {
 
 struct mlx5_fc_stats {
 	struct rb_root counters;
-	struct list_head addlist;
-	/* protect addlist add/splice operations */
-	spinlock_t addlist_lock;
+	struct llist_head addlist;
 
 	struct workqueue_struct *wq;
 	struct delayed_work work;
-- 
cgit v1.2.3


From 6e5e22839136fdb466af0aa46ff2404713dff974 Mon Sep 17 00:00:00 2001
From: Vlad Buslov <vladbu@mellanox.com>
Date: Mon, 23 Jul 2018 11:32:05 +0300
Subject: net/mlx5: Add new list to store deleted flow counters

In order to prevent flow counters stats work function from traversing whole
flow counters tree while searching for deleted flow counters, new list to
store deleted flow counters is added to struct mlx5_fc_stats. Lockless
NULL-terminated single linked list data type is used due to following
reasons:
 - This use case only needs to add single element to list and
 remove/iterate whole list. Lockless list doesn't require any additional
 synchronization for these operations.
 - First cache line of flow counter data structure only has space to store
 single additional pointer, which precludes usage of double linked list.

Remove flow counter 'deleted' flag that is no longer needed.

Signed-off-by: Vlad Buslov <vladbu@mellanox.com>
Acked-by: Amir Vadai <amir@vadai.me>
Reviewed-by: Paul Blakey <paulb@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
 include/linux/mlx5/driver.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h
index c00549293982..4b53ac64004b 100644
--- a/include/linux/mlx5/driver.h
+++ b/include/linux/mlx5/driver.h
@@ -585,6 +585,7 @@ struct mlx5_irq_info {
 struct mlx5_fc_stats {
 	struct rb_root counters;
 	struct llist_head addlist;
+	struct llist_head dellist;
 
 	struct workqueue_struct *wq;
 	struct delayed_work work;
-- 
cgit v1.2.3


From 9aff93d7d0d4b3f3076d7bd12a4ad06ef1cf9804 Mon Sep 17 00:00:00 2001
From: Vlad Buslov <vladbu@mellanox.com>
Date: Tue, 24 Jul 2018 09:52:11 +0300
Subject: net/mlx5: Store flow counters in a list

In order to improve performance of flow counter stats query loop that
traverses all configured flow counters, replace rb_tree with double-linked
list. This change improves performance of traversing flow counters by
removing the tree traversal. (profiling data showed that call to rb_next
was most top CPU consumer)

However, lookup of flow flow counter in list becomes linear, instead of
logarithmic. This problem is fixed by next patch in series, which adds idr
for fast lookup. Idr is to be used because it is not an intrusive data
structure and doesn't require adding any new members to struct mlx5_fc,
which allows its control data part to stay <= 1 cache line in size.

Signed-off-by: Vlad Buslov <vladbu@mellanox.com>
Acked-by: Amir Vadai <amir@vadai.me>
Reviewed-by: Paul Blakey <paulb@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
 include/linux/mlx5/driver.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h
index 4b53ac64004b..61bed33e6675 100644
--- a/include/linux/mlx5/driver.h
+++ b/include/linux/mlx5/driver.h
@@ -583,7 +583,7 @@ struct mlx5_irq_info {
 };
 
 struct mlx5_fc_stats {
-	struct rb_root counters;
+	struct list_head counters;
 	struct llist_head addlist;
 	struct llist_head dellist;
 
-- 
cgit v1.2.3


From 12d6066c3b29c5606c4a2466f964fbd9ede803c5 Mon Sep 17 00:00:00 2001
From: Vlad Buslov <vladbu@mellanox.com>
Date: Tue, 24 Jul 2018 16:37:40 +0300
Subject: net/mlx5: Add flow counters idr

Previous patch in series changed flow counter storage structure from
rb_tree to linked list in order to improve flow counter traversal
performance. The drawback of such solution is that flow counter lookup by
id becomes linear in complexity.

Store pointers to flow counters in idr in order to improve lookup
performance to logarithmic again. Idr is non-intrusive data structure and
doesn't require extending flow counter struct with new elements. This means
that idr can be used for lookup, while linked list from previous patch is
used for traversal, and struct mlx5_fc size is <= 2 cache lines.

Signed-off-by: Vlad Buslov <vladbu@mellanox.com>
Acked-by: Amir Vadai <amir@vadai.me>
Reviewed-by: Paul Blakey <paulb@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
 include/linux/mlx5/driver.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h
index 61bed33e6675..2a0c845f6bdb 100644
--- a/include/linux/mlx5/driver.h
+++ b/include/linux/mlx5/driver.h
@@ -583,6 +583,8 @@ struct mlx5_irq_info {
 };
 
 struct mlx5_fc_stats {
+	spinlock_t counters_idr_lock; /* protects counters_idr */
+	struct idr counters_idr;
 	struct list_head counters;
 	struct llist_head addlist;
 	struct llist_head dellist;
-- 
cgit v1.2.3


From 64109f1dc41f25f4a9c6b114e04b6266bf4128ad Mon Sep 17 00:00:00 2001
From: Shay Agroskin <shayag@mellanox.com>
Date: Tue, 5 Jun 2018 09:22:18 +0300
Subject: net/mlx5e: Replace PTP clock lock from RW lock to seq lock

Changed "priv.clock.lock" lock from 'rw_lock' to 'seq_lock'
in order to improve packet rate performance.

Tested on Intel(R) Xeon(R) CPU E5-2660 v2 @ 2.20GHz.
Sent 64b packets between two peers connected by ConnectX-5,
and measured packet rate for the receiver in three modes:
	no time-stamping (base rate)
	time-stamping using rw_lock (old lock) for critical region
	time-stamping using seq_lock (new lock) for critical region
Only the receiver time stamped its packets.

The measured packet rate improvements are:

	Single flow (multiple TX rings to single RX ring):
		without timestamping:	  4.26 (M packets)/sec
		with rw-lock (old lock):  4.1  (M packets)/sec
		with seq-lock (new lock): 4.16 (M packets)/sec
		1.46% improvement

	Multiple flows (multiple TX rings to six RX rings):
		without timestamping: 	  22   (M packets)/sec
		with rw-lock (old lock):  11.7 (M packets)/sec
		with seq-lock (new lock): 21.3 (M packets)/sec
		82.05% improvement

The packet rate improvement is due to the lack of atomic operations
for the 'readers' by the seq-lock.
Since there are much more 'readers' than 'writers' contention
on this lock, almost all atomic operations are saved.
this results in a dramatic decrease in overall
cache misses.

Signed-off-by: Shay Agroskin <shayag@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
 include/linux/mlx5/driver.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h
index 2a0c845f6bdb..b7fce2c9443d 100644
--- a/include/linux/mlx5/driver.h
+++ b/include/linux/mlx5/driver.h
@@ -805,7 +805,7 @@ struct mlx5_pps {
 };
 
 struct mlx5_clock {
-	rwlock_t                   lock;
+	seqlock_t                  lock;
 	struct cyclecounter        cycles;
 	struct timecounter         tc;
 	struct hwtstamp_config     hwtstamp_config;
-- 
cgit v1.2.3


From fa788d986a3aac5069378ed04697bd06f83d3488 Mon Sep 17 00:00:00 2001
From: Vincent Whitchurch <vincent.whitchurch@axis.com>
Date: Mon, 3 Sep 2018 16:23:36 +0200
Subject: packet: add sockopt to ignore outgoing packets

Currently, the only way to ignore outgoing packets on a packet socket is
via the BPF filter.  With MSG_ZEROCOPY, packets that are looped into
AF_PACKET are copied in dev_queue_xmit_nit(), and this copy happens even
if the filter run from packet_rcv() would reject them.  So the presence
of a packet socket on the interface takes away the benefits of
MSG_ZEROCOPY, even if the packet socket is not interested in outgoing
packets.  (Even when MSG_ZEROCOPY is not used, the skb is unnecessarily
cloned, but the cost for that is much lower.)

Add a socket option to allow AF_PACKET sockets to ignore outgoing
packets to solve this.  Note that the *BSDs already have something
similar: BIOCSSEESENT/BIOCSDIRECTION and BIOCSDIRFILT.

The first intended user is lldpd.

Signed-off-by: Vincent Whitchurch <vincent.whitchurch@axis.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netdevice.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 4271f6b4e419..e2b3bd750c98 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -2343,6 +2343,7 @@ static inline struct sk_buff *call_gro_receive_sk(gro_receive_sk_t cb,
 
 struct packet_type {
 	__be16			type;	/* This is really htons(ether_type). */
+	bool			ignore_outgoing;
 	struct net_device	*dev;	/* NULL is wildcarded here	     */
 	int			(*func) (struct sk_buff *,
 					 struct net_device *,
-- 
cgit v1.2.3


From a3f723079df85eafc10c628dabdfcf374b8e1523 Mon Sep 17 00:00:00 2001
From: Denis Bolotin <denis.bolotin@cavium.com>
Date: Wed, 5 Sep 2018 18:35:55 +0300
Subject: qed*: Utilize FW 8.37.7.0

This patch adds a new qed firmware with fixes and support for new features.

Fixes:
- Fix a rare case of device crash with iWARP, iSCSI or FCoE offload.
- Fix GRE tunneled traffic when iWARP offload is enabled.
- Fix RoCE failure in ib_send_bw when using inline data.
- Fix latency optimization flow for inline WQEs.
- BigBear 100G fix

RDMA:
- Reduce task context size.
- Application page sizes above 2GB support.
- Performance improvements.

ETH:
- Tenant DCB support.
- Replace RSS indirection table update interface.

Misc:
- Debug Tools changes.

Signed-off-by: Denis Bolotin <denis.bolotin@cavium.com>
Signed-off-by: Ariel Elior <ariel.elior@cavium.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/qed/common_hsi.h   | 10 +++++-----
 include/linux/qed/iscsi_common.h |  2 +-
 2 files changed, 6 insertions(+), 6 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/qed/common_hsi.h b/include/linux/qed/common_hsi.h
index 0081fa6d1268..03f59a28fefd 100644
--- a/include/linux/qed/common_hsi.h
+++ b/include/linux/qed/common_hsi.h
@@ -110,7 +110,7 @@
 
 #define FW_MAJOR_VERSION	8
 #define FW_MINOR_VERSION        37
-#define FW_REVISION_VERSION     2
+#define FW_REVISION_VERSION     7
 #define FW_ENGINEERING_VERSION	0
 
 /***********************/
@@ -931,12 +931,12 @@ struct db_rdma_dpm_params {
 #define DB_RDMA_DPM_PARAMS_WQE_SIZE_SHIFT		16
 #define DB_RDMA_DPM_PARAMS_RESERVED0_MASK		0x1
 #define DB_RDMA_DPM_PARAMS_RESERVED0_SHIFT		27
-#define DB_RDMA_DPM_PARAMS_COMPLETION_FLG_MASK		0x1
-#define DB_RDMA_DPM_PARAMS_COMPLETION_FLG_SHIFT		28
+#define DB_RDMA_DPM_PARAMS_ACK_REQUEST_MASK		0x1
+#define DB_RDMA_DPM_PARAMS_ACK_REQUEST_SHIFT		28
 #define DB_RDMA_DPM_PARAMS_S_FLG_MASK			0x1
 #define DB_RDMA_DPM_PARAMS_S_FLG_SHIFT			29
-#define DB_RDMA_DPM_PARAMS_RESERVED1_MASK		0x1
-#define DB_RDMA_DPM_PARAMS_RESERVED1_SHIFT		30
+#define DB_RDMA_DPM_PARAMS_COMPLETION_FLG_MASK		0x1
+#define DB_RDMA_DPM_PARAMS_COMPLETION_FLG_SHIFT		30
 #define DB_RDMA_DPM_PARAMS_CONN_TYPE_IS_IWARP_MASK	0x1
 #define DB_RDMA_DPM_PARAMS_CONN_TYPE_IS_IWARP_SHIFT	31
 };
diff --git a/include/linux/qed/iscsi_common.h b/include/linux/qed/iscsi_common.h
index b34c573f2b30..66aba505ec56 100644
--- a/include/linux/qed/iscsi_common.h
+++ b/include/linux/qed/iscsi_common.h
@@ -896,7 +896,7 @@ struct e4_ustorm_iscsi_task_ag_ctx {
 	__le32 exp_cont_len;
 	__le32 total_data_acked;
 	__le32 exp_data_acked;
-	u8 next_tid_valid;
+	u8 byte2;
 	u8 byte3;
 	__le16 word1;
 	__le16 next_tid;
-- 
cgit v1.2.3


From 8b69bd7d8a8927d537f134c37bcca6cbfa58e1b2 Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Sat, 11 Aug 2018 18:43:38 -0700
Subject: ppp: Remove direct skb_queue_head list pointer access.

Add a helper, __skb_peek(), and use it in ppp_mp_reconstruct().

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/skbuff.h | 11 +++++++++++
 1 file changed, 11 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 17a13e4785fc..89283b77294d 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -1592,6 +1592,17 @@ static inline struct sk_buff *skb_peek(const struct sk_buff_head *list_)
 	return skb;
 }
 
+/**
+ *	__skb_peek - peek at the head of a non-empty &sk_buff_head
+ *	@list_: list to peek at
+ *
+ *	Like skb_peek(), but the caller knows that the list is not empty.
+ */
+static inline struct sk_buff *__skb_peek(const struct sk_buff_head *list_)
+{
+	return list_->next;
+}
+
 /**
  *	skb_peek_next - peek skb following the given one from a queue
  *	@skb: skb to start from
-- 
cgit v1.2.3


From a8305bff685252e80b7c60f4f5e7dd2e63e38218 Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Sun, 29 Jul 2018 20:42:53 -0700
Subject: net: Add and use skb_mark_not_on_list().

An SKB is not on a list if skb->next is NULL.

Codify this convention into a helper function and use it
where we are dequeueing an SKB and need to mark it as such.

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/skbuff.h | 5 +++++
 1 file changed, 5 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 89283b77294d..c4c9e3f5cd9a 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -1339,6 +1339,11 @@ static inline void skb_zcopy_abort(struct sk_buff *skb)
 	}
 }
 
+static inline void skb_mark_not_on_list(struct sk_buff *skb)
+{
+	skb->next = NULL;
+}
+
 /**
  *	skb_queue_empty - check if a queue is empty
  *	@list: queue head
-- 
cgit v1.2.3


From 992cba7e276d438ac8b0a8c17b147b37c8c286f7 Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Tue, 31 Jul 2018 15:27:56 -0700
Subject: net: Add and use skb_list_del_init().

It documents what is happening, and eliminates the spurious list
pointer poisoning.

In the long term, in order to get proper list head debugging, we
might want to use the list poison value as the indicator that
an SKB is a singleton and not on a list.

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/skbuff.h | 6 ++++++
 1 file changed, 6 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index c4c9e3f5cd9a..e3a53ca4a9b5 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -1344,6 +1344,12 @@ static inline void skb_mark_not_on_list(struct sk_buff *skb)
 	skb->next = NULL;
 }
 
+static inline void skb_list_del_init(struct sk_buff *skb)
+{
+	__list_del_entry(&skb->list);
+	skb_mark_not_on_list(skb);
+}
+
 /**
  *	skb_queue_empty - check if a queue is empty
  *	@list: queue head
-- 
cgit v1.2.3


From 41124fa64d4b298b82266b7ddbefc43540b77b44 Mon Sep 17 00:00:00 2001
From: Andrew Lunn <andrew@lunn.ch>
Date: Wed, 12 Sep 2018 01:53:14 +0200
Subject: net: ethernet: Add helper to remove a supported link mode

Some MAC hardware cannot support a subset of link modes. e.g. often
1Gbps Full duplex is supported, but Half duplex is not. Add a helper
to remove such a link mode.

Signed-off-by: Andrew Lunn <andrew@lunn.ch>
Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/phy.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/phy.h b/include/linux/phy.h
index cd6f637cbbfb..9c4c3eca8cf2 100644
--- a/include/linux/phy.h
+++ b/include/linux/phy.h
@@ -1049,6 +1049,7 @@ int phy_mii_ioctl(struct phy_device *phydev, struct ifreq *ifr, int cmd);
 int phy_start_interrupts(struct phy_device *phydev);
 void phy_print_status(struct phy_device *phydev);
 int phy_set_max_speed(struct phy_device *phydev, u32 max_speed);
+void phy_remove_link_mode(struct phy_device *phydev, u32 link_mode);
 
 int phy_register_fixup(const char *bus_id, u32 phy_uid, u32 phy_uid_mask,
 		       int (*run)(struct phy_device *));
-- 
cgit v1.2.3


From af8d9bb2f2f405ad541794b46f9d7bc70f13e5cb Mon Sep 17 00:00:00 2001
From: Andrew Lunn <andrew@lunn.ch>
Date: Wed, 12 Sep 2018 01:53:15 +0200
Subject: net: ethernet: Add helper for MACs which support asym pause

Rather than have the MAC drivers manipulate phydev members to indicate
they support Asym Pause, add a helper function.

Signed-off-by: Andrew Lunn <andrew@lunn.ch>
Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/phy.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/phy.h b/include/linux/phy.h
index 9c4c3eca8cf2..e2db819807c1 100644
--- a/include/linux/phy.h
+++ b/include/linux/phy.h
@@ -1050,6 +1050,7 @@ int phy_start_interrupts(struct phy_device *phydev);
 void phy_print_status(struct phy_device *phydev);
 int phy_set_max_speed(struct phy_device *phydev, u32 max_speed);
 void phy_remove_link_mode(struct phy_device *phydev, u32 link_mode);
+void phy_support_asym_pause(struct phy_device *phydev);
 
 int phy_register_fixup(const char *bus_id, u32 phy_uid, u32 phy_uid_mask,
 		       int (*run)(struct phy_device *));
-- 
cgit v1.2.3


From c306ad36184fb7d0bd53f45441f45c1810e88a53 Mon Sep 17 00:00:00 2001
From: Andrew Lunn <andrew@lunn.ch>
Date: Wed, 12 Sep 2018 01:53:16 +0200
Subject: net: ethernet: Add helper for MACs which support pause

Rather than have the MAC drivers manipulate phydev members, add a
helper function for MACs supporting Pause, but not Asym Pause.

Signed-off-by: Andrew Lunn <andrew@lunn.ch>
Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/phy.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/phy.h b/include/linux/phy.h
index e2db819807c1..bc5d6c3f1388 100644
--- a/include/linux/phy.h
+++ b/include/linux/phy.h
@@ -1050,6 +1050,7 @@ int phy_start_interrupts(struct phy_device *phydev);
 void phy_print_status(struct phy_device *phydev);
 int phy_set_max_speed(struct phy_device *phydev, u32 max_speed);
 void phy_remove_link_mode(struct phy_device *phydev, u32 link_mode);
+void phy_support_sym_pause(struct phy_device *phydev);
 void phy_support_asym_pause(struct phy_device *phydev);
 
 int phy_register_fixup(const char *bus_id, u32 phy_uid, u32 phy_uid_mask,
-- 
cgit v1.2.3


From 70814e819c1139e5e7faacb3700eab5eac559272 Mon Sep 17 00:00:00 2001
From: Andrew Lunn <andrew@lunn.ch>
Date: Wed, 12 Sep 2018 01:53:17 +0200
Subject: net: ethernet: Add helper for set_pauseparam for Asym Pause

ethtool can be used to enable/disable pause. Add a helper to configure
the PHY when asym pause is supported.

Signed-off-by: Andrew Lunn <andrew@lunn.ch>
Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/phy.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/phy.h b/include/linux/phy.h
index bc5d6c3f1388..e4062ba7472f 100644
--- a/include/linux/phy.h
+++ b/include/linux/phy.h
@@ -1052,6 +1052,7 @@ int phy_set_max_speed(struct phy_device *phydev, u32 max_speed);
 void phy_remove_link_mode(struct phy_device *phydev, u32 link_mode);
 void phy_support_sym_pause(struct phy_device *phydev);
 void phy_support_asym_pause(struct phy_device *phydev);
+void phy_set_asym_pause(struct phy_device *phydev, bool rx, bool tx);
 
 int phy_register_fixup(const char *bus_id, u32 phy_uid, u32 phy_uid_mask,
 		       int (*run)(struct phy_device *));
-- 
cgit v1.2.3


From 0c122405d4c3ec638ba00865c872ec5a3ed1a6c0 Mon Sep 17 00:00:00 2001
From: Andrew Lunn <andrew@lunn.ch>
Date: Wed, 12 Sep 2018 01:53:18 +0200
Subject: net: ethernet: Add helper for set_pauseparam for Pause

ethtool can be used to enable/disable pause. Add a helper to configure
the PHY when Pause is supported.

Signed-off-by: Andrew Lunn <andrew@lunn.ch>
Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/phy.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/phy.h b/include/linux/phy.h
index e4062ba7472f..8521391ebb20 100644
--- a/include/linux/phy.h
+++ b/include/linux/phy.h
@@ -1052,6 +1052,8 @@ int phy_set_max_speed(struct phy_device *phydev, u32 max_speed);
 void phy_remove_link_mode(struct phy_device *phydev, u32 link_mode);
 void phy_support_sym_pause(struct phy_device *phydev);
 void phy_support_asym_pause(struct phy_device *phydev);
+void phy_set_sym_pause(struct phy_device *phydev, bool rx, bool tx,
+		       bool autoneg);
 void phy_set_asym_pause(struct phy_device *phydev, bool rx, bool tx);
 
 int phy_register_fixup(const char *bus_id, u32 phy_uid, u32 phy_uid_mask,
-- 
cgit v1.2.3


From 22b7d29926b577ff4f480611380d03268545b787 Mon Sep 17 00:00:00 2001
From: Andrew Lunn <andrew@lunn.ch>
Date: Wed, 12 Sep 2018 01:53:19 +0200
Subject: net: ethernet: Add helper to determine if pause configuration is
 supported

Rather than have MAC drivers open code the test, add a helper in
phylib. This will help when we change the type of phydev->supported.

Signed-off-by: Andrew Lunn <andrew@lunn.ch>
Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/phy.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/phy.h b/include/linux/phy.h
index 8521391ebb20..192a1fa0c73b 100644
--- a/include/linux/phy.h
+++ b/include/linux/phy.h
@@ -1055,6 +1055,8 @@ void phy_support_asym_pause(struct phy_device *phydev);
 void phy_set_sym_pause(struct phy_device *phydev, bool rx, bool tx,
 		       bool autoneg);
 void phy_set_asym_pause(struct phy_device *phydev, bool rx, bool tx);
+bool phy_validate_pause(struct phy_device *phydev,
+			struct ethtool_pauseparam *pp);
 
 int phy_register_fixup(const char *bus_id, u32 phy_uid, u32 phy_uid_mask,
 		       int (*run)(struct phy_device *));
-- 
cgit v1.2.3


From 15033f0457dca569b284bef0c8d3ad55fb37eacb Mon Sep 17 00:00:00 2001
From: Andre Naujoks <nautsch2@gmail.com>
Date: Mon, 10 Sep 2018 10:27:15 +0200
Subject: ipv6: Add sockopt IPV6_MULTICAST_ALL analogue to IP_MULTICAST_ALL

The socket option will be enabled by default to ensure current behaviour
is not changed. This is the same for the IPv4 version.

A socket bound to in6addr_any and a specific port will receive all traffic
on that port. Analogue to IP_MULTICAST_ALL, disable this behaviour, if
one or more multicast groups were joined (using said socket) and only
pass on multicast traffic from groups, which were explicitly joined via
this socket.

Without this option disabled a socket (system even) joined to multiple
multicast groups is very hard to get right. Filtering by destination
address has to take place in user space to avoid receiving multicast
traffic from other multicast groups, which might have traffic on the same
port.

The extension of the IP_MULTICAST_ALL socketoption to just apply to ipv6,
too, is not done to avoid changing the behaviour of current applications.

Signed-off-by: Andre Naujoks <nautsch2@gmail.com>
Acked-By: YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/ipv6.h | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h
index 8415bf1a9776..495e834c1367 100644
--- a/include/linux/ipv6.h
+++ b/include/linux/ipv6.h
@@ -274,7 +274,8 @@ struct ipv6_pinfo {
 						 */
 				dontfrag:1,
 				autoflowlabel:1,
-				autoflowlabel_set:1;
+				autoflowlabel_set:1,
+				mc_all:1;
 	__u8			min_hopcount;
 	__u8			tclass;
 	__be32			rcv_flowinfo;
-- 
cgit v1.2.3


From fe8dd45bb7556246c6b76277b1ba4296c91c2505 Mon Sep 17 00:00:00 2001
From: Jason Wang <jasowang@redhat.com>
Date: Wed, 12 Sep 2018 11:17:06 +0800
Subject: tun: switch to new type of msg_control

This patch introduces to a new tun/tap specific msg_control:

#define TUN_MSG_UBUF 1
#define TUN_MSG_PTR  2
struct tun_msg_ctl {
       int type;
       void *ptr;
};

This allows us to pass different kinds of msg_control through
sendmsg(). The first supported type is ubuf (TUN_MSG_UBUF) which will
be used by the existed vhost_net zerocopy code. The second is XDP
buff, which allows vhost_net to pass XDP buff to TUN. This could be
used to implement accepting an array of XDP buffs from vhost_net in
the following patches.

Signed-off-by: Jason Wang <jasowang@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/if_tun.h | 14 ++++++++++++++
 1 file changed, 14 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/if_tun.h b/include/linux/if_tun.h
index 3d2996dc7d85..12e3eebf0ce6 100644
--- a/include/linux/if_tun.h
+++ b/include/linux/if_tun.h
@@ -16,9 +16,23 @@
 #define __IF_TUN_H
 
 #include <uapi/linux/if_tun.h>
+#include <uapi/linux/virtio_net.h>
 
 #define TUN_XDP_FLAG 0x1UL
 
+#define TUN_MSG_UBUF 1
+#define TUN_MSG_PTR  2
+struct tun_msg_ctl {
+	unsigned short type;
+	unsigned short num;
+	void *ptr;
+};
+
+struct tun_xdp_hdr {
+	int buflen;
+	struct virtio_net_hdr gso;
+};
+
 #if defined(CONFIG_TUN) || defined(CONFIG_TUN_MODULE)
 struct socket *tun_get_socket(struct file *);
 struct ptr_ring *tun_get_tx_ring(struct file *file);
-- 
cgit v1.2.3


From 52bb6677d530d37055092d86b4eab69dce6c166a Mon Sep 17 00:00:00 2001
From: Li RongQing <lirongqing@baidu.com>
Date: Fri, 14 Sep 2018 16:00:51 +0800
Subject: net: move definition of pcpu_lstats to header file

pcpu_lstats is defined in several files, so unify them as one
and move to header file

Signed-off-by: Zhang Yu <zhangyu31@baidu.com>
Signed-off-by: Li RongQing <lirongqing@baidu.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netdevice.h | 6 ++++++
 1 file changed, 6 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index e2b3bd750c98..baed5d5088c5 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -2382,6 +2382,12 @@ struct pcpu_sw_netstats {
 	struct u64_stats_sync   syncp;
 };
 
+struct pcpu_lstats {
+	u64 packets;
+	u64 bytes;
+	struct u64_stats_sync syncp;
+};
+
 #define __netdev_alloc_pcpu_stats(type, gfp)				\
 ({									\
 	typeof(type) __percpu *pcpu_stats = alloc_percpu_gfp(type, gfp);\
-- 
cgit v1.2.3


From d58e468b1112dcd1d5193c0a89ff9f98b5a3e8b9 Mon Sep 17 00:00:00 2001
From: Petar Penkov <ppenkov@google.com>
Date: Fri, 14 Sep 2018 07:46:18 -0700
Subject: flow_dissector: implements flow dissector BPF hook

Adds a hook for programs of type BPF_PROG_TYPE_FLOW_DISSECTOR and
attach type BPF_FLOW_DISSECTOR that is executed in the flow dissector
path. The BPF program is per-network namespace.

Signed-off-by: Petar Penkov <ppenkov@google.com>
Signed-off-by: Willem de Bruijn <willemb@google.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 include/linux/bpf.h       | 1 +
 include/linux/bpf_types.h | 1 +
 include/linux/skbuff.h    | 7 +++++++
 3 files changed, 9 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 523481a3471b..988a00797bcd 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -212,6 +212,7 @@ enum bpf_reg_type {
 	PTR_TO_PACKET_META,	 /* skb->data - meta_len */
 	PTR_TO_PACKET,		 /* reg points to skb->data */
 	PTR_TO_PACKET_END,	 /* skb->data + headlen */
+	PTR_TO_FLOW_KEYS,	 /* reg points to bpf_flow_keys */
 };
 
 /* The information passed from prog-specific *_is_valid_access
diff --git a/include/linux/bpf_types.h b/include/linux/bpf_types.h
index cd26c090e7c0..22083712dd18 100644
--- a/include/linux/bpf_types.h
+++ b/include/linux/bpf_types.h
@@ -32,6 +32,7 @@ BPF_PROG_TYPE(BPF_PROG_TYPE_LIRC_MODE2, lirc_mode2)
 #ifdef CONFIG_INET
 BPF_PROG_TYPE(BPF_PROG_TYPE_SK_REUSEPORT, sk_reuseport)
 #endif
+BPF_PROG_TYPE(BPF_PROG_TYPE_FLOW_DISSECTOR, flow_dissector)
 
 BPF_MAP_TYPE(BPF_MAP_TYPE_ARRAY, array_map_ops)
 BPF_MAP_TYPE(BPF_MAP_TYPE_PERCPU_ARRAY, percpu_array_map_ops)
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 17a13e4785fc..ce0e863f02a2 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -243,6 +243,8 @@ struct scatterlist;
 struct pipe_inode_info;
 struct iov_iter;
 struct napi_struct;
+struct bpf_prog;
+union bpf_attr;
 
 #if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
 struct nf_conntrack {
@@ -1192,6 +1194,11 @@ void skb_flow_dissector_init(struct flow_dissector *flow_dissector,
 			     const struct flow_dissector_key *key,
 			     unsigned int key_count);
 
+int skb_flow_dissector_bpf_prog_attach(const union bpf_attr *attr,
+				       struct bpf_prog *prog);
+
+int skb_flow_dissector_bpf_prog_detach(const union bpf_attr *attr);
+
 bool __skb_flow_dissect(const struct sk_buff *skb,
 			struct flow_dissector *flow_dissector,
 			void *target_container,
-- 
cgit v1.2.3


From 2953d80ff04862b26a2e628fb3948868f54d753d Mon Sep 17 00:00:00 2001
From: Florian Westphal <fw@strlen.de>
Date: Fri, 31 Aug 2018 20:29:37 +0200
Subject: netfilter: remove obsolete need_conntrack stub

as of a0ae2562c6c4b27 ("netfilter: conntrack: remove l3proto
abstraction") there are no users anymore.

Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/linux/netfilter/nf_conntrack_common.h | 3 ---
 1 file changed, 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/netfilter/nf_conntrack_common.h b/include/linux/netfilter/nf_conntrack_common.h
index 03097fa70975..e142b2b5f1ea 100644
--- a/include/linux/netfilter/nf_conntrack_common.h
+++ b/include/linux/netfilter/nf_conntrack_common.h
@@ -19,7 +19,4 @@ struct ip_conntrack_stat {
 	unsigned int search_restart;
 };
 
-/* call to create an explicit dependency on nf_conntrack. */
-void need_conntrack(void);
-
 #endif /* _NF_CONNTRACK_COMMON_H */
-- 
cgit v1.2.3


From 14d73416792afa84f6a7245ee474d2432069da56 Mon Sep 17 00:00:00 2001
From: Li RongQing <lirongqing@baidu.com>
Date: Mon, 17 Sep 2018 18:46:55 +0800
Subject: veth: rename pcpu_vstats as pcpu_lstats

struct pcpu_vstats and pcpu_lstats have same members and
usage, and pcpu_lstats is used in many files, so rename
pcpu_vstats as pcpu_lstats to reduce duplicate definition

Signed-off-by: Zhang Yu <zhangyu31@baidu.com>
Signed-off-by: Li RongQing <lirongqing@baidu.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netdevice.h | 1 -
 1 file changed, 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index baed5d5088c5..1cbbf77a685f 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -2000,7 +2000,6 @@ struct net_device {
 		struct pcpu_lstats __percpu		*lstats;
 		struct pcpu_sw_netstats __percpu	*tstats;
 		struct pcpu_dstats __percpu		*dstats;
-		struct pcpu_vstats __percpu		*vstats;
 	};
 
 #if IS_ENABLED(CONFIG_GARP)
-- 
cgit v1.2.3


From 2dfd184abd38fd72d80715fa8b00c9de78490200 Mon Sep 17 00:00:00 2001
From: Willem de Bruijn <willemb@google.com>
Date: Tue, 18 Sep 2018 16:20:18 -0400
Subject: flow_dissector: fix build failure without CONFIG_NET

If boolean CONFIG_BPF_SYSCALL is enabled, kernel/bpf/syscall.c will
call flow_dissector functions from net/core/flow_dissector.c.

This causes this build failure if CONFIG_NET is disabled:

    kernel/bpf/syscall.o: In function `__x64_sys_bpf':
    syscall.c:(.text+0x3278): undefined reference to
    `skb_flow_dissector_bpf_prog_attach'
    syscall.c:(.text+0x3310): undefined reference to
    `skb_flow_dissector_bpf_prog_detach'
    kernel/bpf/syscall.o:(.rodata+0x3f0): undefined reference to
    `flow_dissector_prog_ops'
    kernel/bpf/verifier.o:(.rodata+0x250): undefined reference to
    `flow_dissector_verifier_ops'

Analogous to other optional BPF program types in syscall.c, add stubs
if the relevant functions are not compiled and move the BPF_PROG_TYPE
definition in the #ifdef CONFIG_NET block.

Fixes: d58e468b1112 ("flow_dissector: implements flow dissector BPF hook")
Reported-by: Randy Dunlap <rdunlap@infradead.org>
Signed-off-by: Willem de Bruijn <willemb@google.com>
Acked-by: Randy Dunlap <rdunlap@infradead.org> # build-tested
Acked-by: Yonghong Song <yhs@fb.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 include/linux/bpf_types.h |  2 +-
 include/linux/skbuff.h    | 13 +++++++++++++
 2 files changed, 14 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/bpf_types.h b/include/linux/bpf_types.h
index 22083712dd18..c9bd6fb765b0 100644
--- a/include/linux/bpf_types.h
+++ b/include/linux/bpf_types.h
@@ -16,6 +16,7 @@ BPF_PROG_TYPE(BPF_PROG_TYPE_LWT_SEG6LOCAL, lwt_seg6local)
 BPF_PROG_TYPE(BPF_PROG_TYPE_SOCK_OPS, sock_ops)
 BPF_PROG_TYPE(BPF_PROG_TYPE_SK_SKB, sk_skb)
 BPF_PROG_TYPE(BPF_PROG_TYPE_SK_MSG, sk_msg)
+BPF_PROG_TYPE(BPF_PROG_TYPE_FLOW_DISSECTOR, flow_dissector)
 #endif
 #ifdef CONFIG_BPF_EVENTS
 BPF_PROG_TYPE(BPF_PROG_TYPE_KPROBE, kprobe)
@@ -32,7 +33,6 @@ BPF_PROG_TYPE(BPF_PROG_TYPE_LIRC_MODE2, lirc_mode2)
 #ifdef CONFIG_INET
 BPF_PROG_TYPE(BPF_PROG_TYPE_SK_REUSEPORT, sk_reuseport)
 #endif
-BPF_PROG_TYPE(BPF_PROG_TYPE_FLOW_DISSECTOR, flow_dissector)
 
 BPF_MAP_TYPE(BPF_MAP_TYPE_ARRAY, array_map_ops)
 BPF_MAP_TYPE(BPF_MAP_TYPE_PERCPU_ARRAY, percpu_array_map_ops)
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index ce0e863f02a2..76be85ea392a 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -1194,10 +1194,23 @@ void skb_flow_dissector_init(struct flow_dissector *flow_dissector,
 			     const struct flow_dissector_key *key,
 			     unsigned int key_count);
 
+#ifdef CONFIG_NET
 int skb_flow_dissector_bpf_prog_attach(const union bpf_attr *attr,
 				       struct bpf_prog *prog);
 
 int skb_flow_dissector_bpf_prog_detach(const union bpf_attr *attr);
+#else
+static inline int skb_flow_dissector_bpf_prog_attach(const union bpf_attr *attr,
+						     struct bpf_prog *prog)
+{
+	return -EOPNOTSUPP;
+}
+
+static inline int skb_flow_dissector_bpf_prog_detach(const union bpf_attr *attr)
+{
+	return -EOPNOTSUPP;
+}
+#endif
 
 bool __skb_flow_dissect(const struct sk_buff *skb,
 			struct flow_dissector *flow_dissector,
-- 
cgit v1.2.3


From 5d773ff41a7cdf0ef6cc6647435d59f0cf53e7b1 Mon Sep 17 00:00:00 2001
From: Mark Bloch <markb@mellanox.com>
Date: Mon, 17 Sep 2018 13:30:46 +0300
Subject: net/mlx5: Rename incorrect naming in IFC file

Remove a trailing underscore from the multicast/unicast names.

Signed-off-by: Mark Bloch <markb@mellanox.com>
Reviewed-by: Yishai Hadas <yishaih@mellanox.com>
Signed-off-by: Leon Romanovsky <leonro@mellanox.com>
---
 include/linux/mlx5/mlx5_ifc.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h
index 3a4a2e0567e9..4c7a1d25d73b 100644
--- a/include/linux/mlx5/mlx5_ifc.h
+++ b/include/linux/mlx5/mlx5_ifc.h
@@ -2559,8 +2559,8 @@ enum {
 };
 
 enum {
-	MLX5_TIRC_SELF_LB_BLOCK_BLOCK_UNICAST_    = 0x1,
-	MLX5_TIRC_SELF_LB_BLOCK_BLOCK_MULTICAST_  = 0x2,
+	MLX5_TIRC_SELF_LB_BLOCK_BLOCK_UNICAST    = 0x1,
+	MLX5_TIRC_SELF_LB_BLOCK_BLOCK_MULTICAST  = 0x2,
 };
 
 struct mlx5_ifc_tirc_bits {
-- 
cgit v1.2.3


From 9799ccb0e984a5c1311b22a212e7ff96e8b736de Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Fri, 21 Sep 2018 08:51:49 -0700
Subject: tcp: add tcp_wstamp_ns socket field

TCP will soon provide earliest departure time on TX skbs.
It needs to track this in a new variable.

tcp_mstamp_refresh() needs to update this variable, and
became too big to stay an inline.

Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/tcp.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/tcp.h b/include/linux/tcp.h
index 263e37271afd..848f5b25e178 100644
--- a/include/linux/tcp.h
+++ b/include/linux/tcp.h
@@ -248,6 +248,8 @@ struct tcp_sock {
 		syn_smc:1;	/* SYN includes SMC */
 	u32	tlp_high_seq;	/* snd_nxt at the time of TLP retransmit. */
 
+	u64	tcp_wstamp_ns;	/* departure time for next sent data packet */
+
 /* RTT measurement */
 	u64	tcp_mstamp;	/* most recent packet received/sent */
 	u32	srtt_us;	/* smoothed round trip time << 3 in usecs */
-- 
cgit v1.2.3


From d3edd06ea8ea9e03de6567fda80b8be57e21a537 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Fri, 21 Sep 2018 08:51:50 -0700
Subject: tcp: provide earliest departure time in skb->tstamp

Switch internal TCP skb->skb_mstamp to skb->skb_mstamp_ns,
from usec units to nsec units.

Do not clear skb->tstamp before entering IP stacks in TX,
so that qdisc or devices can implement pacing based on the
earliest departure time instead of socket sk->sk_pacing_rate

Packets are fed with tcp_wstamp_ns, and following patch
will update tcp_wstamp_ns when both TCP and sch_fq switch to
the earliest departure time mechanism.

Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/skbuff.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index e3a53ca4a9b5..86f337e9a81d 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -689,7 +689,7 @@ struct sk_buff {
 
 	union {
 		ktime_t		tstamp;
-		u64		skb_mstamp;
+		u64		skb_mstamp_ns; /* earliest departure time */
 	};
 	/*
 	 * This is the control buffer. It is free to use for every
-- 
cgit v1.2.3


From 9ba481e2eb3b932ae5b6278342b256e4f92d2793 Mon Sep 17 00:00:00 2001
From: Yishai Hadas <yishaih@mellanox.com>
Date: Thu, 20 Sep 2018 21:35:20 +0300
Subject: net/mlx5: Set uid as part of CQ commands

Set uid as part of CQ commands so that the firmware can manage the CQ
object in a secured way.

The firmware should mark this CQ with the given uid so that it can
be used later on only by objects with the same uid.

Upon DEVX flows that use this CQ (e.g. create QP command), the
pointed CQ must have the same uid as of the issuer uid command.

When a command is issued with uid=0 it means that the issuer of the
command is trusted (i.e. kernel), in that case any pointed object
can be used regardless of its uid.

Signed-off-by: Yishai Hadas <yishaih@mellanox.com>
Signed-off-by: Leon Romanovsky <leonro@mellanox.com>
---
 include/linux/mlx5/cq.h       | 1 +
 include/linux/mlx5/mlx5_ifc.h | 6 +++---
 2 files changed, 4 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/mlx5/cq.h b/include/linux/mlx5/cq.h
index 0ef6138eca49..31a750570c38 100644
--- a/include/linux/mlx5/cq.h
+++ b/include/linux/mlx5/cq.h
@@ -61,6 +61,7 @@ struct mlx5_core_cq {
 	int			reset_notify_added;
 	struct list_head	reset_notify;
 	struct mlx5_eq		*eq;
+	u16 uid;
 };
 
 
diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h
index 4c7a1d25d73b..1a412b355054 100644
--- a/include/linux/mlx5/mlx5_ifc.h
+++ b/include/linux/mlx5/mlx5_ifc.h
@@ -5629,7 +5629,7 @@ enum {
 
 struct mlx5_ifc_modify_cq_in_bits {
 	u8         opcode[0x10];
-	u8         reserved_at_10[0x10];
+	u8         uid[0x10];
 
 	u8         reserved_at_20[0x10];
 	u8         op_mod[0x10];
@@ -6404,7 +6404,7 @@ struct mlx5_ifc_destroy_cq_out_bits {
 
 struct mlx5_ifc_destroy_cq_in_bits {
 	u8         opcode[0x10];
-	u8         reserved_at_10[0x10];
+	u8         uid[0x10];
 
 	u8         reserved_at_20[0x10];
 	u8         op_mod[0x10];
@@ -7164,7 +7164,7 @@ struct mlx5_ifc_create_cq_out_bits {
 
 struct mlx5_ifc_create_cq_in_bits {
 	u8         opcode[0x10];
-	u8         reserved_at_10[0x10];
+	u8         uid[0x10];
 
 	u8         reserved_at_20[0x10];
 	u8         op_mod[0x10];
-- 
cgit v1.2.3


From 4ac63ec72587f7426aae15ddfe78e8ab785724dc Mon Sep 17 00:00:00 2001
From: Yishai Hadas <yishaih@mellanox.com>
Date: Thu, 20 Sep 2018 21:35:21 +0300
Subject: net/mlx5: Set uid as part of QP commands

Set uid as part of QP commands so that the firmware can manage the
QP object in a secured way.

That will enable using a QP that was created by verbs application to
be used by the DEVX flow in case the uid is equal.

Signed-off-by: Yishai Hadas <yishaih@mellanox.com>
Signed-off-by: Leon Romanovsky <leonro@mellanox.com>
---
 include/linux/mlx5/mlx5_ifc.h | 22 +++++++++++-----------
 include/linux/mlx5/qp.h       |  1 +
 2 files changed, 12 insertions(+), 11 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h
index 1a412b355054..3b6a612787ac 100644
--- a/include/linux/mlx5/mlx5_ifc.h
+++ b/include/linux/mlx5/mlx5_ifc.h
@@ -3394,7 +3394,7 @@ struct mlx5_ifc_sqerr2rts_qp_out_bits {
 
 struct mlx5_ifc_sqerr2rts_qp_in_bits {
 	u8         opcode[0x10];
-	u8         reserved_at_10[0x10];
+	u8         uid[0x10];
 
 	u8         reserved_at_20[0x10];
 	u8         op_mod[0x10];
@@ -3424,7 +3424,7 @@ struct mlx5_ifc_sqd2rts_qp_out_bits {
 
 struct mlx5_ifc_sqd2rts_qp_in_bits {
 	u8         opcode[0x10];
-	u8         reserved_at_10[0x10];
+	u8         uid[0x10];
 
 	u8         reserved_at_20[0x10];
 	u8         op_mod[0x10];
@@ -3629,7 +3629,7 @@ struct mlx5_ifc_rts2rts_qp_out_bits {
 
 struct mlx5_ifc_rts2rts_qp_in_bits {
 	u8         opcode[0x10];
-	u8         reserved_at_10[0x10];
+	u8         uid[0x10];
 
 	u8         reserved_at_20[0x10];
 	u8         op_mod[0x10];
@@ -3659,7 +3659,7 @@ struct mlx5_ifc_rtr2rts_qp_out_bits {
 
 struct mlx5_ifc_rtr2rts_qp_in_bits {
 	u8         opcode[0x10];
-	u8         reserved_at_10[0x10];
+	u8         uid[0x10];
 
 	u8         reserved_at_20[0x10];
 	u8         op_mod[0x10];
@@ -3689,7 +3689,7 @@ struct mlx5_ifc_rst2init_qp_out_bits {
 
 struct mlx5_ifc_rst2init_qp_in_bits {
 	u8         opcode[0x10];
-	u8         reserved_at_10[0x10];
+	u8         uid[0x10];
 
 	u8         reserved_at_20[0x10];
 	u8         op_mod[0x10];
@@ -5192,7 +5192,7 @@ struct mlx5_ifc_qp_2rst_out_bits {
 
 struct mlx5_ifc_qp_2rst_in_bits {
 	u8         opcode[0x10];
-	u8         reserved_at_10[0x10];
+	u8         uid[0x10];
 
 	u8         reserved_at_20[0x10];
 	u8         op_mod[0x10];
@@ -5214,7 +5214,7 @@ struct mlx5_ifc_qp_2err_out_bits {
 
 struct mlx5_ifc_qp_2err_in_bits {
 	u8         opcode[0x10];
-	u8         reserved_at_10[0x10];
+	u8         uid[0x10];
 
 	u8         reserved_at_20[0x10];
 	u8         op_mod[0x10];
@@ -5789,7 +5789,7 @@ struct mlx5_ifc_init2rtr_qp_out_bits {
 
 struct mlx5_ifc_init2rtr_qp_in_bits {
 	u8         opcode[0x10];
-	u8         reserved_at_10[0x10];
+	u8         uid[0x10];
 
 	u8         reserved_at_20[0x10];
 	u8         op_mod[0x10];
@@ -5819,7 +5819,7 @@ struct mlx5_ifc_init2init_qp_out_bits {
 
 struct mlx5_ifc_init2init_qp_in_bits {
 	u8         opcode[0x10];
-	u8         reserved_at_10[0x10];
+	u8         uid[0x10];
 
 	u8         reserved_at_20[0x10];
 	u8         op_mod[0x10];
@@ -6230,7 +6230,7 @@ struct mlx5_ifc_destroy_qp_out_bits {
 
 struct mlx5_ifc_destroy_qp_in_bits {
 	u8         opcode[0x10];
-	u8         reserved_at_10[0x10];
+	u8         uid[0x10];
 
 	u8         reserved_at_20[0x10];
 	u8         op_mod[0x10];
@@ -6895,7 +6895,7 @@ struct mlx5_ifc_create_qp_out_bits {
 
 struct mlx5_ifc_create_qp_in_bits {
 	u8         opcode[0x10];
-	u8         reserved_at_10[0x10];
+	u8         uid[0x10];
 
 	u8         reserved_at_20[0x10];
 	u8         op_mod[0x10];
diff --git a/include/linux/mlx5/qp.h b/include/linux/mlx5/qp.h
index 4778d41085d4..fbe322c966bc 100644
--- a/include/linux/mlx5/qp.h
+++ b/include/linux/mlx5/qp.h
@@ -471,6 +471,7 @@ struct mlx5_core_qp {
 	int			qpn;
 	struct mlx5_rsc_debug	*dbg;
 	int			pid;
+	u16			uid;
 };
 
 struct mlx5_core_dct {
-- 
cgit v1.2.3


From d269b3afffcb107375d9cf73127fc2e0181bc90b Mon Sep 17 00:00:00 2001
From: Yishai Hadas <yishaih@mellanox.com>
Date: Thu, 20 Sep 2018 21:35:22 +0300
Subject: net/mlx5: Set uid as part of RQ commands

Set uid as part of RQ commands so that the firmware can manage the
RQ object in a secured way.

That will enable using an RQ that was created by verbs application
to be used by the DEVX flow in case the uid is equal.

Signed-off-by: Yishai Hadas <yishaih@mellanox.com>
Signed-off-by: Leon Romanovsky <leonro@mellanox.com>
---
 include/linux/mlx5/mlx5_ifc.h | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h
index 3b6a612787ac..689631ca27b8 100644
--- a/include/linux/mlx5/mlx5_ifc.h
+++ b/include/linux/mlx5/mlx5_ifc.h
@@ -5488,7 +5488,7 @@ enum {
 
 struct mlx5_ifc_modify_rq_in_bits {
 	u8         opcode[0x10];
-	u8         reserved_at_10[0x10];
+	u8         uid[0x10];
 
 	u8         reserved_at_20[0x10];
 	u8         op_mod[0x10];
@@ -6164,7 +6164,7 @@ struct mlx5_ifc_destroy_rq_out_bits {
 
 struct mlx5_ifc_destroy_rq_in_bits {
 	u8         opcode[0x10];
-	u8         reserved_at_10[0x10];
+	u8         uid[0x10];
 
 	u8         reserved_at_20[0x10];
 	u8         op_mod[0x10];
@@ -6847,7 +6847,7 @@ struct mlx5_ifc_create_rq_out_bits {
 
 struct mlx5_ifc_create_rq_in_bits {
 	u8         opcode[0x10];
-	u8         reserved_at_10[0x10];
+	u8         uid[0x10];
 
 	u8         reserved_at_20[0x10];
 	u8         op_mod[0x10];
-- 
cgit v1.2.3


From 430ae0d5a3ce1350375690cb6ab29ab6fae4a9ac Mon Sep 17 00:00:00 2001
From: Yishai Hadas <yishaih@mellanox.com>
Date: Thu, 20 Sep 2018 21:35:23 +0300
Subject: net/mlx5: Set uid as part of SQ commands

Set uid as part of SQ commands so that the firmware can manage the
SQ object in a secured way.

That will enable using an SQ that was created by verbs application
to be used by the DEVX flow in case the uid is equal.

Signed-off-by: Yishai Hadas <yishaih@mellanox.com>
Signed-off-by: Leon Romanovsky <leonro@mellanox.com>
---
 include/linux/mlx5/mlx5_ifc.h | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h
index 689631ca27b8..72dd1e49a799 100644
--- a/include/linux/mlx5/mlx5_ifc.h
+++ b/include/linux/mlx5/mlx5_ifc.h
@@ -5381,7 +5381,7 @@ struct mlx5_ifc_modify_sq_out_bits {
 
 struct mlx5_ifc_modify_sq_in_bits {
 	u8         opcode[0x10];
-	u8         reserved_at_10[0x10];
+	u8         uid[0x10];
 
 	u8         reserved_at_20[0x10];
 	u8         op_mod[0x10];
@@ -6096,7 +6096,7 @@ struct mlx5_ifc_destroy_sq_out_bits {
 
 struct mlx5_ifc_destroy_sq_in_bits {
 	u8         opcode[0x10];
-	u8         reserved_at_10[0x10];
+	u8         uid[0x10];
 
 	u8         reserved_at_20[0x10];
 	u8         op_mod[0x10];
@@ -6769,7 +6769,7 @@ struct mlx5_ifc_create_sq_out_bits {
 
 struct mlx5_ifc_create_sq_in_bits {
 	u8         opcode[0x10];
-	u8         reserved_at_10[0x10];
+	u8         uid[0x10];
 
 	u8         reserved_at_20[0x10];
 	u8         op_mod[0x10];
-- 
cgit v1.2.3


From a0d8c054318976927493ffd26055d9d183c9beec Mon Sep 17 00:00:00 2001
From: Yishai Hadas <yishaih@mellanox.com>
Date: Thu, 20 Sep 2018 21:35:24 +0300
Subject: net/mlx5: Set uid as part of SRQ commands

Set uid as part of SRQ commands so that the firmware can manage the
SRQ object in a secured way.

That will enable using an SRQ that was created by verbs application
to be used by the DEVX flow in case the uid is equal.

Signed-off-by: Yishai Hadas <yishaih@mellanox.com>
Signed-off-by: Leon Romanovsky <leonro@mellanox.com>
---
 include/linux/mlx5/driver.h   |  1 +
 include/linux/mlx5/mlx5_ifc.h | 22 +++++++++++-----------
 include/linux/mlx5/srq.h      |  1 +
 3 files changed, 13 insertions(+), 11 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h
index d885e9f0e054..8fb072aa8671 100644
--- a/include/linux/mlx5/driver.h
+++ b/include/linux/mlx5/driver.h
@@ -474,6 +474,7 @@ struct mlx5_core_srq {
 
 	atomic_t		refcount;
 	struct completion	free;
+	u16		uid;
 };
 
 struct mlx5_eq_table {
diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h
index 72dd1e49a799..85f1237d80db 100644
--- a/include/linux/mlx5/mlx5_ifc.h
+++ b/include/linux/mlx5/mlx5_ifc.h
@@ -5524,7 +5524,7 @@ struct mlx5_ifc_rmp_bitmask_bits {
 
 struct mlx5_ifc_modify_rmp_in_bits {
 	u8         opcode[0x10];
-	u8         reserved_at_10[0x10];
+	u8         uid[0x10];
 
 	u8         reserved_at_20[0x10];
 	u8         op_mod[0x10];
@@ -5986,7 +5986,7 @@ struct mlx5_ifc_destroy_xrq_out_bits {
 
 struct mlx5_ifc_destroy_xrq_in_bits {
 	u8         opcode[0x10];
-	u8         reserved_at_10[0x10];
+	u8         uid[0x10];
 
 	u8         reserved_at_20[0x10];
 	u8         op_mod[0x10];
@@ -6008,7 +6008,7 @@ struct mlx5_ifc_destroy_xrc_srq_out_bits {
 
 struct mlx5_ifc_destroy_xrc_srq_in_bits {
 	u8         opcode[0x10];
-	u8         reserved_at_10[0x10];
+	u8         uid[0x10];
 
 	u8         reserved_at_20[0x10];
 	u8         op_mod[0x10];
@@ -6074,7 +6074,7 @@ struct mlx5_ifc_destroy_srq_out_bits {
 
 struct mlx5_ifc_destroy_srq_in_bits {
 	u8         opcode[0x10];
-	u8         reserved_at_10[0x10];
+	u8         uid[0x10];
 
 	u8         reserved_at_20[0x10];
 	u8         op_mod[0x10];
@@ -6208,7 +6208,7 @@ struct mlx5_ifc_destroy_rmp_out_bits {
 
 struct mlx5_ifc_destroy_rmp_in_bits {
 	u8         opcode[0x10];
-	u8         reserved_at_10[0x10];
+	u8         uid[0x10];
 
 	u8         reserved_at_20[0x10];
 	u8         op_mod[0x10];
@@ -6641,7 +6641,7 @@ struct mlx5_ifc_create_xrq_out_bits {
 
 struct mlx5_ifc_create_xrq_in_bits {
 	u8         opcode[0x10];
-	u8         reserved_at_10[0x10];
+	u8         uid[0x10];
 
 	u8         reserved_at_20[0x10];
 	u8         op_mod[0x10];
@@ -6665,7 +6665,7 @@ struct mlx5_ifc_create_xrc_srq_out_bits {
 
 struct mlx5_ifc_create_xrc_srq_in_bits {
 	u8         opcode[0x10];
-	u8         reserved_at_10[0x10];
+	u8         uid[0x10];
 
 	u8         reserved_at_20[0x10];
 	u8         op_mod[0x10];
@@ -6741,7 +6741,7 @@ struct mlx5_ifc_create_srq_out_bits {
 
 struct mlx5_ifc_create_srq_in_bits {
 	u8         opcode[0x10];
-	u8         reserved_at_10[0x10];
+	u8         uid[0x10];
 
 	u8         reserved_at_20[0x10];
 	u8         op_mod[0x10];
@@ -6871,7 +6871,7 @@ struct mlx5_ifc_create_rmp_out_bits {
 
 struct mlx5_ifc_create_rmp_in_bits {
 	u8         opcode[0x10];
-	u8         reserved_at_10[0x10];
+	u8         uid[0x10];
 
 	u8         reserved_at_20[0x10];
 	u8         op_mod[0x10];
@@ -7272,7 +7272,7 @@ enum {
 
 struct mlx5_ifc_arm_xrc_srq_in_bits {
 	u8         opcode[0x10];
-	u8         reserved_at_10[0x10];
+	u8         uid[0x10];
 
 	u8         reserved_at_20[0x10];
 	u8         op_mod[0x10];
@@ -7300,7 +7300,7 @@ enum {
 
 struct mlx5_ifc_arm_rq_in_bits {
 	u8         opcode[0x10];
-	u8         reserved_at_10[0x10];
+	u8         uid[0x10];
 
 	u8         reserved_at_20[0x10];
 	u8         op_mod[0x10];
diff --git a/include/linux/mlx5/srq.h b/include/linux/mlx5/srq.h
index 24ff23e27c8a..1b1f3c20c6a3 100644
--- a/include/linux/mlx5/srq.h
+++ b/include/linux/mlx5/srq.h
@@ -61,6 +61,7 @@ struct mlx5_srq_attr {
 	u32 tm_next_tag;
 	u32 tm_hw_phase_cnt;
 	u32 tm_sw_phase_cnt;
+	u16 uid;
 };
 
 struct mlx5_core_dev;
-- 
cgit v1.2.3


From 774ea6eea29025218f75bc94c764df9a641db471 Mon Sep 17 00:00:00 2001
From: Yishai Hadas <yishaih@mellanox.com>
Date: Thu, 20 Sep 2018 21:35:25 +0300
Subject: net/mlx5: Set uid as part of DCT commands

Set uid as part of DCT commands so that the firmware can manage the
DCT object in a secured way.

That will enable using a DCT that was created by verbs application
to be used by the DEVX flow in case the uid is equal.

Signed-off-by: Yishai Hadas <yishaih@mellanox.com>
Signed-off-by: Leon Romanovsky <leonro@mellanox.com>
---
 include/linux/mlx5/mlx5_ifc.h | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h
index 85f1237d80db..62c0592a9fdb 100644
--- a/include/linux/mlx5/mlx5_ifc.h
+++ b/include/linux/mlx5/mlx5_ifc.h
@@ -5918,7 +5918,7 @@ struct mlx5_ifc_drain_dct_out_bits {
 
 struct mlx5_ifc_drain_dct_in_bits {
 	u8         opcode[0x10];
-	u8         reserved_at_10[0x10];
+	u8         uid[0x10];
 
 	u8         reserved_at_20[0x10];
 	u8         op_mod[0x10];
@@ -6382,7 +6382,7 @@ struct mlx5_ifc_destroy_dct_out_bits {
 
 struct mlx5_ifc_destroy_dct_in_bits {
 	u8         opcode[0x10];
-	u8         reserved_at_10[0x10];
+	u8         uid[0x10];
 
 	u8         reserved_at_20[0x10];
 	u8         op_mod[0x10];
@@ -7138,7 +7138,7 @@ struct mlx5_ifc_create_dct_out_bits {
 
 struct mlx5_ifc_create_dct_in_bits {
 	u8         opcode[0x10];
-	u8         reserved_at_10[0x10];
+	u8         uid[0x10];
 
 	u8         reserved_at_20[0x10];
 	u8         op_mod[0x10];
-- 
cgit v1.2.3


From bd37197554eb28a7fc38e44e005e303c77f788ed Mon Sep 17 00:00:00 2001
From: Leon Romanovsky <leonro@mellanox.com>
Date: Thu, 20 Sep 2018 21:35:26 +0300
Subject: net/mlx5: Update mlx5_ifc with DEVX UID bits

Add DEVX information to WQ, SRQ, CQ, TIR, TIS, QP,
RQ, XRCD, PD, MKEY and MCG.

Each object that is created/destroyed/modified via verbs will
be stamped with a UID based on its user context. This is already
done for DEVX objects commands.

This will enable the firmware to enforce the usage of kernel objects
from the DEVX flow by validating that the same UID is used and the
resources are really related to the same user.

The addition of *_valid fields are needed to distinguish
how various addresses are passed.

For non-DEVX callers, all those fields will be zero.

Signed-off-by: Leon Romanovsky <leonro@mellanox.com>
---
 include/linux/mlx5/mlx5_ifc.h | 67 +++++++++++++++++++++++++++----------------
 1 file changed, 43 insertions(+), 24 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h
index 62c0592a9fdb..68f4d5f9d929 100644
--- a/include/linux/mlx5/mlx5_ifc.h
+++ b/include/linux/mlx5/mlx5_ifc.h
@@ -1290,7 +1290,9 @@ struct mlx5_ifc_wq_bits {
 	u8         reserved_at_118[0x3];
 	u8         log_wq_sz[0x5];
 
-	u8         reserved_at_120[0x3];
+	u8         dbr_umem_valid[0x1];
+	u8         wq_umem_valid[0x1];
+	u8         reserved_at_122[0x1];
 	u8         log_hairpin_num_packets[0x5];
 	u8         reserved_at_128[0x3];
 	u8         log_hairpin_data_sz[0x5];
@@ -2364,7 +2366,10 @@ struct mlx5_ifc_qpc_bits {
 
 	u8         dc_access_key[0x40];
 
-	u8         reserved_at_680[0xc0];
+	u8         reserved_at_680[0x3];
+	u8         dbr_umem_valid[0x1];
+
+	u8         reserved_at_684[0xbc];
 };
 
 struct mlx5_ifc_roce_addr_layout_bits {
@@ -2464,7 +2469,7 @@ struct mlx5_ifc_xrc_srqc_bits {
 
 	u8         wq_signature[0x1];
 	u8         cont_srq[0x1];
-	u8         reserved_at_22[0x1];
+	u8         dbr_umem_valid[0x1];
 	u8         rlky[0x1];
 	u8         basic_cyclic_rcv_wqe[0x1];
 	u8         log_rq_stride[0x3];
@@ -3128,7 +3133,9 @@ enum {
 
 struct mlx5_ifc_cqc_bits {
 	u8         status[0x4];
-	u8         reserved_at_4[0x4];
+	u8         reserved_at_4[0x2];
+	u8         dbr_umem_valid[0x1];
+	u8         reserved_at_7[0x1];
 	u8         cqe_sz[0x3];
 	u8         cc[0x1];
 	u8         reserved_at_c[0x1];
@@ -5314,7 +5321,7 @@ struct mlx5_ifc_modify_tis_bitmask_bits {
 
 struct mlx5_ifc_modify_tis_in_bits {
 	u8         opcode[0x10];
-	u8         reserved_at_10[0x10];
+	u8         uid[0x10];
 
 	u8         reserved_at_20[0x10];
 	u8         op_mod[0x10];
@@ -5353,7 +5360,7 @@ struct mlx5_ifc_modify_tir_out_bits {
 
 struct mlx5_ifc_modify_tir_in_bits {
 	u8         opcode[0x10];
-	u8         reserved_at_10[0x10];
+	u8         uid[0x10];
 
 	u8         reserved_at_20[0x10];
 	u8         op_mod[0x10];
@@ -5454,7 +5461,7 @@ struct mlx5_ifc_rqt_bitmask_bits {
 
 struct mlx5_ifc_modify_rqt_in_bits {
 	u8         opcode[0x10];
-	u8         reserved_at_10[0x10];
+	u8         uid[0x10];
 
 	u8         reserved_at_20[0x10];
 	u8         op_mod[0x10];
@@ -5641,7 +5648,10 @@ struct mlx5_ifc_modify_cq_in_bits {
 
 	struct mlx5_ifc_cqc_bits cq_context;
 
-	u8         reserved_at_280[0x600];
+	u8         reserved_at_280[0x40];
+
+	u8         cq_umem_valid[0x1];
+	u8         reserved_at_2c1[0x5bf];
 
 	u8         pas[0][0x40];
 };
@@ -5962,7 +5972,7 @@ struct mlx5_ifc_detach_from_mcg_out_bits {
 
 struct mlx5_ifc_detach_from_mcg_in_bits {
 	u8         opcode[0x10];
-	u8         reserved_at_10[0x10];
+	u8         uid[0x10];
 
 	u8         reserved_at_20[0x10];
 	u8         op_mod[0x10];
@@ -6030,7 +6040,7 @@ struct mlx5_ifc_destroy_tis_out_bits {
 
 struct mlx5_ifc_destroy_tis_in_bits {
 	u8         opcode[0x10];
-	u8         reserved_at_10[0x10];
+	u8         uid[0x10];
 
 	u8         reserved_at_20[0x10];
 	u8         op_mod[0x10];
@@ -6052,7 +6062,7 @@ struct mlx5_ifc_destroy_tir_out_bits {
 
 struct mlx5_ifc_destroy_tir_in_bits {
 	u8         opcode[0x10];
-	u8         reserved_at_10[0x10];
+	u8         uid[0x10];
 
 	u8         reserved_at_20[0x10];
 	u8         op_mod[0x10];
@@ -6142,7 +6152,7 @@ struct mlx5_ifc_destroy_rqt_out_bits {
 
 struct mlx5_ifc_destroy_rqt_in_bits {
 	u8         opcode[0x10];
-	u8         reserved_at_10[0x10];
+	u8         uid[0x10];
 
 	u8         reserved_at_20[0x10];
 	u8         op_mod[0x10];
@@ -6507,7 +6517,7 @@ struct mlx5_ifc_dealloc_xrcd_out_bits {
 
 struct mlx5_ifc_dealloc_xrcd_in_bits {
 	u8         opcode[0x10];
-	u8         reserved_at_10[0x10];
+	u8         uid[0x10];
 
 	u8         reserved_at_20[0x10];
 	u8         op_mod[0x10];
@@ -6595,7 +6605,7 @@ struct mlx5_ifc_dealloc_pd_out_bits {
 
 struct mlx5_ifc_dealloc_pd_in_bits {
 	u8         opcode[0x10];
-	u8         reserved_at_10[0x10];
+	u8         uid[0x10];
 
 	u8         reserved_at_20[0x10];
 	u8         op_mod[0x10];
@@ -6674,7 +6684,9 @@ struct mlx5_ifc_create_xrc_srq_in_bits {
 
 	struct mlx5_ifc_xrc_srqc_bits xrc_srq_context_entry;
 
-	u8         reserved_at_280[0x600];
+	u8         reserved_at_280[0x40];
+	u8         xrc_srq_umem_valid[0x1];
+	u8         reserved_at_2c1[0x5bf];
 
 	u8         pas[0][0x40];
 };
@@ -6693,7 +6705,7 @@ struct mlx5_ifc_create_tis_out_bits {
 
 struct mlx5_ifc_create_tis_in_bits {
 	u8         opcode[0x10];
-	u8         reserved_at_10[0x10];
+	u8         uid[0x10];
 
 	u8         reserved_at_20[0x10];
 	u8         op_mod[0x10];
@@ -6717,7 +6729,7 @@ struct mlx5_ifc_create_tir_out_bits {
 
 struct mlx5_ifc_create_tir_in_bits {
 	u8         opcode[0x10];
-	u8         reserved_at_10[0x10];
+	u8         uid[0x10];
 
 	u8         reserved_at_20[0x10];
 	u8         op_mod[0x10];
@@ -6823,7 +6835,7 @@ struct mlx5_ifc_create_rqt_out_bits {
 
 struct mlx5_ifc_create_rqt_in_bits {
 	u8         opcode[0x10];
-	u8         reserved_at_10[0x10];
+	u8         uid[0x10];
 
 	u8         reserved_at_20[0x10];
 	u8         op_mod[0x10];
@@ -6908,7 +6920,10 @@ struct mlx5_ifc_create_qp_in_bits {
 
 	struct mlx5_ifc_qpc_bits qpc;
 
-	u8         reserved_at_800[0x80];
+	u8         reserved_at_800[0x60];
+
+	u8         wq_umem_valid[0x1];
+	u8         reserved_at_861[0x1f];
 
 	u8         pas[0][0x40];
 };
@@ -6970,7 +6985,8 @@ struct mlx5_ifc_create_mkey_in_bits {
 	u8         reserved_at_40[0x20];
 
 	u8         pg_access[0x1];
-	u8         reserved_at_61[0x1f];
+	u8         mkey_umem_valid[0x1];
+	u8         reserved_at_62[0x1e];
 
 	struct mlx5_ifc_mkc_bits memory_key_mkey_entry;
 
@@ -7173,7 +7189,10 @@ struct mlx5_ifc_create_cq_in_bits {
 
 	struct mlx5_ifc_cqc_bits cq_context;
 
-	u8         reserved_at_280[0x600];
+	u8         reserved_at_280[0x60];
+
+	u8         cq_umem_valid[0x1];
+	u8         reserved_at_2e1[0x59f];
 
 	u8         pas[0][0x40];
 };
@@ -7221,7 +7240,7 @@ struct mlx5_ifc_attach_to_mcg_out_bits {
 
 struct mlx5_ifc_attach_to_mcg_in_bits {
 	u8         opcode[0x10];
-	u8         reserved_at_10[0x10];
+	u8         uid[0x10];
 
 	u8         reserved_at_20[0x10];
 	u8         op_mod[0x10];
@@ -7348,7 +7367,7 @@ struct mlx5_ifc_alloc_xrcd_out_bits {
 
 struct mlx5_ifc_alloc_xrcd_in_bits {
 	u8         opcode[0x10];
-	u8         reserved_at_10[0x10];
+	u8         uid[0x10];
 
 	u8         reserved_at_20[0x10];
 	u8         op_mod[0x10];
@@ -7436,7 +7455,7 @@ struct mlx5_ifc_alloc_pd_out_bits {
 
 struct mlx5_ifc_alloc_pd_in_bits {
 	u8         opcode[0x10];
-	u8         reserved_at_10[0x10];
+	u8         uid[0x10];
 
 	u8         reserved_at_20[0x10];
 	u8         op_mod[0x10];
-- 
cgit v1.2.3


From 6f99528e9797794b91b43321fbbc93fe772b0803 Mon Sep 17 00:00:00 2001
From: Vlad Buslov <vladbu@mellanox.com>
Date: Mon, 24 Sep 2018 19:22:49 +0300
Subject: net: core: netlink: add helper refcount dec and lock function

Rtnl lock is encapsulated in netlink and cannot be accessed by other
modules directly. This means that reference counted objects that rely on
rtnl lock cannot use it with refcounter helper function that atomically
releases decrements reference and obtains mutex.

This patch implements simple wrapper function around refcount_dec_and_lock
that obtains rtnl lock if reference counter value reached 0.

Signed-off-by: Vlad Buslov <vladbu@mellanox.com>
Acked-by: Jiri Pirko <jiri@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/rtnetlink.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/rtnetlink.h b/include/linux/rtnetlink.h
index 5225832bd6ff..9cdd76348d9a 100644
--- a/include/linux/rtnetlink.h
+++ b/include/linux/rtnetlink.h
@@ -6,6 +6,7 @@
 #include <linux/mutex.h>
 #include <linux/netdevice.h>
 #include <linux/wait.h>
+#include <linux/refcount.h>
 #include <uapi/linux/rtnetlink.h>
 
 extern int rtnetlink_send(struct sk_buff *skb, struct net *net, u32 pid, u32 group, int echo);
@@ -34,6 +35,7 @@ extern void rtnl_unlock(void);
 extern int rtnl_trylock(void);
 extern int rtnl_is_locked(void);
 extern int rtnl_lock_killable(void);
+extern bool refcount_dec_and_rtnl_lock(refcount_t *r);
 
 extern wait_queue_head_t netdev_unregistering_wq;
 extern struct rw_semaphore pernet_ops_rwsem;
-- 
cgit v1.2.3


From 3a7d0d07a386716b459b00783b11a8211cefcc0f Mon Sep 17 00:00:00 2001
From: Vlad Buslov <vladbu@mellanox.com>
Date: Mon, 24 Sep 2018 19:22:51 +0300
Subject: net: sched: extend Qdisc with rcu

Currently, Qdisc API functions assume that users have rtnl lock taken. To
implement rtnl unlocked classifiers update interface, Qdisc API must be
extended with functions that do not require rtnl lock.

Extend Qdisc structure with rcu. Implement special version of put function
qdisc_put_unlocked() that is called without rtnl lock taken. This function
only takes rtnl lock if Qdisc reference counter reached zero and is
intended to be used as optimization.

Signed-off-by: Vlad Buslov <vladbu@mellanox.com>
Acked-by: Jiri Pirko <jiri@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/rtnetlink.h | 5 +++++
 1 file changed, 5 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/rtnetlink.h b/include/linux/rtnetlink.h
index 9cdd76348d9a..bb9cb84114c1 100644
--- a/include/linux/rtnetlink.h
+++ b/include/linux/rtnetlink.h
@@ -85,6 +85,11 @@ static inline struct netdev_queue *dev_ingress_queue(struct net_device *dev)
 	return rtnl_dereference(dev->ingress_queue);
 }
 
+static inline struct netdev_queue *dev_ingress_queue_rcu(struct net_device *dev)
+{
+	return rcu_dereference(dev->ingress_queue);
+}
+
 struct netdev_queue *dev_ingress_queue_create(struct net_device *dev);
 
 #ifdef CONFIG_NET_INGRESS
-- 
cgit v1.2.3


From 8bad74f9840f87661f20ced3dc80c84ab4fd55a1 Mon Sep 17 00:00:00 2001
From: Roman Gushchin <guro@fb.com>
Date: Fri, 28 Sep 2018 14:45:36 +0000
Subject: bpf: extend cgroup bpf core to allow multiple cgroup storage types

In order to introduce per-cpu cgroup storage, let's generalize
bpf cgroup core to support multiple cgroup storage types.
Potentially, per-node cgroup storage can be added later.

This commit is mostly a formal change that replaces
cgroup_storage pointer with a array of cgroup_storage pointers.
It doesn't actually introduce a new storage type,
it will be done later.

Each bpf program is now able to have one cgroup storage of each type.

Signed-off-by: Roman Gushchin <guro@fb.com>
Acked-by: Song Liu <songliubraving@fb.com>
Cc: Daniel Borkmann <daniel@iogearbox.net>
Cc: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 include/linux/bpf-cgroup.h | 38 ++++++++++++++++++++++++++++----------
 include/linux/bpf.h        | 11 +++++++++--
 2 files changed, 37 insertions(+), 12 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/bpf-cgroup.h b/include/linux/bpf-cgroup.h
index f91b0f8ff3a9..e9871b012dac 100644
--- a/include/linux/bpf-cgroup.h
+++ b/include/linux/bpf-cgroup.h
@@ -2,6 +2,7 @@
 #ifndef _BPF_CGROUP_H
 #define _BPF_CGROUP_H
 
+#include <linux/bpf.h>
 #include <linux/errno.h>
 #include <linux/jump_label.h>
 #include <linux/percpu.h>
@@ -22,7 +23,10 @@ struct bpf_cgroup_storage;
 extern struct static_key_false cgroup_bpf_enabled_key;
 #define cgroup_bpf_enabled static_branch_unlikely(&cgroup_bpf_enabled_key)
 
-DECLARE_PER_CPU(void*, bpf_cgroup_storage);
+DECLARE_PER_CPU(void*, bpf_cgroup_storage[MAX_BPF_CGROUP_STORAGE_TYPE]);
+
+#define for_each_cgroup_storage_type(stype) \
+	for (stype = 0; stype < MAX_BPF_CGROUP_STORAGE_TYPE; stype++)
 
 struct bpf_cgroup_storage_map;
 
@@ -43,7 +47,7 @@ struct bpf_cgroup_storage {
 struct bpf_prog_list {
 	struct list_head node;
 	struct bpf_prog *prog;
-	struct bpf_cgroup_storage *storage;
+	struct bpf_cgroup_storage *storage[MAX_BPF_CGROUP_STORAGE_TYPE];
 };
 
 struct bpf_prog_array;
@@ -101,18 +105,29 @@ int __cgroup_bpf_run_filter_sock_ops(struct sock *sk,
 int __cgroup_bpf_check_dev_permission(short dev_type, u32 major, u32 minor,
 				      short access, enum bpf_attach_type type);
 
-static inline void bpf_cgroup_storage_set(struct bpf_cgroup_storage *storage)
+static inline enum bpf_cgroup_storage_type cgroup_storage_type(
+	struct bpf_map *map)
 {
+	return BPF_CGROUP_STORAGE_SHARED;
+}
+
+static inline void bpf_cgroup_storage_set(struct bpf_cgroup_storage
+					  *storage[MAX_BPF_CGROUP_STORAGE_TYPE])
+{
+	enum bpf_cgroup_storage_type stype;
 	struct bpf_storage_buffer *buf;
 
-	if (!storage)
-		return;
+	for_each_cgroup_storage_type(stype) {
+		if (!storage[stype])
+			continue;
 
-	buf = READ_ONCE(storage->buf);
-	this_cpu_write(bpf_cgroup_storage, &buf->data[0]);
+		buf = READ_ONCE(storage[stype]->buf);
+		this_cpu_write(bpf_cgroup_storage[stype], &buf->data[0]);
+	}
 }
 
-struct bpf_cgroup_storage *bpf_cgroup_storage_alloc(struct bpf_prog *prog);
+struct bpf_cgroup_storage *bpf_cgroup_storage_alloc(struct bpf_prog *prog,
+					enum bpf_cgroup_storage_type stype);
 void bpf_cgroup_storage_free(struct bpf_cgroup_storage *storage);
 void bpf_cgroup_storage_link(struct bpf_cgroup_storage *storage,
 			     struct cgroup *cgroup,
@@ -265,13 +280,14 @@ static inline int cgroup_bpf_prog_query(const union bpf_attr *attr,
 	return -EINVAL;
 }
 
-static inline void bpf_cgroup_storage_set(struct bpf_cgroup_storage *storage) {}
+static inline void bpf_cgroup_storage_set(
+	struct bpf_cgroup_storage *storage[MAX_BPF_CGROUP_STORAGE_TYPE]) {}
 static inline int bpf_cgroup_storage_assign(struct bpf_prog *prog,
 					    struct bpf_map *map) { return 0; }
 static inline void bpf_cgroup_storage_release(struct bpf_prog *prog,
 					      struct bpf_map *map) {}
 static inline struct bpf_cgroup_storage *bpf_cgroup_storage_alloc(
-	struct bpf_prog *prog) { return 0; }
+	struct bpf_prog *prog, enum bpf_cgroup_storage_type stype) { return 0; }
 static inline void bpf_cgroup_storage_free(
 	struct bpf_cgroup_storage *storage) {}
 
@@ -293,6 +309,8 @@ static inline void bpf_cgroup_storage_free(
 #define BPF_CGROUP_RUN_PROG_SOCK_OPS(sock_ops) ({ 0; })
 #define BPF_CGROUP_RUN_PROG_DEVICE_CGROUP(type,major,minor,access) ({ 0; })
 
+#define for_each_cgroup_storage_type(stype) for (; false; )
+
 #endif /* CONFIG_CGROUP_BPF */
 
 #endif /* _BPF_CGROUP_H */
diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 988a00797bcd..b457fbe7b70b 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -272,6 +272,13 @@ struct bpf_prog_offload {
 	u32			jited_len;
 };
 
+enum bpf_cgroup_storage_type {
+	BPF_CGROUP_STORAGE_SHARED,
+	__BPF_CGROUP_STORAGE_MAX
+};
+
+#define MAX_BPF_CGROUP_STORAGE_TYPE __BPF_CGROUP_STORAGE_MAX
+
 struct bpf_prog_aux {
 	atomic_t refcnt;
 	u32 used_map_cnt;
@@ -289,7 +296,7 @@ struct bpf_prog_aux {
 	struct bpf_prog *prog;
 	struct user_struct *user;
 	u64 load_time; /* ns since boottime */
-	struct bpf_map *cgroup_storage;
+	struct bpf_map *cgroup_storage[MAX_BPF_CGROUP_STORAGE_TYPE];
 	char name[BPF_OBJ_NAME_LEN];
 #ifdef CONFIG_SECURITY
 	void *security;
@@ -358,7 +365,7 @@ int bpf_prog_test_run_skb(struct bpf_prog *prog, const union bpf_attr *kattr,
  */
 struct bpf_prog_array_item {
 	struct bpf_prog *prog;
-	struct bpf_cgroup_storage *cgroup_storage;
+	struct bpf_cgroup_storage *cgroup_storage[MAX_BPF_CGROUP_STORAGE_TYPE];
 };
 
 struct bpf_prog_array {
-- 
cgit v1.2.3


From f294b37ec7b24a574884cd157497a3748081c0f0 Mon Sep 17 00:00:00 2001
From: Roman Gushchin <guro@fb.com>
Date: Fri, 28 Sep 2018 14:45:40 +0000
Subject: bpf: rework cgroup storage pointer passing

To simplify the following introduction of per-cpu cgroup storage,
let's rework a bit a mechanism of passing a pointer to a cgroup
storage into the bpf_get_local_storage(). Let's save a pointer
to the corresponding bpf_cgroup_storage structure, instead of
a pointer to the actual buffer.

It will help us to handle per-cpu storage later, which has
a different way of accessing to the actual data.

Signed-off-by: Roman Gushchin <guro@fb.com>
Acked-by: Song Liu <songliubraving@fb.com>
Cc: Daniel Borkmann <daniel@iogearbox.net>
Cc: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 include/linux/bpf-cgroup.h | 13 ++++---------
 1 file changed, 4 insertions(+), 9 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/bpf-cgroup.h b/include/linux/bpf-cgroup.h
index e9871b012dac..7e0c9a1d48b7 100644
--- a/include/linux/bpf-cgroup.h
+++ b/include/linux/bpf-cgroup.h
@@ -23,7 +23,8 @@ struct bpf_cgroup_storage;
 extern struct static_key_false cgroup_bpf_enabled_key;
 #define cgroup_bpf_enabled static_branch_unlikely(&cgroup_bpf_enabled_key)
 
-DECLARE_PER_CPU(void*, bpf_cgroup_storage[MAX_BPF_CGROUP_STORAGE_TYPE]);
+DECLARE_PER_CPU(struct bpf_cgroup_storage*,
+		bpf_cgroup_storage[MAX_BPF_CGROUP_STORAGE_TYPE]);
 
 #define for_each_cgroup_storage_type(stype) \
 	for (stype = 0; stype < MAX_BPF_CGROUP_STORAGE_TYPE; stype++)
@@ -115,15 +116,9 @@ static inline void bpf_cgroup_storage_set(struct bpf_cgroup_storage
 					  *storage[MAX_BPF_CGROUP_STORAGE_TYPE])
 {
 	enum bpf_cgroup_storage_type stype;
-	struct bpf_storage_buffer *buf;
-
-	for_each_cgroup_storage_type(stype) {
-		if (!storage[stype])
-			continue;
 
-		buf = READ_ONCE(storage[stype]->buf);
-		this_cpu_write(bpf_cgroup_storage[stype], &buf->data[0]);
-	}
+	for_each_cgroup_storage_type(stype)
+		this_cpu_write(bpf_cgroup_storage[stype], storage[stype]);
 }
 
 struct bpf_cgroup_storage *bpf_cgroup_storage_alloc(struct bpf_prog *prog,
-- 
cgit v1.2.3


From b741f1630346defcbc8cc60f1a2bdae8b3b0036f Mon Sep 17 00:00:00 2001
From: Roman Gushchin <guro@fb.com>
Date: Fri, 28 Sep 2018 14:45:43 +0000
Subject: bpf: introduce per-cpu cgroup local storage

This commit introduced per-cpu cgroup local storage.

Per-cpu cgroup local storage is very similar to simple cgroup storage
(let's call it shared), except all the data is per-cpu.

The main goal of per-cpu variant is to implement super fast
counters (e.g. packet counters), which don't require neither
lookups, neither atomic operations.

>From userspace's point of view, accessing a per-cpu cgroup storage
is similar to other per-cpu map types (e.g. per-cpu hashmaps and
arrays).

Writing to a per-cpu cgroup storage is not atomic, but is performed
by copying longs, so some minimal atomicity is here, exactly
as with other per-cpu maps.

Signed-off-by: Roman Gushchin <guro@fb.com>
Cc: Daniel Borkmann <daniel@iogearbox.net>
Cc: Alexei Starovoitov <ast@kernel.org>
Acked-by: Song Liu <songliubraving@fb.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 include/linux/bpf-cgroup.h | 20 +++++++++++++++++++-
 include/linux/bpf.h        |  1 +
 include/linux/bpf_types.h  |  1 +
 3 files changed, 21 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/bpf-cgroup.h b/include/linux/bpf-cgroup.h
index 7e0c9a1d48b7..588dd5f0bd85 100644
--- a/include/linux/bpf-cgroup.h
+++ b/include/linux/bpf-cgroup.h
@@ -37,7 +37,10 @@ struct bpf_storage_buffer {
 };
 
 struct bpf_cgroup_storage {
-	struct bpf_storage_buffer *buf;
+	union {
+		struct bpf_storage_buffer *buf;
+		void __percpu *percpu_buf;
+	};
 	struct bpf_cgroup_storage_map *map;
 	struct bpf_cgroup_storage_key key;
 	struct list_head list;
@@ -109,6 +112,9 @@ int __cgroup_bpf_check_dev_permission(short dev_type, u32 major, u32 minor,
 static inline enum bpf_cgroup_storage_type cgroup_storage_type(
 	struct bpf_map *map)
 {
+	if (map->map_type == BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE)
+		return BPF_CGROUP_STORAGE_PERCPU;
+
 	return BPF_CGROUP_STORAGE_SHARED;
 }
 
@@ -131,6 +137,10 @@ void bpf_cgroup_storage_unlink(struct bpf_cgroup_storage *storage);
 int bpf_cgroup_storage_assign(struct bpf_prog *prog, struct bpf_map *map);
 void bpf_cgroup_storage_release(struct bpf_prog *prog, struct bpf_map *map);
 
+int bpf_percpu_cgroup_storage_copy(struct bpf_map *map, void *key, void *value);
+int bpf_percpu_cgroup_storage_update(struct bpf_map *map, void *key,
+				     void *value, u64 flags);
+
 /* Wrappers for __cgroup_bpf_run_filter_skb() guarded by cgroup_bpf_enabled. */
 #define BPF_CGROUP_RUN_PROG_INET_INGRESS(sk, skb)			      \
 ({									      \
@@ -285,6 +295,14 @@ static inline struct bpf_cgroup_storage *bpf_cgroup_storage_alloc(
 	struct bpf_prog *prog, enum bpf_cgroup_storage_type stype) { return 0; }
 static inline void bpf_cgroup_storage_free(
 	struct bpf_cgroup_storage *storage) {}
+static inline int bpf_percpu_cgroup_storage_copy(struct bpf_map *map, void *key,
+						 void *value) {
+	return 0;
+}
+static inline int bpf_percpu_cgroup_storage_update(struct bpf_map *map,
+					void *key, void *value, u64 flags) {
+	return 0;
+}
 
 #define cgroup_bpf_enabled (0)
 #define BPF_CGROUP_PRE_CONNECT_ENABLED(sk) (0)
diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index b457fbe7b70b..018299a595c8 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -274,6 +274,7 @@ struct bpf_prog_offload {
 
 enum bpf_cgroup_storage_type {
 	BPF_CGROUP_STORAGE_SHARED,
+	BPF_CGROUP_STORAGE_PERCPU,
 	__BPF_CGROUP_STORAGE_MAX
 };
 
diff --git a/include/linux/bpf_types.h b/include/linux/bpf_types.h
index c9bd6fb765b0..5432f4c9f50e 100644
--- a/include/linux/bpf_types.h
+++ b/include/linux/bpf_types.h
@@ -43,6 +43,7 @@ BPF_MAP_TYPE(BPF_MAP_TYPE_CGROUP_ARRAY, cgroup_array_map_ops)
 #endif
 #ifdef CONFIG_CGROUP_BPF
 BPF_MAP_TYPE(BPF_MAP_TYPE_CGROUP_STORAGE, cgroup_storage_map_ops)
+BPF_MAP_TYPE(BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE, cgroup_storage_map_ops)
 #endif
 BPF_MAP_TYPE(BPF_MAP_TYPE_HASH, htab_map_ops)
 BPF_MAP_TYPE(BPF_MAP_TYPE_PERCPU_HASH, htab_percpu_map_ops)
-- 
cgit v1.2.3


From 59c9d35ea9cd73c3a55642ec9a0097770baccb93 Mon Sep 17 00:00:00 2001
From: Alaa Hleihel <alaa@mellanox.com>
Date: Wed, 5 Sep 2018 17:06:37 +0300
Subject: net/mlx5: Cache the system image guid

The system image guid is a read-only field which is used by the TC
offloads code to determine if two mlx5 devices belong to the same
ASIC while adding flows.

Read this once and save it on the core device rather than querying each
time an offloaded flow is added.

Signed-off-by: Alaa Hleihel <alaa@mellanox.com>
Reviewed-by: Or Gerlitz <ogerlitz@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
 include/linux/mlx5/driver.h | 1 +
 include/linux/mlx5/vport.h  | 2 ++
 2 files changed, 3 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h
index ed73b51f6697..26a92462f4ce 100644
--- a/include/linux/mlx5/driver.h
+++ b/include/linux/mlx5/driver.h
@@ -838,6 +838,7 @@ struct mlx5_core_dev {
 		u32 fpga[MLX5_ST_SZ_DW(fpga_cap)];
 		u32 qcam[MLX5_ST_SZ_DW(qcam_reg)];
 	} caps;
+	u64			sys_image_guid;
 	phys_addr_t		iseg_base;
 	struct mlx5_init_seg __iomem *iseg;
 	enum mlx5_device_state	state;
diff --git a/include/linux/mlx5/vport.h b/include/linux/mlx5/vport.h
index 7e7c6dfcfb09..9c694808c212 100644
--- a/include/linux/mlx5/vport.h
+++ b/include/linux/mlx5/vport.h
@@ -121,4 +121,6 @@ int mlx5_nic_vport_query_local_lb(struct mlx5_core_dev *mdev, bool *status);
 int mlx5_nic_vport_affiliate_multiport(struct mlx5_core_dev *master_mdev,
 				       struct mlx5_core_dev *port_mdev);
 int mlx5_nic_vport_unaffiliate_multiport(struct mlx5_core_dev *port_mdev);
+
+u64 mlx5_query_nic_system_image_guid(struct mlx5_core_dev *mdev);
 #endif /* __MLX5_VPORT_H__ */
-- 
cgit v1.2.3


From b31cdffa2329fe330cd304ca26c250dd1520fb0a Mon Sep 17 00:00:00 2001
From: Andrew Lunn <andrew@lunn.ch>
Date: Sat, 29 Sep 2018 23:04:09 +0200
Subject: net: phy: Move linkmode helpers to somewhere public

phylink has some useful helpers to working with linkmode bitmaps.
Move them to there own header so other code can use them.

Signed-off-by: Andrew Lunn <andrew@lunn.ch>
Acked-by: Florian Fainelli <f.fainelli@gmail.com>
Reviewed-by: Maxime Chevallier <maxime.chevallier@bootlin.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/linkmode.h | 67 ++++++++++++++++++++++++++++++++++++++++++++++++
 include/linux/mii.h      |  1 +
 include/linux/phy.h      |  1 +
 3 files changed, 69 insertions(+)
 create mode 100644 include/linux/linkmode.h

(limited to 'include/linux')

diff --git a/include/linux/linkmode.h b/include/linux/linkmode.h
new file mode 100644
index 000000000000..014fb86c7114
--- /dev/null
+++ b/include/linux/linkmode.h
@@ -0,0 +1,67 @@
+#ifndef __LINKMODE_H
+#define __LINKMODE_H
+
+#include <linux/bitmap.h>
+#include <linux/ethtool.h>
+#include <uapi/linux/ethtool.h>
+
+static inline void linkmode_zero(unsigned long *dst)
+{
+	bitmap_zero(dst, __ETHTOOL_LINK_MODE_MASK_NBITS);
+}
+
+static inline void linkmode_copy(unsigned long *dst, const unsigned long *src)
+{
+	bitmap_copy(dst, src, __ETHTOOL_LINK_MODE_MASK_NBITS);
+}
+
+static inline void linkmode_and(unsigned long *dst, const unsigned long *a,
+				const unsigned long *b)
+{
+	bitmap_and(dst, a, b, __ETHTOOL_LINK_MODE_MASK_NBITS);
+}
+
+static inline void linkmode_or(unsigned long *dst, const unsigned long *a,
+				const unsigned long *b)
+{
+	bitmap_or(dst, a, b, __ETHTOOL_LINK_MODE_MASK_NBITS);
+}
+
+static inline bool linkmode_empty(const unsigned long *src)
+{
+	return bitmap_empty(src, __ETHTOOL_LINK_MODE_MASK_NBITS);
+}
+
+static inline int linkmode_andnot(unsigned long *dst, const unsigned long *src1,
+				  const unsigned long *src2)
+{
+	return bitmap_andnot(dst, src1, src2,  __ETHTOOL_LINK_MODE_MASK_NBITS);
+}
+
+static inline void linkmode_set_bit(int nr, volatile unsigned long *addr)
+{
+	__set_bit(nr, addr);
+}
+
+static inline void linkmode_clear_bit(int nr, volatile unsigned long *addr)
+{
+	__clear_bit(nr, addr);
+}
+
+static inline void linkmode_change_bit(int nr, volatile unsigned long *addr)
+{
+	__change_bit(nr, addr);
+}
+
+static inline int linkmode_test_bit(int nr, volatile unsigned long *addr)
+{
+	return test_bit(nr, addr);
+}
+
+static inline int linkmode_equal(const unsigned long *src1,
+				 const unsigned long *src2)
+{
+	return bitmap_equal(src1, src2, __ETHTOOL_LINK_MODE_MASK_NBITS);
+}
+
+#endif /* __LINKMODE_H */
diff --git a/include/linux/mii.h b/include/linux/mii.h
index 55000ee5c6ad..567047ef0309 100644
--- a/include/linux/mii.h
+++ b/include/linux/mii.h
@@ -10,6 +10,7 @@
 
 
 #include <linux/if.h>
+#include <linux/linkmode.h>
 #include <uapi/linux/mii.h>
 
 struct ethtool_cmd;
diff --git a/include/linux/phy.h b/include/linux/phy.h
index 192a1fa0c73b..d24cc46748e2 100644
--- a/include/linux/phy.h
+++ b/include/linux/phy.h
@@ -19,6 +19,7 @@
 #include <linux/compiler.h>
 #include <linux/spinlock.h>
 #include <linux/ethtool.h>
+#include <linux/linkmode.h>
 #include <linux/mdio.h>
 #include <linux/mii.h>
 #include <linux/module.h>
-- 
cgit v1.2.3


From ab2a605fa621ecf4ec26603a237822f7772cfa28 Mon Sep 17 00:00:00 2001
From: Andrew Lunn <andrew@lunn.ch>
Date: Sat, 29 Sep 2018 23:04:10 +0200
Subject: net: phy: Add phydev_warn()

Not all new style LINK_MODE bits can be converted into old style
SUPPORTED bits. We need to warn when such a conversion is attempted.
Add a helper for this.

Convert all pr_warn() calls to phydev_warn() where possible.

Signed-off-by: Andrew Lunn <andrew@lunn.ch>
Reviewed-by: Maxime Chevallier <maxime.chevallier@bootlin.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/phy.h | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/phy.h b/include/linux/phy.h
index d24cc46748e2..0ab9f89773fd 100644
--- a/include/linux/phy.h
+++ b/include/linux/phy.h
@@ -968,6 +968,9 @@ static inline void phy_device_reset(struct phy_device *phydev, int value)
 #define phydev_err(_phydev, format, args...)	\
 	dev_err(&_phydev->mdio.dev, format, ##args)
 
+#define phydev_warn(_phydev, format, args...)	\
+	dev_warn(&_phydev->mdio.dev, format, ##args)
+
 #define phydev_dbg(_phydev, format, args...)	\
 	dev_dbg(&_phydev->mdio.dev, format, ##args)
 
-- 
cgit v1.2.3


From c4fabb8b3c0d724eb93dabaf346b0dd8a8be7118 Mon Sep 17 00:00:00 2001
From: Andrew Lunn <andrew@lunn.ch>
Date: Sat, 29 Sep 2018 23:04:11 +0200
Subject: net: phy: Add phydev_info()

Add phydev_info() and make use of it within the phy drivers and core
code.

Signed-off-by: Andrew Lunn <andrew@lunn.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/phy.h | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/phy.h b/include/linux/phy.h
index 0ab9f89773fd..0f6e7bf5e9c5 100644
--- a/include/linux/phy.h
+++ b/include/linux/phy.h
@@ -968,6 +968,9 @@ static inline void phy_device_reset(struct phy_device *phydev, int value)
 #define phydev_err(_phydev, format, args...)	\
 	dev_err(&_phydev->mdio.dev, format, ##args)
 
+#define phydev_info(_phydev, format, args...)	\
+	dev_info(&_phydev->mdio.dev, format, ##args)
+
 #define phydev_warn(_phydev, format, args...)	\
 	dev_warn(&_phydev->mdio.dev, format, ##args)
 
-- 
cgit v1.2.3


From edc7ccbbcf32b97c7d26cd556f364eb4f22c4285 Mon Sep 17 00:00:00 2001
From: Andrew Lunn <andrew@lunn.ch>
Date: Sat, 29 Sep 2018 23:04:12 +0200
Subject: net: phy: Add helper to convert MII ADV register to a linkmode

The phy_mii_ioctl can be used to write a value into the MII_ADVERTISE
register in the PHY. Since this changes the state of the PHY, we need
to make the same change to phydev->advertising. Add a helper which can
convert the register value to a linkmode.

Signed-off-by: Andrew Lunn <andrew@lunn.ch>
Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
Reviewed-by: Maxime Chevallier <maxime.chevallier@bootlin.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/mii.h | 31 +++++++++++++++++++++++++++++++
 1 file changed, 31 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/mii.h b/include/linux/mii.h
index 567047ef0309..8c7da9473ad9 100644
--- a/include/linux/mii.h
+++ b/include/linux/mii.h
@@ -303,6 +303,37 @@ static inline u32 mii_lpa_to_ethtool_lpa_x(u32 lpa)
 	return result | mii_adv_to_ethtool_adv_x(lpa);
 }
 
+/**
+ * mii_adv_to_linkmode_adv_t
+ * @advertising:pointer to destination link mode.
+ * @adv: value of the MII_ADVERTISE register
+ *
+ * A small helper function that translates MII_ADVERTISE bits
+ * to linkmode advertisement settings.
+ */
+static inline void mii_adv_to_linkmode_adv_t(unsigned long *advertising,
+					     u32 adv)
+{
+	linkmode_zero(advertising);
+
+	if (adv & ADVERTISE_10HALF)
+		linkmode_set_bit(ETHTOOL_LINK_MODE_10baseT_Half_BIT,
+				 advertising);
+	if (adv & ADVERTISE_10FULL)
+		linkmode_set_bit(ETHTOOL_LINK_MODE_10baseT_Full_BIT,
+				 advertising);
+	if (adv & ADVERTISE_100HALF)
+		linkmode_set_bit(ETHTOOL_LINK_MODE_100baseT_Half_BIT,
+				 advertising);
+	if (adv & ADVERTISE_100FULL)
+		linkmode_set_bit(ETHTOOL_LINK_MODE_100baseT_Full_BIT,
+				 advertising);
+	if (adv & ADVERTISE_PAUSE_CAP)
+		linkmode_set_bit(ETHTOOL_LINK_MODE_Pause_BIT, advertising);
+	if (adv & ADVERTISE_PAUSE_ASYM)
+		linkmode_set_bit(ETHTOOL_LINK_MODE_Asym_Pause_BIT, advertising);
+}
+
 /**
  * mii_advertise_flowctrl - get flow control advertisement flags
  * @cap: Flow control capabilities (FLOW_CTRL_RX, FLOW_CTRL_TX or both)
-- 
cgit v1.2.3


From 5f991f7bddc991ecc3c8a009ffd76fccff4661c7 Mon Sep 17 00:00:00 2001
From: Andrew Lunn <andrew@lunn.ch>
Date: Sat, 29 Sep 2018 23:04:13 +0200
Subject: net: phy: Add helper for advertise to lcl value

Add a helper to convert the local advertising to an LCL capabilities,
which is then used to resolve pause flow control settings.

Signed-off-by: Andrew Lunn <andrew@lunn.ch>
Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
Reviewed-by: Maxime Chevallier <maxime.chevallier@bootlin.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/mii.h | 19 +++++++++++++++++++
 1 file changed, 19 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/mii.h b/include/linux/mii.h
index 8c7da9473ad9..9ed49c8261d0 100644
--- a/include/linux/mii.h
+++ b/include/linux/mii.h
@@ -334,6 +334,25 @@ static inline void mii_adv_to_linkmode_adv_t(unsigned long *advertising,
 		linkmode_set_bit(ETHTOOL_LINK_MODE_Asym_Pause_BIT, advertising);
 }
 
+/**
+ * ethtool_adv_to_lcl_adv_t
+ * @advertising:pointer to ethtool advertising
+ *
+ * A small helper function that translates ethtool advertising to LVL
+ * pause capabilities.
+ */
+static inline u32 ethtool_adv_to_lcl_adv_t(u32 advertising)
+{
+	u32 lcl_adv = 0;
+
+	if (advertising & ADVERTISED_Pause)
+		lcl_adv |= ADVERTISE_PAUSE_CAP;
+	if (advertising & ADVERTISED_Asym_Pause)
+		lcl_adv |= ADVERTISE_PAUSE_ASYM;
+
+	return lcl_adv;
+}
+
 /**
  * mii_advertise_flowctrl - get flow control advertisement flags
  * @cap: Flow control capabilities (FLOW_CTRL_RX, FLOW_CTRL_TX or both)
-- 
cgit v1.2.3


From f954a04ea18ebfcba1cd2756eaee59eb4978a20e Mon Sep 17 00:00:00 2001
From: Andrew Lunn <andrew@lunn.ch>
Date: Sat, 29 Sep 2018 23:04:14 +0200
Subject: net: phy: Add limkmode equivalents to some of the MII ethtool helpers

Add helpers which take a linkmode rather than a u32 ethtool for
advertising settings.

Signed-off-by: Andrew Lunn <andrew@lunn.ch>
Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
Reviewed-by: Maxime Chevallier <maxime.chevallier@bootlin.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/mii.h | 50 ++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 50 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/mii.h b/include/linux/mii.h
index 9ed49c8261d0..2da85b02e1c0 100644
--- a/include/linux/mii.h
+++ b/include/linux/mii.h
@@ -132,6 +132,34 @@ static inline u32 ethtool_adv_to_mii_adv_t(u32 ethadv)
 	return result;
 }
 
+/**
+ * linkmode_adv_to_mii_adv_t
+ * @advertising: the linkmode advertisement settings
+ *
+ * A small helper function that translates linkmode advertisement
+ * settings to phy autonegotiation advertisements for the
+ * MII_ADVERTISE register.
+ */
+static inline u32 linkmode_adv_to_mii_adv_t(unsigned long *advertising)
+{
+	u32 result = 0;
+
+	if (linkmode_test_bit(ETHTOOL_LINK_MODE_10baseT_Half_BIT, advertising))
+		result |= ADVERTISE_10HALF;
+	if (linkmode_test_bit(ETHTOOL_LINK_MODE_10baseT_Full_BIT, advertising))
+		result |= ADVERTISE_10FULL;
+	if (linkmode_test_bit(ETHTOOL_LINK_MODE_100baseT_Half_BIT, advertising))
+		result |= ADVERTISE_100HALF;
+	if (linkmode_test_bit(ETHTOOL_LINK_MODE_100baseT_Full_BIT, advertising))
+		result |= ADVERTISE_100FULL;
+	if (linkmode_test_bit(ETHTOOL_LINK_MODE_Pause_BIT, advertising))
+		result |= ADVERTISE_PAUSE_CAP;
+	if (linkmode_test_bit(ETHTOOL_LINK_MODE_Asym_Pause_BIT, advertising))
+		result |= ADVERTISE_PAUSE_ASYM;
+
+	return result;
+}
+
 /**
  * mii_adv_to_ethtool_adv_t
  * @adv: value of the MII_ADVERTISE register
@@ -179,6 +207,28 @@ static inline u32 ethtool_adv_to_mii_ctrl1000_t(u32 ethadv)
 	return result;
 }
 
+/**
+ * linkmode_adv_to_mii_ctrl1000_t
+ * advertising: the linkmode advertisement settings
+ *
+ * A small helper function that translates linkmode advertisement
+ * settings to phy autonegotiation advertisements for the
+ * MII_CTRL1000 register when in 1000T mode.
+ */
+static inline u32 linkmode_adv_to_mii_ctrl1000_t(unsigned long *advertising)
+{
+	u32 result = 0;
+
+	if (linkmode_test_bit(ETHTOOL_LINK_MODE_1000baseT_Half_BIT,
+			      advertising))
+		result |= ADVERTISE_1000HALF;
+	if (linkmode_test_bit(ETHTOOL_LINK_MODE_1000baseT_Full_BIT,
+			      advertising))
+		result |= ADVERTISE_1000FULL;
+
+	return result;
+}
+
 /**
  * mii_ctrl1000_to_ethtool_adv_t
  * @adv: value of the MII_CTRL1000 register
-- 
cgit v1.2.3


From 719655a149715f26fc4de904fe0aa83068bd5b9e Mon Sep 17 00:00:00 2001
From: Andrew Lunn <andrew@lunn.ch>
Date: Sat, 29 Sep 2018 23:04:16 +0200
Subject: net: phy: Replace phy driver features u32 with link_mode bitmap

This is one step in allowing phylib to make use of link_mode bitmaps,
instead of u32 for supported and advertised features. Convert the phy
drivers to use bitmaps to indicates the features they support.

Build bitmap equivalents of the u32 values at runtime, and have the
drivers point to the appropriate bitmap. These bitmaps are shared, and
we don't want a driver to modify them. So mark them __ro_after_init.

Within phylib, the features bitmap is currently turned back into a
u32. This will be removed once the whole of phylib, and the drivers
are converted to use bitmaps.

Signed-off-by: Andrew Lunn <andrew@lunn.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/linkmode.h |  9 +++++++++
 include/linux/phy.h      | 24 ++++++++++++++++--------
 2 files changed, 25 insertions(+), 8 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/linkmode.h b/include/linux/linkmode.h
index 014fb86c7114..22443d7fb5cd 100644
--- a/include/linux/linkmode.h
+++ b/include/linux/linkmode.h
@@ -43,6 +43,15 @@ static inline void linkmode_set_bit(int nr, volatile unsigned long *addr)
 	__set_bit(nr, addr);
 }
 
+static inline void linkmode_set_bit_array(const int *array, int array_size,
+					  unsigned long *addr)
+{
+	int i;
+
+	for (i = 0; i < array_size; i++)
+		linkmode_set_bit(array[i], addr);
+}
+
 static inline void linkmode_clear_bit(int nr, volatile unsigned long *addr)
 {
 	__clear_bit(nr, addr);
diff --git a/include/linux/phy.h b/include/linux/phy.h
index 0f6e7bf5e9c5..dff51dd36e52 100644
--- a/include/linux/phy.h
+++ b/include/linux/phy.h
@@ -42,13 +42,21 @@
 #define PHY_1000BT_FEATURES	(SUPPORTED_1000baseT_Half | \
 				 SUPPORTED_1000baseT_Full)
 
-#define PHY_BASIC_FEATURES	(PHY_10BT_FEATURES | \
-				 PHY_100BT_FEATURES | \
-				 PHY_DEFAULT_FEATURES)
-
-#define PHY_GBIT_FEATURES	(PHY_BASIC_FEATURES | \
-				 PHY_1000BT_FEATURES)
-
+extern __ETHTOOL_DECLARE_LINK_MODE_MASK(phy_basic_features) __ro_after_init;
+extern __ETHTOOL_DECLARE_LINK_MODE_MASK(phy_basic_t1_features) __ro_after_init;
+extern __ETHTOOL_DECLARE_LINK_MODE_MASK(phy_gbit_features) __ro_after_init;
+extern __ETHTOOL_DECLARE_LINK_MODE_MASK(phy_gbit_fibre_features) __ro_after_init;
+extern __ETHTOOL_DECLARE_LINK_MODE_MASK(phy_gbit_all_ports_features) __ro_after_init;
+extern __ETHTOOL_DECLARE_LINK_MODE_MASK(phy_10gbit_features) __ro_after_init;
+extern __ETHTOOL_DECLARE_LINK_MODE_MASK(phy_10gbit_full_features) __ro_after_init;
+
+#define PHY_BASIC_FEATURES ((unsigned long *)&phy_basic_features)
+#define PHY_BASIC_T1_FEATURES ((unsigned long *)&phy_basic_t1_features)
+#define PHY_GBIT_FEATURES ((unsigned long *)&phy_gbit_features)
+#define PHY_GBIT_FIBRE_FEATURES ((unsigned long *)&phy_gbit_fibre_features)
+#define PHY_GBIT_ALL_PORTS_FEATURES ((unsigned long *)&phy_gbit_all_ports_features)
+#define PHY_10GBIT_FEATURES ((unsigned long *)&phy_10gbit_features)
+#define PHY_10GBIT_FULL_FEATURES ((unsigned long *)&phy_10gbit_full_features)
 
 /*
  * Set phydev->irq to PHY_POLL if interrupts are not supported,
@@ -510,7 +518,7 @@ struct phy_driver {
 	u32 phy_id;
 	char *name;
 	u32 phy_id_mask;
-	u32 features;
+	const unsigned long * const features;
 	u32 flags;
 	const void *driver_data;
 
-- 
cgit v1.2.3


From 9f2959b6b52d43326b2f6a0e0d7ffe6f4fc3b5ca Mon Sep 17 00:00:00 2001
From: Heiner Kallweit <hkallweit1@gmail.com>
Date: Fri, 28 Sep 2018 08:51:09 +0200
Subject: net: phy: improve handling delayed work

Using mod_delayed_work() allows to simplify handling delayed work and
removes the need for the sync parameter in phy_trigger_machine().
Also introduce a helper phy_queue_state_machine() to encapsulate the
low-level delayed work calls. No functional change intended.

Signed-off-by: Heiner Kallweit <hkallweit1@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/phy.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/phy.h b/include/linux/phy.h
index dff51dd36e52..3ea87f774a76 100644
--- a/include/linux/phy.h
+++ b/include/linux/phy.h
@@ -1054,7 +1054,7 @@ void phy_change_work(struct work_struct *work);
 void phy_mac_interrupt(struct phy_device *phydev);
 void phy_start_machine(struct phy_device *phydev);
 void phy_stop_machine(struct phy_device *phydev);
-void phy_trigger_machine(struct phy_device *phydev, bool sync);
+void phy_trigger_machine(struct phy_device *phydev);
 int phy_ethtool_sset(struct phy_device *phydev, struct ethtool_cmd *cmd);
 void phy_ethtool_ksettings_get(struct phy_device *phydev,
 			       struct ethtool_link_ksettings *cmd);
-- 
cgit v1.2.3


From cc16567e5a8a7bb9439ef61ab80069acdd33f76f Mon Sep 17 00:00:00 2001
From: Paolo Abeni <pabeni@redhat.com>
Date: Tue, 2 Oct 2018 11:03:40 +0200
Subject: net: drop unused skb_append_datato_frags()

This helper is unused since commit 988cf74deb45 ("inet:
Stop generating UFO packets.")

Signed-off-by: Paolo Abeni <pabeni@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/skbuff.h | 5 -----
 1 file changed, 5 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 87e29710373f..119d092c6b13 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -1082,11 +1082,6 @@ static inline int skb_pad(struct sk_buff *skb, int pad)
 }
 #define dev_kfree_skb(a)	consume_skb(a)
 
-int skb_append_datato_frags(struct sock *sk, struct sk_buff *skb,
-			    int getfrag(void *from, char *to, int offset,
-					int len, int odd, struct sk_buff *skb),
-			    void *from, int length);
-
 int skb_append_pagefrags(struct sk_buff *skb, struct page *page,
 			 int offset, size_t size);
 
-- 
cgit v1.2.3


From 5bf0961cc6a180c077793f2615a8fd842c655876 Mon Sep 17 00:00:00 2001
From: Sudarsana Reddy Kalluru <sudarsana.kalluru@cavium.com>
Date: Tue, 2 Oct 2018 06:16:11 -0700
Subject: qed: Add driver support for 20G link speed.

Add driver support for configuring/reading the 20G link speed.

Signed-off-by: Sudarsana Reddy Kalluru <Sudarsana.Kalluru@cavium.com>
Signed-off-by: Michal Kalderon <Michal.Kalderon@cavium.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/qed/qed_if.h | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/qed/qed_if.h b/include/linux/qed/qed_if.h
index 8cd34645e892..dee3c9c744f7 100644
--- a/include/linux/qed/qed_if.h
+++ b/include/linux/qed/qed_if.h
@@ -670,10 +670,11 @@ enum qed_link_mode_bits {
 	QED_LM_1000baseT_Half_BIT = BIT(4),
 	QED_LM_1000baseT_Full_BIT = BIT(5),
 	QED_LM_10000baseKR_Full_BIT = BIT(6),
-	QED_LM_25000baseKR_Full_BIT = BIT(7),
-	QED_LM_40000baseLR4_Full_BIT = BIT(8),
-	QED_LM_50000baseKR2_Full_BIT = BIT(9),
-	QED_LM_100000baseKR4_Full_BIT = BIT(10),
+	QED_LM_20000baseKR2_Full_BIT = BIT(7),
+	QED_LM_25000baseKR_Full_BIT = BIT(8),
+	QED_LM_40000baseLR4_Full_BIT = BIT(9),
+	QED_LM_50000baseKR2_Full_BIT = BIT(10),
+	QED_LM_100000baseKR4_Full_BIT = BIT(11),
 	QED_LM_COUNT = 11
 };
 
-- 
cgit v1.2.3


From f3709f69b7c5cba6323cc03c29b64293b93be817 Mon Sep 17 00:00:00 2001
From: Joe Stringer <joe@wand.net.nz>
Date: Tue, 2 Oct 2018 13:35:29 -0700
Subject: bpf: Add iterator for spilled registers

Add this iterator for spilled registers, it concentrates the details of
how to get the current frame's spilled registers into a single macro
while clarifying the intention of the code which is calling the macro.

Signed-off-by: Joe Stringer <joe@wand.net.nz>
Acked-by: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 include/linux/bpf_verifier.h | 11 +++++++++++
 1 file changed, 11 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h
index b42b60a83e19..d0e7f97e8b60 100644
--- a/include/linux/bpf_verifier.h
+++ b/include/linux/bpf_verifier.h
@@ -131,6 +131,17 @@ struct bpf_verifier_state {
 	u32 curframe;
 };
 
+#define bpf_get_spilled_reg(slot, frame)				\
+	(((slot < frame->allocated_stack / BPF_REG_SIZE) &&		\
+	  (frame->stack[slot].slot_type[0] == STACK_SPILL))		\
+	 ? &frame->stack[slot].spilled_ptr : NULL)
+
+/* Iterate over 'frame', setting 'reg' to either NULL or a spilled register. */
+#define bpf_for_each_spilled_reg(iter, frame, reg)			\
+	for (iter = 0, reg = bpf_get_spilled_reg(iter, frame);		\
+	     iter < frame->allocated_stack / BPF_REG_SIZE;		\
+	     iter++, reg = bpf_get_spilled_reg(iter, frame))
+
 /* linked list of verifier states used to prune search */
 struct bpf_verifier_state_list {
 	struct bpf_verifier_state state;
-- 
cgit v1.2.3


From c64b7983288e636356f7f5f652de4813e1cfedac Mon Sep 17 00:00:00 2001
From: Joe Stringer <joe@wand.net.nz>
Date: Tue, 2 Oct 2018 13:35:33 -0700
Subject: bpf: Add PTR_TO_SOCKET verifier type

Teach the verifier a little bit about a new type of pointer, a
PTR_TO_SOCKET. This pointer type is accessed from BPF through the
'struct bpf_sock' structure.

Signed-off-by: Joe Stringer <joe@wand.net.nz>
Acked-by: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 include/linux/bpf.h          | 34 ++++++++++++++++++++++++++++++++++
 include/linux/bpf_verifier.h |  2 ++
 2 files changed, 36 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 018299a595c8..027697b6a22f 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -154,6 +154,7 @@ enum bpf_arg_type {
 
 	ARG_PTR_TO_CTX,		/* pointer to context */
 	ARG_ANYTHING,		/* any (initialized) argument is ok */
+	ARG_PTR_TO_SOCKET,	/* pointer to bpf_sock */
 };
 
 /* type of values returned from helper functions */
@@ -162,6 +163,7 @@ enum bpf_return_type {
 	RET_VOID,			/* function doesn't return anything */
 	RET_PTR_TO_MAP_VALUE,		/* returns a pointer to map elem value */
 	RET_PTR_TO_MAP_VALUE_OR_NULL,	/* returns a pointer to map elem value or NULL */
+	RET_PTR_TO_SOCKET_OR_NULL,	/* returns a pointer to a socket or NULL */
 };
 
 /* eBPF function prototype used by verifier to allow BPF_CALLs from eBPF programs
@@ -213,6 +215,8 @@ enum bpf_reg_type {
 	PTR_TO_PACKET,		 /* reg points to skb->data */
 	PTR_TO_PACKET_END,	 /* skb->data + headlen */
 	PTR_TO_FLOW_KEYS,	 /* reg points to bpf_flow_keys */
+	PTR_TO_SOCKET,		 /* reg points to struct bpf_sock */
+	PTR_TO_SOCKET_OR_NULL,	 /* reg points to struct bpf_sock or NULL */
 };
 
 /* The information passed from prog-specific *_is_valid_access
@@ -343,6 +347,11 @@ const struct bpf_func_proto *bpf_get_trace_printk_proto(void);
 
 typedef unsigned long (*bpf_ctx_copy_t)(void *dst, const void *src,
 					unsigned long off, unsigned long len);
+typedef u32 (*bpf_convert_ctx_access_t)(enum bpf_access_type type,
+					const struct bpf_insn *src,
+					struct bpf_insn *dst,
+					struct bpf_prog *prog,
+					u32 *target_size);
 
 u64 bpf_event_output(struct bpf_map *map, u64 flags, void *meta, u64 meta_size,
 		     void *ctx, u64 ctx_size, bpf_ctx_copy_t ctx_copy);
@@ -836,4 +845,29 @@ extern const struct bpf_func_proto bpf_get_local_storage_proto;
 void bpf_user_rnd_init_once(void);
 u64 bpf_user_rnd_u32(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5);
 
+#if defined(CONFIG_NET)
+bool bpf_sock_is_valid_access(int off, int size, enum bpf_access_type type,
+			      struct bpf_insn_access_aux *info);
+u32 bpf_sock_convert_ctx_access(enum bpf_access_type type,
+				const struct bpf_insn *si,
+				struct bpf_insn *insn_buf,
+				struct bpf_prog *prog,
+				u32 *target_size);
+#else
+static inline bool bpf_sock_is_valid_access(int off, int size,
+					    enum bpf_access_type type,
+					    struct bpf_insn_access_aux *info)
+{
+	return false;
+}
+static inline u32 bpf_sock_convert_ctx_access(enum bpf_access_type type,
+					      const struct bpf_insn *si,
+					      struct bpf_insn *insn_buf,
+					      struct bpf_prog *prog,
+					      u32 *target_size)
+{
+	return 0;
+}
+#endif
+
 #endif /* _LINUX_BPF_H */
diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h
index d0e7f97e8b60..a411363098a5 100644
--- a/include/linux/bpf_verifier.h
+++ b/include/linux/bpf_verifier.h
@@ -58,6 +58,8 @@ struct bpf_reg_state {
 	 * offset, so they can share range knowledge.
 	 * For PTR_TO_MAP_VALUE_OR_NULL this is used to share which map value we
 	 * came from, when one is tested for != NULL.
+	 * For PTR_TO_SOCKET this is used to share which pointers retain the
+	 * same reference to the socket, to determine proper reference freeing.
 	 */
 	u32 id;
 	/* For scalar types (SCALAR_VALUE), this represents our knowledge of
-- 
cgit v1.2.3


From fd978bf7fd312581a7ca454a991f0ffb34c4204b Mon Sep 17 00:00:00 2001
From: Joe Stringer <joe@wand.net.nz>
Date: Tue, 2 Oct 2018 13:35:35 -0700
Subject: bpf: Add reference tracking to verifier

Allow helper functions to acquire a reference and return it into a
register. Specific pointer types such as the PTR_TO_SOCKET will
implicitly represent such a reference. The verifier must ensure that
these references are released exactly once in each path through the
program.

To achieve this, this commit assigns an id to the pointer and tracks it
in the 'bpf_func_state', then when the function or program exits,
verifies that all of the acquired references have been freed. When the
pointer is passed to a function that frees the reference, it is removed
from the 'bpf_func_state` and all existing copies of the pointer in
registers are marked invalid.

Signed-off-by: Joe Stringer <joe@wand.net.nz>
Acked-by: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 include/linux/bpf_verifier.h | 24 +++++++++++++++++++++---
 1 file changed, 21 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h
index a411363098a5..7b6fd2ab3263 100644
--- a/include/linux/bpf_verifier.h
+++ b/include/linux/bpf_verifier.h
@@ -104,6 +104,17 @@ struct bpf_stack_state {
 	u8 slot_type[BPF_REG_SIZE];
 };
 
+struct bpf_reference_state {
+	/* Track each reference created with a unique id, even if the same
+	 * instruction creates the reference multiple times (eg, via CALL).
+	 */
+	int id;
+	/* Instruction where the allocation of this reference occurred. This
+	 * is used purely to inform the user of a reference leak.
+	 */
+	int insn_idx;
+};
+
 /* state of the program:
  * type of all registers and stack info
  */
@@ -121,7 +132,9 @@ struct bpf_func_state {
 	 */
 	u32 subprogno;
 
-	/* should be second to last. See copy_func_state() */
+	/* The following fields should be last. See copy_func_state() */
+	int acquired_refs;
+	struct bpf_reference_state *refs;
 	int allocated_stack;
 	struct bpf_stack_state *stack;
 };
@@ -217,11 +230,16 @@ __printf(2, 0) void bpf_verifier_vlog(struct bpf_verifier_log *log,
 __printf(2, 3) void bpf_verifier_log_write(struct bpf_verifier_env *env,
 					   const char *fmt, ...);
 
-static inline struct bpf_reg_state *cur_regs(struct bpf_verifier_env *env)
+static inline struct bpf_func_state *cur_func(struct bpf_verifier_env *env)
 {
 	struct bpf_verifier_state *cur = env->cur_state;
 
-	return cur->frame[cur->curframe]->regs;
+	return cur->frame[cur->curframe];
+}
+
+static inline struct bpf_reg_state *cur_regs(struct bpf_verifier_env *env)
+{
+	return cur_func(env)->regs;
 }
 
 int bpf_prog_offload_verifier_prep(struct bpf_verifier_env *env);
-- 
cgit v1.2.3


From f3edc2dbe0ad0bbbd8450cd37328f99acf215fd8 Mon Sep 17 00:00:00 2001
From: Ben Dooks <ben-linux@fluff.org>
Date: Mon, 1 Oct 2018 17:02:43 +0100
Subject: net: usbnet: make driver_info const

The driver_info field that is used for describing each of the usb-net
drivers using the usbnet.c core all declare their information as const
and the usbnet.c itself does not try and modify the struct.

It is therefore a good idea to make this const in the usbnet.c structure
in case anyone tries to modify it.

Signed-off-by: Ben Dooks <ben-linux@fluff.org>
Signed-off-by: Ben Dooks <ben.dooks@codethink.co.uk>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/usb/usbnet.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/usb/usbnet.h b/include/linux/usb/usbnet.h
index e2ec3582e549..d8860f2d0976 100644
--- a/include/linux/usb/usbnet.h
+++ b/include/linux/usb/usbnet.h
@@ -28,7 +28,7 @@ struct usbnet {
 	/* housekeeping */
 	struct usb_device	*udev;
 	struct usb_interface	*intf;
-	struct driver_info	*driver_info;
+	const struct driver_info *driver_info;
 	const char		*driver_name;
 	void			*driver_priv;
 	wait_queue_head_t	wait;
-- 
cgit v1.2.3


From 16fc087b9cb22c9a97307cc24a5413d0df68fe11 Mon Sep 17 00:00:00 2001
From: Yashaswini Raghuram Prathivadi Bhayankaram
 <yashaswini.raghuram.prathivadi.bhayankaram@intel.com>
Date: Fri, 10 Aug 2018 00:17:44 -0700
Subject: virtchnl: Added support to exchange additional speed values

Introduced a new virtchnl capability flag and a struct to support exchange
of additional supported speeds.

Signed-off-by: Yashaswini Raghuram Prathivadi Bhayankaram <yashaswini.raghuram.prathivadi.bhayankaram@intel.com>
Signed-off-by: Anirudh Venkataramanan <anirudh.venkataramanan@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 include/linux/avf/virtchnl.h | 15 +++++++++++++++
 1 file changed, 15 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/avf/virtchnl.h b/include/linux/avf/virtchnl.h
index b41f7bc958ef..2c9756bd9c4c 100644
--- a/include/linux/avf/virtchnl.h
+++ b/include/linux/avf/virtchnl.h
@@ -252,6 +252,8 @@ VIRTCHNL_CHECK_STRUCT_LEN(16, virtchnl_vsi_resource);
 #define VIRTCHNL_VF_OFFLOAD_RX_ENCAP_CSUM	0X00400000
 #define VIRTCHNL_VF_OFFLOAD_ADQ			0X00800000
 
+/* Define below the capability flags that are not offloads */
+#define VIRTCHNL_VF_CAP_ADV_LINK_SPEED		0x00000080
 #define VF_BASE_MODE_OFFLOADS (VIRTCHNL_VF_OFFLOAD_L2 | \
 			       VIRTCHNL_VF_OFFLOAD_VLAN | \
 			       VIRTCHNL_VF_OFFLOAD_RSS_PF)
@@ -596,10 +598,23 @@ enum virtchnl_event_codes {
 struct virtchnl_pf_event {
 	enum virtchnl_event_codes event;
 	union {
+		/* If the PF driver does not support the new speed reporting
+		 * capabilities then use link_event else use link_event_adv to
+		 * get the speed and link information. The ability to understand
+		 * new speeds is indicated by setting the capability flag
+		 * VIRTCHNL_VF_CAP_ADV_LINK_SPEED in vf_cap_flags parameter
+		 * in virtchnl_vf_resource struct and can be used to determine
+		 * which link event struct to use below.
+		 */
 		struct {
 			enum virtchnl_link_speed link_speed;
 			bool link_status;
 		} link_event;
+		struct {
+			/* link_speed provided in Mbps */
+			u32 link_speed;
+			u8 link_status;
+		} link_event_adv;
 	} event_data;
 
 	int severity;
-- 
cgit v1.2.3


From fcd29ad17c6ff885dfae58f557e9323941e63ba2 Mon Sep 17 00:00:00 2001
From: Feras Daoud <ferasda@mellanox.com>
Date: Thu, 9 Aug 2018 09:55:21 +0300
Subject: net/mlx5: Add Fast teardown support

Today mlx5 devices support two teardown modes:
1- Regular teardown
2- Force teardown

This change introduces the enhanced version of the "Force teardown" that
allows SW to perform teardown in a faster way without the need to reclaim
all the pages.

Fast teardown provides the following advantages:
1- Fix a FW race condition that could cause command timeout
2- Avoid moving to polling mode
3- Close the vport to prevent PCI ACK to be sent without been scatter
to memory

Signed-off-by: Feras Daoud <ferasda@mellanox.com>
Reviewed-by: Majd Dibbiny <majd@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
 include/linux/mlx5/device.h   | 4 ++++
 include/linux/mlx5/mlx5_ifc.h | 6 ++++--
 2 files changed, 8 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/mlx5/device.h b/include/linux/mlx5/device.h
index 11fa4e66afc5..e9b502d5bcc1 100644
--- a/include/linux/mlx5/device.h
+++ b/include/linux/mlx5/device.h
@@ -504,6 +504,10 @@ struct health_buffer {
 	__be16		ext_synd;
 };
 
+enum mlx5_cmd_addr_l_sz_offset {
+	MLX5_NIC_IFC_OFFSET = 8,
+};
+
 struct mlx5_init_seg {
 	__be32			fw_rev;
 	__be32			cmdif_rev_fw_sub;
diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h
index f043d65b9bac..6e8a882052b1 100644
--- a/include/linux/mlx5/mlx5_ifc.h
+++ b/include/linux/mlx5/mlx5_ifc.h
@@ -896,7 +896,8 @@ struct mlx5_ifc_cmd_hca_cap_bits {
 	u8         log_max_mkey[0x6];
 	u8         reserved_at_f0[0x8];
 	u8         dump_fill_mkey[0x1];
-	u8         reserved_at_f9[0x3];
+	u8         reserved_at_f9[0x2];
+	u8         fast_teardown[0x1];
 	u8         log_max_eq[0x4];
 
 	u8         max_indirection[0x8];
@@ -3352,12 +3353,13 @@ struct mlx5_ifc_teardown_hca_out_bits {
 
 	u8         reserved_at_40[0x3f];
 
-	u8         force_state[0x1];
+	u8         state[0x1];
 };
 
 enum {
 	MLX5_TEARDOWN_HCA_IN_PROFILE_GRACEFUL_CLOSE  = 0x0,
 	MLX5_TEARDOWN_HCA_IN_PROFILE_FORCE_CLOSE     = 0x1,
+	MLX5_TEARDOWN_HCA_IN_PROFILE_PREPARE_FAST_TEARDOWN = 0x2,
 };
 
 struct mlx5_ifc_teardown_hca_in_bits {
-- 
cgit v1.2.3


From bbb4c4323a4d9cb5ca04db904aa3050a7586839a Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Thu, 4 Oct 2018 14:27:55 +0100
Subject: dns: Allow the dns resolver to retrieve a server set

Allow the DNS resolver to retrieve a set of servers and their associated
addresses, ports, preference and weight ratings.

In terms of communication with userspace, "srv=1" is added to the callout
string (the '1' indicating the maximum data version supported by the
kernel) to ask the userspace side for this.

If the userspace side doesn't recognise it, it will ignore the option and
return the usual text address list.

If the userspace side does recognise it, it will return some binary data
that begins with a zero byte that would cause the string parsers to give an
error.  The second byte contains the version of the data in the blob (this
may be between 1 and the version specified in the callout data).  The
remainder of the payload is version-specific.

In version 1, the payload looks like (note that this is packed):

	u8	Non-string marker (ie. 0)
	u8	Content (0 => Server list)
	u8	Version (ie. 1)
	u8	Source (eg. DNS_RECORD_FROM_DNS_SRV)
	u8	Status (eg. DNS_LOOKUP_GOOD)
	u8	Number of servers
	foreach-server {
		u16	Name length (LE)
		u16	Priority (as per SRV record) (LE)
		u16	Weight (as per SRV record) (LE)
		u16	Port (LE)
		u8	Source (eg. DNS_RECORD_FROM_NSS)
		u8	Status (eg. DNS_LOOKUP_GOT_NOT_FOUND)
		u8	Protocol (eg. DNS_SERVER_PROTOCOL_UDP)
		u8	Number of addresses
		char[]	Name (not NUL-terminated)
		foreach-address {
			u8		Family (AF_INET{,6})
			union {
				u8[4]	ipv4_addr
				u8[16]	ipv6_addr
			}
		}
	}

This can then be used to fetch a whole cell's VL-server configuration for
AFS, for example.

Signed-off-by: David Howells <dhowells@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/dns_resolver.h | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/dns_resolver.h b/include/linux/dns_resolver.h
index 6ac3cad9aef1..34a744a1bafc 100644
--- a/include/linux/dns_resolver.h
+++ b/include/linux/dns_resolver.h
@@ -24,11 +24,9 @@
 #ifndef _LINUX_DNS_RESOLVER_H
 #define _LINUX_DNS_RESOLVER_H
 
-#ifdef __KERNEL__
+#include <uapi/linux/dns_resolver.h>
 
 extern int dns_query(const char *type, const char *name, size_t namelen,
 		     const char *options, char **_result, time64_t *_expiry);
 
-#endif /* KERNEL */
-
 #endif /* _LINUX_DNS_RESOLVER_H */
-- 
cgit v1.2.3


From 661b8d1b0e3a745e25f05adef2ebd00d830eeea7 Mon Sep 17 00:00:00 2001
From: Magnus Karlsson <magnus.karlsson@intel.com>
Date: Mon, 1 Oct 2018 14:51:33 +0200
Subject: net: add umem reference in netdev{_rx}_queue

These references to the umem will be used to store information
on what kind of AF_XDP umem that is bound to a queue id, if any.

Signed-off-by: Magnus Karlsson <magnus.karlsson@intel.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 include/linux/netdevice.h | 6 ++++++
 1 file changed, 6 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 1cbbf77a685f..8318f79586c2 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -609,6 +609,9 @@ struct netdev_queue {
 
 	/* Subordinate device that the queue has been assigned to */
 	struct net_device	*sb_dev;
+#ifdef CONFIG_XDP_SOCKETS
+	struct xdp_umem         *umem;
+#endif
 /*
  * write-mostly part
  */
@@ -738,6 +741,9 @@ struct netdev_rx_queue {
 	struct kobject			kobj;
 	struct net_device		*dev;
 	struct xdp_rxq_info		xdp_rxq;
+#ifdef CONFIG_XDP_SOCKETS
+	struct xdp_umem                 *umem;
+#endif
 } ____cacheline_aligned_in_smp;
 
 /*
-- 
cgit v1.2.3


From c2a90025ad09d830c8d8ae69f485eac6aaaa2472 Mon Sep 17 00:00:00 2001
From: Quentin Schulz <quentin.schulz@bootlin.com>
Date: Thu, 4 Oct 2018 14:22:03 +0200
Subject: phy: add QSGMII and PCIE modes

Prepare for upcoming phys that'll handle QSGMII or PCIe.

Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: Quentin Schulz <quentin.schulz@bootlin.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/phy/phy.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/phy/phy.h b/include/linux/phy/phy.h
index 9713aebdd348..03b319f89a34 100644
--- a/include/linux/phy/phy.h
+++ b/include/linux/phy/phy.h
@@ -37,9 +37,11 @@ enum phy_mode {
 	PHY_MODE_USB_OTG,
 	PHY_MODE_SGMII,
 	PHY_MODE_2500SGMII,
+	PHY_MODE_QSGMII,
 	PHY_MODE_10GKR,
 	PHY_MODE_UFS_HS_A,
 	PHY_MODE_UFS_HS_B,
+	PHY_MODE_PCIE,
 };
 
 /**
-- 
cgit v1.2.3


From c941ce9c282cc606e6517356fcc186a9da2b4ab9 Mon Sep 17 00:00:00 2001
From: Quentin Monnet <quentin.monnet@netronome.com>
Date: Sun, 7 Oct 2018 12:56:47 +0100
Subject: bpf: add verifier callback to get stack usage info for offloaded
 progs

In preparation for BPF-to-BPF calls in offloaded programs, add a new
function attribute to the struct bpf_prog_offload_ops so that drivers
supporting eBPF offload can hook at the end of program verification, and
potentially extract information collected by the verifier.

Implement a minimal callback (returning 0) in the drivers providing the
structs, namely netdevsim and nfp.

This will be useful in the nfp driver, in later commits, to extract the
number of subprograms as well as the stack depth for those subprograms.

Signed-off-by: Quentin Monnet <quentin.monnet@netronome.com>
Reviewed-by: Jiong Wang <jiong.wang@netronome.com>
Reviewed-by: Jakub Kicinski <jakub.kicinski@netronome.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 include/linux/bpf.h          | 1 +
 include/linux/bpf_verifier.h | 1 +
 2 files changed, 2 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 027697b6a22f..9b558713447f 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -263,6 +263,7 @@ struct bpf_verifier_ops {
 struct bpf_prog_offload_ops {
 	int (*insn_hook)(struct bpf_verifier_env *env,
 			 int insn_idx, int prev_insn_idx);
+	int (*finalize)(struct bpf_verifier_env *env);
 };
 
 struct bpf_prog_offload {
diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h
index 7b6fd2ab3263..9e8056ec20fa 100644
--- a/include/linux/bpf_verifier.h
+++ b/include/linux/bpf_verifier.h
@@ -245,5 +245,6 @@ static inline struct bpf_reg_state *cur_regs(struct bpf_verifier_env *env)
 int bpf_prog_offload_verifier_prep(struct bpf_verifier_env *env);
 int bpf_prog_offload_verify_insn(struct bpf_verifier_env *env,
 				 int insn_idx, int prev_insn_idx);
+int bpf_prog_offload_finalize(struct bpf_verifier_env *env);
 
 #endif /* _LINUX_BPF_VERIFIER_H */
-- 
cgit v1.2.3


From 4a19edb60d0203cd5bf95a8b46ea8f63fd41194c Mon Sep 17 00:00:00 2001
From: David Ahern <dsahern@gmail.com>
Date: Sun, 7 Oct 2018 20:16:22 -0700
Subject: netlink: Pass extack to dump handlers

Declare extack in netlink_dump and pass to dump handlers via
netlink_callback. Add any extack message after the dump_done_errno
allowing error messages to be returned. This will be useful when
strict checking is done on dump requests, returning why the dump
fails EINVAL.

Signed-off-by: David Ahern <dsahern@gmail.com>
Acked-by: Christian Brauner <christian@brauner.io>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netlink.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/netlink.h b/include/linux/netlink.h
index 71f121b66ca8..88c8a2d83eb3 100644
--- a/include/linux/netlink.h
+++ b/include/linux/netlink.h
@@ -176,6 +176,7 @@ struct netlink_callback {
 	void			*data;
 	/* the module that dump function belong to */
 	struct module		*module;
+	struct netlink_ext_ack	*extack;
 	u16			family;
 	u16			min_dump_alloc;
 	unsigned int		prev_seq, seq;
-- 
cgit v1.2.3


From 89d35528d17d25819a755a2b52931e911baebc66 Mon Sep 17 00:00:00 2001
From: David Ahern <dsahern@gmail.com>
Date: Sun, 7 Oct 2018 20:16:27 -0700
Subject: netlink: Add new socket option to enable strict checking on dumps

Add a new socket option, NETLINK_DUMP_STRICT_CHK, that userspace
can use via setsockopt to request strict checking of headers and
attributes on dump requests.

To get dump features such as kernel side filtering based on data in
the header or attributes appended to the dump request, userspace
must call setsockopt() for NETLINK_DUMP_STRICT_CHK and a non-zero
value. Since the netlink sock and its flags are private to the
af_netlink code, the strict checking flag is passed to dump handlers
via a flag in the netlink_callback struct.

For old userspace on new kernel there is no impact as all of the data
checks in later patches are wrapped in a check on the new strict flag.

For new userspace on old kernel, the setsockopt will fail and even if
new userspace sets data in the headers and appended attributes the
kernel will silently ignore it. Moving forward when the setsockopt
succeeds, the new userspace on old kernel means the dump request can
pass an attribute the kernel does not understand. The dump will then
fail as the older kernel does not understand it.

New userspace on new kernel setting the socket option gets the benefit
of the improved data dump.

Kernel side the NETLINK_DUMP_STRICT_CHK uapi is converted to a generic
NETLINK_F_STRICT_CHK flag which can potentially be leveraged for tighter
checking on the NEW, DEL, and SET commands.

Signed-off-by: David Ahern <dsahern@gmail.com>
Acked-by: Christian Brauner <christian@brauner.io>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netlink.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/netlink.h b/include/linux/netlink.h
index 88c8a2d83eb3..72580f1a72a2 100644
--- a/include/linux/netlink.h
+++ b/include/linux/netlink.h
@@ -179,6 +179,7 @@ struct netlink_callback {
 	struct netlink_ext_ack	*extack;
 	u16			family;
 	u16			min_dump_alloc;
+	bool			strict_check;
 	unsigned int		prev_seq, seq;
 	long			args[6];
 };
-- 
cgit v1.2.3


From f6a8a19bb11b46d60250ddc4e3e1ba6aa166f488 Mon Sep 17 00:00:00 2001
From: Denis Drozdov <denisd@mellanox.com>
Date: Tue, 14 Aug 2018 14:08:51 +0300
Subject: RDMA/netdev: Hoist alloc_netdev_mqs out of the driver

netdev has several interfaces that expect to call alloc_netdev_mqs from
the core code, with the driver only providing the arguments.  This is
incompatible with the rdma_netdev interface that returns the netdev
directly.

Thus re-organize the API used by ipoib so that the verbs core code calls
alloc_netdev_mqs for the driver. This is done by allowing the drivers to
provide the allocation parameters via a 'get_params' callback and then
initializing an allocated netdev as a second step.

Fixes: cd565b4b51e5 ("IB/IPoIB: Support acceleration options callbacks")
Signed-off-by: Jason Gunthorpe <jgg@mellanox.com>
Signed-off-by: Denis Drozdov <denisd@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
 include/linux/mlx5/driver.h | 14 ++++----------
 1 file changed, 4 insertions(+), 10 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h
index 26a92462f4ce..4b75796cac23 100644
--- a/include/linux/mlx5/driver.h
+++ b/include/linux/mlx5/driver.h
@@ -1228,21 +1228,15 @@ int mlx5_lag_query_cong_counters(struct mlx5_core_dev *dev,
 struct mlx5_uars_page *mlx5_get_uars_page(struct mlx5_core_dev *mdev);
 void mlx5_put_uars_page(struct mlx5_core_dev *mdev, struct mlx5_uars_page *up);
 
-#ifndef CONFIG_MLX5_CORE_IPOIB
-static inline
-struct net_device *mlx5_rdma_netdev_alloc(struct mlx5_core_dev *mdev,
-					  struct ib_device *ibdev,
-					  const char *name,
-					  void (*setup)(struct net_device *))
-{
-	return ERR_PTR(-EOPNOTSUPP);
-}
-#else
+#ifdef CONFIG_MLX5_CORE_IPOIB
 struct net_device *mlx5_rdma_netdev_alloc(struct mlx5_core_dev *mdev,
 					  struct ib_device *ibdev,
 					  const char *name,
 					  void (*setup)(struct net_device *));
 #endif /* CONFIG_MLX5_CORE_IPOIB */
+int mlx5_rdma_rn_get_params(struct mlx5_core_dev *mdev,
+			    struct ib_device *device,
+			    struct rdma_netdev_alloc_params *params);
 
 struct mlx5_profile {
 	u64	mask;
-- 
cgit v1.2.3


From f458e832ba510f843807fc2c2906a8fb59554c9f Mon Sep 17 00:00:00 2001
From: Chaitanya T K <chaitanya.mgit@gmail.com>
Date: Sat, 6 Oct 2018 19:34:59 +0200
Subject: mac80211: minstrel: Enable STBC and LDPC for VHT Rates

If peer support reception of STBC and LDPC, enable them for better
performance.

Signed-off-by: Chaitanya TK <chaitanya.mgit@gmail.com>
Signed-off-by: Felix Fietkau <nbd@nbd.name>
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/linux/ieee80211.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h
index c4809ad8ab46..0ef67f837ae1 100644
--- a/include/linux/ieee80211.h
+++ b/include/linux/ieee80211.h
@@ -1670,6 +1670,7 @@ struct ieee80211_mu_edca_param_set {
 #define IEEE80211_VHT_CAP_RXSTBC_3				0x00000300
 #define IEEE80211_VHT_CAP_RXSTBC_4				0x00000400
 #define IEEE80211_VHT_CAP_RXSTBC_MASK				0x00000700
+#define IEEE80211_VHT_CAP_RXSTBC_SHIFT				8
 #define IEEE80211_VHT_CAP_SU_BEAMFORMER_CAPABLE			0x00000800
 #define IEEE80211_VHT_CAP_SU_BEAMFORMEE_CAPABLE			0x00001000
 #define IEEE80211_VHT_CAP_BEAMFORMEE_STS_SHIFT                  13
-- 
cgit v1.2.3


From cc53aabcc283c36274d3f3ce9adc4b40c21d4838 Mon Sep 17 00:00:00 2001
From: Govind Singh <govinds@codeaurora.org>
Date: Thu, 11 Oct 2018 13:16:01 +0300
Subject: firmware: qcom: scm: Add WLAN VMID for Qualcomm SCM interface

Add WLAN related VMID's to support wlan driver to set up
the remote's permissions call via TrustZone.

Signed-off-by: Govind Singh <govinds@codeaurora.org>
Reviewed-by: Bjorn Andersson <bjorn.andersson@linaro.org>
Acked-by: Niklas Cassel <niklas.cassel@linaro.org>
Reviewed-by: Brian Norris <briannorris@chromium.org>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
---
 include/linux/qcom_scm.h | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/qcom_scm.h b/include/linux/qcom_scm.h
index 5d65521260b3..06996ad4f2bc 100644
--- a/include/linux/qcom_scm.h
+++ b/include/linux/qcom_scm.h
@@ -1,4 +1,4 @@
-/* Copyright (c) 2010-2015, The Linux Foundation. All rights reserved.
+/* Copyright (c) 2010-2015, 2018, The Linux Foundation. All rights reserved.
  * Copyright (C) 2015 Linaro Ltd.
  *
  * This program is free software; you can redistribute it and/or modify
@@ -33,6 +33,8 @@ struct qcom_scm_vmperm {
 
 #define QCOM_SCM_VMID_HLOS       0x3
 #define QCOM_SCM_VMID_MSS_MSA    0xF
+#define QCOM_SCM_VMID_WLAN       0x18
+#define QCOM_SCM_VMID_WLAN_CE    0x19
 #define QCOM_SCM_PERM_READ       0x4
 #define QCOM_SCM_PERM_WRITE      0x2
 #define QCOM_SCM_PERM_EXEC       0x1
-- 
cgit v1.2.3


From 604326b41a6fb9b4a78b6179335decee0365cd8c Mon Sep 17 00:00:00 2001
From: Daniel Borkmann <daniel@iogearbox.net>
Date: Sat, 13 Oct 2018 02:45:58 +0200
Subject: bpf, sockmap: convert to generic sk_msg interface

Add a generic sk_msg layer, and convert current sockmap and later
kTLS over to make use of it. While sk_buff handles network packet
representation from netdevice up to socket, sk_msg handles data
representation from application to socket layer.

This means that sk_msg framework spans across ULP users in the
kernel, and enables features such as introspection or filtering
of data with the help of BPF programs that operate on this data
structure.

Latter becomes in particular useful for kTLS where data encryption
is deferred into the kernel, and as such enabling the kernel to
perform L7 introspection and policy based on BPF for TLS connections
where the record is being encrypted after BPF has run and came to
a verdict. In order to get there, first step is to transform open
coding of scatter-gather list handling into a common core framework
that subsystems can use.

The code itself has been split and refactored into three bigger
pieces: i) the generic sk_msg API which deals with managing the
scatter gather ring, providing helpers for walking and mangling,
transferring application data from user space into it, and preparing
it for BPF pre/post-processing, ii) the plain sock map itself
where sockets can be attached to or detached from; these bits
are independent of i) which can now be used also without sock
map, and iii) the integration with plain TCP as one protocol
to be used for processing L7 application data (later this could
e.g. also be extended to other protocols like UDP). The semantics
are the same with the old sock map code and therefore no change
of user facing behavior or APIs. While pursuing this work it
also helped finding a number of bugs in the old sockmap code
that we've fixed already in earlier commits. The test_sockmap
kselftest suite passes through fine as well.

Joint work with John.

Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Signed-off-by: John Fastabend <john.fastabend@gmail.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 include/linux/bpf.h       |  33 ++---
 include/linux/bpf_types.h |   2 +-
 include/linux/filter.h    |  21 ---
 include/linux/skmsg.h     | 371 ++++++++++++++++++++++++++++++++++++++++++++++
 4 files changed, 383 insertions(+), 44 deletions(-)
 create mode 100644 include/linux/skmsg.h

(limited to 'include/linux')

diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 9b558713447f..e60fff48288b 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -737,33 +737,18 @@ static inline void bpf_map_offload_map_free(struct bpf_map *map)
 }
 #endif /* CONFIG_NET && CONFIG_BPF_SYSCALL */
 
-#if defined(CONFIG_STREAM_PARSER) && defined(CONFIG_BPF_SYSCALL) && defined(CONFIG_INET)
-struct sock  *__sock_map_lookup_elem(struct bpf_map *map, u32 key);
-struct sock  *__sock_hash_lookup_elem(struct bpf_map *map, void *key);
-int sock_map_prog(struct bpf_map *map, struct bpf_prog *prog, u32 type);
-int sockmap_get_from_fd(const union bpf_attr *attr, int type,
-			struct bpf_prog *prog);
+#if defined(CONFIG_BPF_STREAM_PARSER)
+int sock_map_prog_update(struct bpf_map *map, struct bpf_prog *prog, u32 which);
+int sock_map_get_from_fd(const union bpf_attr *attr, struct bpf_prog *prog);
 #else
-static inline struct sock  *__sock_map_lookup_elem(struct bpf_map *map, u32 key)
-{
-	return NULL;
-}
-
-static inline struct sock  *__sock_hash_lookup_elem(struct bpf_map *map,
-						    void *key)
-{
-	return NULL;
-}
-
-static inline int sock_map_prog(struct bpf_map *map,
-				struct bpf_prog *prog,
-				u32 type)
+static inline int sock_map_prog_update(struct bpf_map *map,
+				       struct bpf_prog *prog, u32 which)
 {
 	return -EOPNOTSUPP;
 }
 
-static inline int sockmap_get_from_fd(const union bpf_attr *attr, int type,
-				      struct bpf_prog *prog)
+static inline int sock_map_get_from_fd(const union bpf_attr *attr,
+				       struct bpf_prog *prog)
 {
 	return -EINVAL;
 }
@@ -839,6 +824,10 @@ extern const struct bpf_func_proto bpf_get_stack_proto;
 extern const struct bpf_func_proto bpf_sock_map_update_proto;
 extern const struct bpf_func_proto bpf_sock_hash_update_proto;
 extern const struct bpf_func_proto bpf_get_current_cgroup_id_proto;
+extern const struct bpf_func_proto bpf_msg_redirect_hash_proto;
+extern const struct bpf_func_proto bpf_msg_redirect_map_proto;
+extern const struct bpf_func_proto bpf_sk_redirect_hash_proto;
+extern const struct bpf_func_proto bpf_sk_redirect_map_proto;
 
 extern const struct bpf_func_proto bpf_get_local_storage_proto;
 
diff --git a/include/linux/bpf_types.h b/include/linux/bpf_types.h
index 5432f4c9f50e..fa48343a5ea1 100644
--- a/include/linux/bpf_types.h
+++ b/include/linux/bpf_types.h
@@ -57,7 +57,7 @@ BPF_MAP_TYPE(BPF_MAP_TYPE_ARRAY_OF_MAPS, array_of_maps_map_ops)
 BPF_MAP_TYPE(BPF_MAP_TYPE_HASH_OF_MAPS, htab_of_maps_map_ops)
 #ifdef CONFIG_NET
 BPF_MAP_TYPE(BPF_MAP_TYPE_DEVMAP, dev_map_ops)
-#if defined(CONFIG_STREAM_PARSER) && defined(CONFIG_INET)
+#if defined(CONFIG_BPF_STREAM_PARSER)
 BPF_MAP_TYPE(BPF_MAP_TYPE_SOCKMAP, sock_map_ops)
 BPF_MAP_TYPE(BPF_MAP_TYPE_SOCKHASH, sock_hash_ops)
 #endif
diff --git a/include/linux/filter.h b/include/linux/filter.h
index 6791a0ac0139..5771874bc01e 100644
--- a/include/linux/filter.h
+++ b/include/linux/filter.h
@@ -520,24 +520,6 @@ struct bpf_skb_data_end {
 	void *data_end;
 };
 
-struct sk_msg_buff {
-	void *data;
-	void *data_end;
-	__u32 apply_bytes;
-	__u32 cork_bytes;
-	int sg_copybreak;
-	int sg_start;
-	int sg_curr;
-	int sg_end;
-	struct scatterlist sg_data[MAX_SKB_FRAGS];
-	bool sg_copy[MAX_SKB_FRAGS];
-	__u32 flags;
-	struct sock *sk_redir;
-	struct sock *sk;
-	struct sk_buff *skb;
-	struct list_head list;
-};
-
 struct bpf_redirect_info {
 	u32 ifindex;
 	u32 flags;
@@ -833,9 +815,6 @@ void xdp_do_flush_map(void);
 
 void bpf_warn_invalid_xdp_action(u32 act);
 
-struct sock *do_sk_redirect_map(struct sk_buff *skb);
-struct sock *do_msg_redirect_map(struct sk_msg_buff *md);
-
 #ifdef CONFIG_INET
 struct sock *bpf_run_sk_reuseport(struct sock_reuseport *reuse, struct sock *sk,
 				  struct bpf_prog *prog, struct sk_buff *skb,
diff --git a/include/linux/skmsg.h b/include/linux/skmsg.h
new file mode 100644
index 000000000000..95678103c4a0
--- /dev/null
+++ b/include/linux/skmsg.h
@@ -0,0 +1,371 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (c) 2017 - 2018 Covalent IO, Inc. http://covalent.io */
+
+#ifndef _LINUX_SKMSG_H
+#define _LINUX_SKMSG_H
+
+#include <linux/bpf.h>
+#include <linux/filter.h>
+#include <linux/scatterlist.h>
+#include <linux/skbuff.h>
+
+#include <net/sock.h>
+#include <net/tcp.h>
+#include <net/strparser.h>
+
+#define MAX_MSG_FRAGS			MAX_SKB_FRAGS
+
+enum __sk_action {
+	__SK_DROP = 0,
+	__SK_PASS,
+	__SK_REDIRECT,
+	__SK_NONE,
+};
+
+struct sk_msg_sg {
+	u32				start;
+	u32				curr;
+	u32				end;
+	u32				size;
+	u32				copybreak;
+	bool				copy[MAX_MSG_FRAGS];
+	struct scatterlist		data[MAX_MSG_FRAGS];
+};
+
+struct sk_msg {
+	struct sk_msg_sg		sg;
+	void				*data;
+	void				*data_end;
+	u32				apply_bytes;
+	u32				cork_bytes;
+	u32				flags;
+	struct sk_buff			*skb;
+	struct sock			*sk_redir;
+	struct sock			*sk;
+	struct list_head		list;
+};
+
+struct sk_psock_progs {
+	struct bpf_prog			*msg_parser;
+	struct bpf_prog			*skb_parser;
+	struct bpf_prog			*skb_verdict;
+};
+
+enum sk_psock_state_bits {
+	SK_PSOCK_TX_ENABLED,
+};
+
+struct sk_psock_link {
+	struct list_head		list;
+	struct bpf_map			*map;
+	void				*link_raw;
+};
+
+struct sk_psock_parser {
+	struct strparser		strp;
+	bool				enabled;
+	void (*saved_data_ready)(struct sock *sk);
+};
+
+struct sk_psock_work_state {
+	struct sk_buff			*skb;
+	u32				len;
+	u32				off;
+};
+
+struct sk_psock {
+	struct sock			*sk;
+	struct sock			*sk_redir;
+	u32				apply_bytes;
+	u32				cork_bytes;
+	u32				eval;
+	struct sk_msg			*cork;
+	struct sk_psock_progs		progs;
+	struct sk_psock_parser		parser;
+	struct sk_buff_head		ingress_skb;
+	struct list_head		ingress_msg;
+	unsigned long			state;
+	struct list_head		link;
+	spinlock_t			link_lock;
+	refcount_t			refcnt;
+	void (*saved_unhash)(struct sock *sk);
+	void (*saved_close)(struct sock *sk, long timeout);
+	void (*saved_write_space)(struct sock *sk);
+	struct proto			*sk_proto;
+	struct sk_psock_work_state	work_state;
+	struct work_struct		work;
+	union {
+		struct rcu_head		rcu;
+		struct work_struct	gc;
+	};
+};
+
+int sk_msg_alloc(struct sock *sk, struct sk_msg *msg, int len,
+		 int elem_first_coalesce);
+void sk_msg_trim(struct sock *sk, struct sk_msg *msg, int len);
+int sk_msg_free(struct sock *sk, struct sk_msg *msg);
+int sk_msg_free_nocharge(struct sock *sk, struct sk_msg *msg);
+void sk_msg_free_partial(struct sock *sk, struct sk_msg *msg, u32 bytes);
+void sk_msg_free_partial_nocharge(struct sock *sk, struct sk_msg *msg,
+				  u32 bytes);
+
+void sk_msg_return(struct sock *sk, struct sk_msg *msg, int bytes);
+
+int sk_msg_zerocopy_from_iter(struct sock *sk, struct iov_iter *from,
+			      struct sk_msg *msg, u32 bytes);
+int sk_msg_memcopy_from_iter(struct sock *sk, struct iov_iter *from,
+			     struct sk_msg *msg, u32 bytes);
+
+static inline void sk_msg_check_to_free(struct sk_msg *msg, u32 i, u32 bytes)
+{
+	WARN_ON(i == msg->sg.end && bytes);
+}
+
+static inline void sk_msg_apply_bytes(struct sk_psock *psock, u32 bytes)
+{
+	if (psock->apply_bytes) {
+		if (psock->apply_bytes < bytes)
+			psock->apply_bytes = 0;
+		else
+			psock->apply_bytes -= bytes;
+	}
+}
+
+#define sk_msg_iter_var_prev(var)			\
+	do {						\
+		if (var == 0)				\
+			var = MAX_MSG_FRAGS - 1;	\
+		else					\
+			var--;				\
+	} while (0)
+
+#define sk_msg_iter_var_next(var)			\
+	do {						\
+		var++;					\
+		if (var == MAX_MSG_FRAGS)		\
+			var = 0;			\
+	} while (0)
+
+#define sk_msg_iter_prev(msg, which)			\
+	sk_msg_iter_var_prev(msg->sg.which)
+
+#define sk_msg_iter_next(msg, which)			\
+	sk_msg_iter_var_next(msg->sg.which)
+
+static inline void sk_msg_clear_meta(struct sk_msg *msg)
+{
+	memset(&msg->sg, 0, offsetofend(struct sk_msg_sg, copy));
+}
+
+static inline void sk_msg_init(struct sk_msg *msg)
+{
+	memset(msg, 0, sizeof(*msg));
+	sg_init_marker(msg->sg.data, ARRAY_SIZE(msg->sg.data));
+}
+
+static inline void sk_msg_xfer(struct sk_msg *dst, struct sk_msg *src,
+			       int which, u32 size)
+{
+	dst->sg.data[which] = src->sg.data[which];
+	dst->sg.data[which].length  = size;
+	src->sg.data[which].length -= size;
+	src->sg.data[which].offset += size;
+}
+
+static inline u32 sk_msg_elem_used(const struct sk_msg *msg)
+{
+	return msg->sg.end >= msg->sg.start ?
+		msg->sg.end - msg->sg.start :
+		msg->sg.end + (MAX_MSG_FRAGS - msg->sg.start);
+}
+
+static inline bool sk_msg_full(const struct sk_msg *msg)
+{
+	return (msg->sg.end == msg->sg.start) && msg->sg.size;
+}
+
+static inline struct scatterlist *sk_msg_elem(struct sk_msg *msg, int which)
+{
+	return &msg->sg.data[which];
+}
+
+static inline struct page *sk_msg_page(struct sk_msg *msg, int which)
+{
+	return sg_page(sk_msg_elem(msg, which));
+}
+
+static inline bool sk_msg_to_ingress(const struct sk_msg *msg)
+{
+	return msg->flags & BPF_F_INGRESS;
+}
+
+static inline void sk_msg_compute_data_pointers(struct sk_msg *msg)
+{
+	struct scatterlist *sge = sk_msg_elem(msg, msg->sg.start);
+
+	if (msg->sg.copy[msg->sg.start]) {
+		msg->data = NULL;
+		msg->data_end = NULL;
+	} else {
+		msg->data = sg_virt(sge);
+		msg->data_end = msg->data + sge->length;
+	}
+}
+
+static inline void sk_msg_page_add(struct sk_msg *msg, struct page *page,
+				   u32 len, u32 offset)
+{
+	struct scatterlist *sge;
+
+	get_page(page);
+	sge = sk_msg_elem(msg, msg->sg.end);
+	sg_set_page(sge, page, len, offset);
+	sg_unmark_end(sge);
+
+	msg->sg.copy[msg->sg.end] = true;
+	msg->sg.size += len;
+	sk_msg_iter_next(msg, end);
+}
+
+static inline struct sk_psock *sk_psock(const struct sock *sk)
+{
+	return rcu_dereference_sk_user_data(sk);
+}
+
+static inline bool sk_has_psock(struct sock *sk)
+{
+	return sk_psock(sk) != NULL && sk->sk_prot->recvmsg == tcp_bpf_recvmsg;
+}
+
+static inline void sk_psock_queue_msg(struct sk_psock *psock,
+				      struct sk_msg *msg)
+{
+	list_add_tail(&msg->list, &psock->ingress_msg);
+}
+
+static inline void sk_psock_report_error(struct sk_psock *psock, int err)
+{
+	struct sock *sk = psock->sk;
+
+	sk->sk_err = err;
+	sk->sk_error_report(sk);
+}
+
+struct sk_psock *sk_psock_init(struct sock *sk, int node);
+
+int sk_psock_init_strp(struct sock *sk, struct sk_psock *psock);
+void sk_psock_start_strp(struct sock *sk, struct sk_psock *psock);
+void sk_psock_stop_strp(struct sock *sk, struct sk_psock *psock);
+
+int sk_psock_msg_verdict(struct sock *sk, struct sk_psock *psock,
+			 struct sk_msg *msg);
+
+static inline struct sk_psock_link *sk_psock_init_link(void)
+{
+	return kzalloc(sizeof(struct sk_psock_link),
+		       GFP_ATOMIC | __GFP_NOWARN);
+}
+
+static inline void sk_psock_free_link(struct sk_psock_link *link)
+{
+	kfree(link);
+}
+
+struct sk_psock_link *sk_psock_link_pop(struct sk_psock *psock);
+#if defined(CONFIG_BPF_STREAM_PARSER)
+void sk_psock_unlink(struct sock *sk, struct sk_psock_link *link);
+#else
+static inline void sk_psock_unlink(struct sock *sk,
+				   struct sk_psock_link *link)
+{
+}
+#endif
+
+void __sk_psock_purge_ingress_msg(struct sk_psock *psock);
+
+static inline void sk_psock_cork_free(struct sk_psock *psock)
+{
+	if (psock->cork) {
+		sk_msg_free(psock->sk, psock->cork);
+		kfree(psock->cork);
+		psock->cork = NULL;
+	}
+}
+
+static inline void sk_psock_update_proto(struct sock *sk,
+					 struct sk_psock *psock,
+					 struct proto *ops)
+{
+	psock->saved_unhash = sk->sk_prot->unhash;
+	psock->saved_close = sk->sk_prot->close;
+	psock->saved_write_space = sk->sk_write_space;
+
+	psock->sk_proto = sk->sk_prot;
+	sk->sk_prot = ops;
+}
+
+static inline void sk_psock_restore_proto(struct sock *sk,
+					  struct sk_psock *psock)
+{
+	if (psock->sk_proto) {
+		sk->sk_prot = psock->sk_proto;
+		psock->sk_proto = NULL;
+	}
+}
+
+static inline void sk_psock_set_state(struct sk_psock *psock,
+				      enum sk_psock_state_bits bit)
+{
+	set_bit(bit, &psock->state);
+}
+
+static inline void sk_psock_clear_state(struct sk_psock *psock,
+					enum sk_psock_state_bits bit)
+{
+	clear_bit(bit, &psock->state);
+}
+
+static inline bool sk_psock_test_state(const struct sk_psock *psock,
+				       enum sk_psock_state_bits bit)
+{
+	return test_bit(bit, &psock->state);
+}
+
+static inline struct sk_psock *sk_psock_get(struct sock *sk)
+{
+	struct sk_psock *psock;
+
+	rcu_read_lock();
+	psock = sk_psock(sk);
+	if (psock && !refcount_inc_not_zero(&psock->refcnt))
+		psock = NULL;
+	rcu_read_unlock();
+	return psock;
+}
+
+void sk_psock_stop(struct sock *sk, struct sk_psock *psock);
+void sk_psock_destroy(struct rcu_head *rcu);
+void sk_psock_drop(struct sock *sk, struct sk_psock *psock);
+
+static inline void sk_psock_put(struct sock *sk, struct sk_psock *psock)
+{
+	if (refcount_dec_and_test(&psock->refcnt))
+		sk_psock_drop(sk, psock);
+}
+
+static inline void psock_set_prog(struct bpf_prog **pprog,
+				  struct bpf_prog *prog)
+{
+	prog = xchg(pprog, prog);
+	if (prog)
+		bpf_prog_put(prog);
+}
+
+static inline void psock_progs_drop(struct sk_psock_progs *progs)
+{
+	psock_set_prog(&progs->msg_parser, NULL);
+	psock_set_prog(&progs->skb_parser, NULL);
+	psock_set_prog(&progs->skb_verdict, NULL);
+}
+
+#endif /* _LINUX_SKMSG_H */
-- 
cgit v1.2.3


From d829e9c4112b52f4f00195900fd4c685f61365ab Mon Sep 17 00:00:00 2001
From: Daniel Borkmann <daniel@iogearbox.net>
Date: Sat, 13 Oct 2018 02:45:59 +0200
Subject: tls: convert to generic sk_msg interface

Convert kTLS over to make use of sk_msg interface for plaintext and
encrypted scattergather data, so it reuses all the sk_msg helpers
and data structure which later on in a second step enables to glue
this to BPF.

This also allows to remove quite a bit of open coded helpers which
are covered by the sk_msg API. Recent changes in kTLs 80ece6a03aaf
("tls: Remove redundant vars from tls record structure") and
4e6d47206c32 ("tls: Add support for inplace records encryption")
changed the data path handling a bit; while we've kept the latter
optimization intact, we had to undo the former change to better
fit the sk_msg model, hence the sg_aead_in and sg_aead_out have
been brought back and are linked into the sk_msg sgs. Now the kTLS
record contains a msg_plaintext and msg_encrypted sk_msg each.

In the original code, the zerocopy_from_iter() has been used out
of TX but also RX path. For the strparser skb-based RX path,
we've left the zerocopy_from_iter() in decrypt_internal() mostly
untouched, meaning it has been moved into tls_setup_from_iter()
with charging logic removed (as not used from RX). Given RX path
is not based on sk_msg objects, we haven't pursued setting up a
dummy sk_msg to call into sk_msg_zerocopy_from_iter(), but it
could be an option to prusue in a later step.

Joint work with John.

Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Signed-off-by: John Fastabend <john.fastabend@gmail.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 include/linux/skmsg.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/skmsg.h b/include/linux/skmsg.h
index 95678103c4a0..4e84b3c2eff8 100644
--- a/include/linux/skmsg.h
+++ b/include/linux/skmsg.h
@@ -102,6 +102,8 @@ struct sk_psock {
 
 int sk_msg_alloc(struct sock *sk, struct sk_msg *msg, int len,
 		 int elem_first_coalesce);
+int sk_msg_clone(struct sock *sk, struct sk_msg *dst, struct sk_msg *src,
+		 u32 off, u32 len);
 void sk_msg_trim(struct sock *sk, struct sk_msg *msg, int len);
 int sk_msg_free(struct sock *sk, struct sk_msg *msg);
 int sk_msg_free_nocharge(struct sock *sk, struct sk_msg *msg);
-- 
cgit v1.2.3


From d3b18ad31f93d0b6bae105c679018a1ba7daa9ca Mon Sep 17 00:00:00 2001
From: John Fastabend <john.fastabend@gmail.com>
Date: Sat, 13 Oct 2018 02:46:01 +0200
Subject: tls: add bpf support to sk_msg handling

This work adds BPF sk_msg verdict program support to kTLS
allowing BPF and kTLS to be combined together. Previously kTLS
and sk_msg verdict programs were mutually exclusive in the
ULP layer which created challenges for the orchestrator when
trying to apply TCP based policy, for example. To resolve this,
leveraging the work from previous patches that consolidates
the use of sk_msg, we can finally enable BPF sk_msg verdict
programs so they continue to run after the kTLS socket is
created. No change in behavior when kTLS is not used in
combination with BPF, the kselftest suite for kTLS also runs
successfully.

Joint work with Daniel.

Signed-off-by: John Fastabend <john.fastabend@gmail.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 include/linux/skmsg.h | 41 +++++++++++++++++++++++++++++++++++++++--
 1 file changed, 39 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/skmsg.h b/include/linux/skmsg.h
index 4e84b3c2eff8..0b919f0bc6d6 100644
--- a/include/linux/skmsg.h
+++ b/include/linux/skmsg.h
@@ -29,7 +29,11 @@ struct sk_msg_sg {
 	u32				size;
 	u32				copybreak;
 	bool				copy[MAX_MSG_FRAGS];
-	struct scatterlist		data[MAX_MSG_FRAGS];
+	/* The extra element is used for chaining the front and sections when
+	 * the list becomes partitioned (e.g. end < start). The crypto APIs
+	 * require the chaining.
+	 */
+	struct scatterlist		data[MAX_MSG_FRAGS + 1];
 };
 
 struct sk_msg {
@@ -112,6 +116,7 @@ void sk_msg_free_partial_nocharge(struct sock *sk, struct sk_msg *msg,
 				  u32 bytes);
 
 void sk_msg_return(struct sock *sk, struct sk_msg *msg, int bytes);
+void sk_msg_return_zero(struct sock *sk, struct sk_msg *msg, int bytes);
 
 int sk_msg_zerocopy_from_iter(struct sock *sk, struct iov_iter *from,
 			      struct sk_msg *msg, u32 bytes);
@@ -161,8 +166,9 @@ static inline void sk_msg_clear_meta(struct sk_msg *msg)
 
 static inline void sk_msg_init(struct sk_msg *msg)
 {
+	BUILD_BUG_ON(ARRAY_SIZE(msg->sg.data) - 1 != MAX_MSG_FRAGS);
 	memset(msg, 0, sizeof(*msg));
-	sg_init_marker(msg->sg.data, ARRAY_SIZE(msg->sg.data));
+	sg_init_marker(msg->sg.data, MAX_MSG_FRAGS);
 }
 
 static inline void sk_msg_xfer(struct sk_msg *dst, struct sk_msg *src,
@@ -174,6 +180,12 @@ static inline void sk_msg_xfer(struct sk_msg *dst, struct sk_msg *src,
 	src->sg.data[which].offset += size;
 }
 
+static inline void sk_msg_xfer_full(struct sk_msg *dst, struct sk_msg *src)
+{
+	memcpy(dst, src, sizeof(*src));
+	sk_msg_init(src);
+}
+
 static inline u32 sk_msg_elem_used(const struct sk_msg *msg)
 {
 	return msg->sg.end >= msg->sg.start ?
@@ -229,6 +241,26 @@ static inline void sk_msg_page_add(struct sk_msg *msg, struct page *page,
 	sk_msg_iter_next(msg, end);
 }
 
+static inline void sk_msg_sg_copy(struct sk_msg *msg, u32 i, bool copy_state)
+{
+	do {
+		msg->sg.copy[i] = copy_state;
+		sk_msg_iter_var_next(i);
+		if (i == msg->sg.end)
+			break;
+	} while (1);
+}
+
+static inline void sk_msg_sg_copy_set(struct sk_msg *msg, u32 start)
+{
+	sk_msg_sg_copy(msg, start, true);
+}
+
+static inline void sk_msg_sg_copy_clear(struct sk_msg *msg, u32 start)
+{
+	sk_msg_sg_copy(msg, start, false);
+}
+
 static inline struct sk_psock *sk_psock(const struct sock *sk)
 {
 	return rcu_dereference_sk_user_data(sk);
@@ -245,6 +277,11 @@ static inline void sk_psock_queue_msg(struct sk_psock *psock,
 	list_add_tail(&msg->list, &psock->ingress_msg);
 }
 
+static inline bool sk_psock_queue_empty(const struct sk_psock *psock)
+{
+	return psock ? list_empty(&psock->ingress_msg) : true;
+}
+
 static inline void sk_psock_report_error(struct sk_psock *psock, int err)
 {
 	struct sock *sk = psock->sk;
-- 
cgit v1.2.3


From 9f9a742db40f95f4dc20fc7293de4ea6ddb24e47 Mon Sep 17 00:00:00 2001
From: "Maciej W. Rozycki" <macro@linux-mips.org>
Date: Tue, 9 Oct 2018 23:57:49 +0100
Subject: FDDI: defza: Support capturing outgoing SMT traffic

DEC FDDIcontroller 700 (DEFZA) uses a Tx/Rx queue pair to communicate
SMT frames with adapter's firmware.  Any SMT frame received from the RMC
via the Rx queue is queued back by the driver to the SMT Rx queue for
the firmware to process.  Similarly the firmware uses the SMT Tx queue
to supply the driver with SMT frames which are queued back to the Tx
queue for the RMC to send to the ring.

When a network tap is attached to an FDDI interface handled by `defza'
any incoming SMT frames captured are queued to our usual processing of
network data received, which in turn delivers them to any listening
taps.

However the outgoing SMT frames produced by the firmware bypass our
network protocol stack and are therefore not delivered to taps.  This in
turn means that taps are missing a part of network traffic sent by the
adapter, which may make it more difficult to track down network problems
or do general traffic analysis.

Call `dev_queue_xmit_nit' then in the SMT Tx path, having checked that
a network tap is attached, with a newly-created `dev_nit_active' helper
wrapping the usual condition used in the transmit path.

Signed-off-by: Maciej W. Rozycki <macro@linux-mips.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netdevice.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 22e4ef7bb701..dc1d9ed33b31 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -3645,6 +3645,7 @@ static __always_inline int ____dev_forward_skb(struct net_device *dev,
 	return 0;
 }
 
+bool dev_nit_active(struct net_device *dev);
 void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev);
 
 extern int		netdev_budget;
-- 
cgit v1.2.3


From 5f6188a8003d080e3753b8f14f4a5a2325ae1ff6 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Mon, 15 Oct 2018 09:37:52 -0700
Subject: tcp: do not change tcp_wstamp_ns in tcp_mstamp_refresh

In EDT design, I made the mistake of using tcp_wstamp_ns
to store the last tcp_clock_ns() sample and to store the
pacing virtual timer.

This causes major regressions at high speed flows.

Introduce tcp_clock_cache to store last tcp_clock_ns().
This is needed because some arches have slow high-resolution
kernel time service.

tcp_wstamp_ns is only updated when a packet is sent.

Note that we can remove tcp_mstamp in the future since
tcp_mstamp is essentially tcp_clock_cache/1000, so the
apparent socket size increase is temporary.

Fixes: 9799ccb0e984 ("tcp: add tcp_wstamp_ns socket field")
Signed-off-by: Eric Dumazet <edumazet@google.com>
Acked-by: Soheil Hassas Yeganeh <soheil@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/tcp.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/tcp.h b/include/linux/tcp.h
index 848f5b25e178..8ed77bb4ed86 100644
--- a/include/linux/tcp.h
+++ b/include/linux/tcp.h
@@ -249,6 +249,7 @@ struct tcp_sock {
 	u32	tlp_high_seq;	/* snd_nxt at the time of TLP retransmit. */
 
 	u64	tcp_wstamp_ns;	/* departure time for next sent data packet */
+	u64	tcp_clock_cache; /* cache last tcp_clock_ns() (see tcp_mstamp_refresh()) */
 
 /* RTT measurement */
 	u64	tcp_mstamp;	/* most recent packet received/sent */
-- 
cgit v1.2.3


From 22e6c58b8c2843337ec4e8464b1ce6e869ca5bf4 Mon Sep 17 00:00:00 2001
From: David Ahern <dsahern@gmail.com>
Date: Mon, 15 Oct 2018 18:56:41 -0700
Subject: netlink: Add answer_flags to netlink_callback

With dump filtering we need a way to ensure the NLM_F_DUMP_FILTERED
flag is set on a message back to the user if the data returned is
influenced by some input attributes. Normally this can be done as
messages are added to the skb, but if the filter results in no data
being returned, the user could be confused as to why.

This patch adds answer_flags to the netlink_callback allowing dump
handlers to set the NLM_F_DUMP_FILTERED at a minimum in the
NLMSG_DONE message ensuring the flag gets back to the user.

The netlink_callback space is initialized to 0 via a memset in
__netlink_dump_start, so init of the new answer_flags is covered.

Signed-off-by: David Ahern <dsahern@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netlink.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/netlink.h b/include/linux/netlink.h
index 72580f1a72a2..4da90a6ab536 100644
--- a/include/linux/netlink.h
+++ b/include/linux/netlink.h
@@ -180,6 +180,7 @@ struct netlink_callback {
 	u16			family;
 	u16			min_dump_alloc;
 	bool			strict_check;
+	u16			answer_flags;
 	unsigned int		prev_seq, seq;
 	long			args[6];
 };
-- 
cgit v1.2.3


From e1cedae1ba6b09ae8376c1486712bf91ea0dfc41 Mon Sep 17 00:00:00 2001
From: David Ahern <dsahern@gmail.com>
Date: Mon, 15 Oct 2018 18:56:46 -0700
Subject: ipmr: Refactor mr_rtm_dumproute

Move per-table loops from mr_rtm_dumproute to mr_table_dump and export
mr_table_dump for dumps by specific table id.

Signed-off-by: David Ahern <dsahern@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/mroute_base.h | 6 ++++++
 1 file changed, 6 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/mroute_base.h b/include/linux/mroute_base.h
index 6675b9f81979..db85373c8d15 100644
--- a/include/linux/mroute_base.h
+++ b/include/linux/mroute_base.h
@@ -283,6 +283,12 @@ void *mr_mfc_find_any(struct mr_table *mrt, int vifi, void *hasharg);
 
 int mr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb,
 		   struct mr_mfc *c, struct rtmsg *rtm);
+int mr_table_dump(struct mr_table *mrt, struct sk_buff *skb,
+		  struct netlink_callback *cb,
+		  int (*fill)(struct mr_table *mrt, struct sk_buff *skb,
+			      u32 portid, u32 seq, struct mr_mfc *c,
+			      int cmd, int flags),
+		  spinlock_t *lock);
 int mr_rtm_dumproute(struct sk_buff *skb, struct netlink_callback *cb,
 		     struct mr_table *(*iter)(struct net *net,
 					      struct mr_table *mrt),
-- 
cgit v1.2.3


From cb167893f41e21e6bd283d78e53489289dc0592d Mon Sep 17 00:00:00 2001
From: David Ahern <dsahern@gmail.com>
Date: Mon, 15 Oct 2018 18:56:47 -0700
Subject: net: Plumb support for filtering ipv4 and ipv6 multicast route dumps

Implement kernel side filtering of routes by egress device index and
table id. If the table id is given in the filter, lookup table and
call mr_table_dump directly for it.

Signed-off-by: David Ahern <dsahern@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/mroute_base.h | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/mroute_base.h b/include/linux/mroute_base.h
index db85373c8d15..34de06b426ef 100644
--- a/include/linux/mroute_base.h
+++ b/include/linux/mroute_base.h
@@ -7,6 +7,7 @@
 #include <net/net_namespace.h>
 #include <net/sock.h>
 #include <net/fib_notifier.h>
+#include <net/ip_fib.h>
 
 /**
  * struct vif_device - interface representor for multicast routing
@@ -288,7 +289,7 @@ int mr_table_dump(struct mr_table *mrt, struct sk_buff *skb,
 		  int (*fill)(struct mr_table *mrt, struct sk_buff *skb,
 			      u32 portid, u32 seq, struct mr_mfc *c,
 			      int cmd, int flags),
-		  spinlock_t *lock);
+		  spinlock_t *lock, struct fib_dump_filter *filter);
 int mr_rtm_dumproute(struct sk_buff *skb, struct netlink_callback *cb,
 		     struct mr_table *(*iter)(struct net *net,
 					      struct mr_table *mrt),
@@ -296,7 +297,7 @@ int mr_rtm_dumproute(struct sk_buff *skb, struct netlink_callback *cb,
 				 struct sk_buff *skb,
 				 u32 portid, u32 seq, struct mr_mfc *c,
 				 int cmd, int flags),
-		     spinlock_t *lock);
+		     spinlock_t *lock, struct fib_dump_filter *filter);
 
 int mr_dump(struct net *net, struct notifier_block *nb, unsigned short family,
 	    int (*rules_dump)(struct net *net,
@@ -346,7 +347,7 @@ mr_rtm_dumproute(struct sk_buff *skb, struct netlink_callback *cb,
 			     struct sk_buff *skb,
 			     u32 portid, u32 seq, struct mr_mfc *c,
 			     int cmd, int flags),
-		 spinlock_t *lock)
+		 spinlock_t *lock, struct fib_dump_filter *filter)
 {
 	return -EINVAL;
 }
-- 
cgit v1.2.3


From a218dc82f0b5c6c8ad3d58c9870ed69e26c08b3e Mon Sep 17 00:00:00 2001
From: Fernando Fernandez Mancera <ffmancera@riseup.net>
Date: Wed, 10 Oct 2018 09:57:13 +0200
Subject: netfilter: nft_osf: Add ttl option support

Add ttl option support to the nftables "osf" expression.

Signed-off-by: Fernando Fernandez Mancera <ffmancera@riseup.net>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/linux/netfilter/nfnetlink_osf.h | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/netfilter/nfnetlink_osf.h b/include/linux/netfilter/nfnetlink_osf.h
index ecf7dab81e9e..c6000046c966 100644
--- a/include/linux/netfilter/nfnetlink_osf.h
+++ b/include/linux/netfilter/nfnetlink_osf.h
@@ -27,6 +27,7 @@ bool nf_osf_match(const struct sk_buff *skb, u_int8_t family,
 		  const struct list_head *nf_osf_fingers);
 
 const char *nf_osf_find(const struct sk_buff *skb,
-                        const struct list_head *nf_osf_fingers);
+			const struct list_head *nf_osf_fingers,
+			const int ttl_check);
 
 #endif /* _NFOSF_H */
-- 
cgit v1.2.3


From c56a8be7e7aa855ebcccf0e9d9eba2216514d399 Mon Sep 17 00:00:00 2001
From: Rahul Verma <Rahul.Verma@cavium.com>
Date: Tue, 16 Oct 2018 03:59:20 -0700
Subject: qed: Add supported link and advertise link to display in ethtool.

	Added transceiver type, speed capability and board types
	in HSI, are utilizing to display the accurate link
	information in ethtool.

Signed-off-by: Rahul Verma <Rahul.Verma@cavium.com>
Signed-off-by: Ariel Elior <ariel.elior@cavium.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/qed/qed_if.h | 26 +++++++++++++++++++++++---
 1 file changed, 23 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/qed/qed_if.h b/include/linux/qed/qed_if.h
index dee3c9c744f7..a47321a0d572 100644
--- a/include/linux/qed/qed_if.h
+++ b/include/linux/qed/qed_if.h
@@ -667,15 +667,35 @@ enum qed_link_mode_bits {
 	QED_LM_Autoneg_BIT = BIT(1),
 	QED_LM_Asym_Pause_BIT = BIT(2),
 	QED_LM_Pause_BIT = BIT(3),
-	QED_LM_1000baseT_Half_BIT = BIT(4),
-	QED_LM_1000baseT_Full_BIT = BIT(5),
+	QED_LM_1000baseT_Full_BIT = BIT(4),
+	QED_LM_10000baseT_Full_BIT = BIT(5),
 	QED_LM_10000baseKR_Full_BIT = BIT(6),
 	QED_LM_20000baseKR2_Full_BIT = BIT(7),
 	QED_LM_25000baseKR_Full_BIT = BIT(8),
 	QED_LM_40000baseLR4_Full_BIT = BIT(9),
 	QED_LM_50000baseKR2_Full_BIT = BIT(10),
 	QED_LM_100000baseKR4_Full_BIT = BIT(11),
-	QED_LM_COUNT = 11
+	QED_LM_2500baseX_Full_BIT = BIT(12),
+	QED_LM_Backplane_BIT = BIT(13),
+	QED_LM_1000baseKX_Full_BIT = BIT(14),
+	QED_LM_10000baseKX4_Full_BIT = BIT(15),
+	QED_LM_10000baseR_FEC_BIT = BIT(16),
+	QED_LM_40000baseKR4_Full_BIT = BIT(17),
+	QED_LM_40000baseCR4_Full_BIT = BIT(18),
+	QED_LM_40000baseSR4_Full_BIT = BIT(19),
+	QED_LM_25000baseCR_Full_BIT = BIT(20),
+	QED_LM_25000baseSR_Full_BIT = BIT(21),
+	QED_LM_50000baseCR2_Full_BIT = BIT(22),
+	QED_LM_100000baseSR4_Full_BIT = BIT(23),
+	QED_LM_100000baseCR4_Full_BIT = BIT(24),
+	QED_LM_100000baseLR4_ER4_Full_BIT = BIT(25),
+	QED_LM_50000baseSR2_Full_BIT = BIT(26),
+	QED_LM_1000baseX_Full_BIT = BIT(27),
+	QED_LM_10000baseCR_Full_BIT = BIT(28),
+	QED_LM_10000baseSR_Full_BIT = BIT(29),
+	QED_LM_10000baseLR_Full_BIT = BIT(30),
+	QED_LM_10000baseLRM_Full_BIT = BIT(31),
+	QED_LM_COUNT = 32
 };
 
 struct qed_link_params {
-- 
cgit v1.2.3


From 94a04d1d3d3681adde1a3e022b25dbac7b345b7e Mon Sep 17 00:00:00 2001
From: Yonatan Cohen <yonatanc@mellanox.com>
Date: Tue, 9 Oct 2018 12:05:12 +0300
Subject: net/mlx5: Expose DC scatter to CQE capability bit

dc_req_scat_data_cqe capability bit determines
if requester scatter to cqe is available for 64 bytes CQE over
DC transport type.

Signed-off-by: Yonatan Cohen <yonatanc@mellanox.com>
Reviewed-by: Guy Levi <guyle@mellanox.com>
Signed-off-by: Leon Romanovsky <leonro@mellanox.com>
---
 include/linux/mlx5/mlx5_ifc.h | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h
index 68f4d5f9d929..0f460fb22c31 100644
--- a/include/linux/mlx5/mlx5_ifc.h
+++ b/include/linux/mlx5/mlx5_ifc.h
@@ -1005,7 +1005,8 @@ struct mlx5_ifc_cmd_hca_cap_bits {
 	u8         umr_modify_atomic_disabled[0x1];
 	u8         umr_indirect_mkey_disabled[0x1];
 	u8         umr_fence[0x2];
-	u8         reserved_at_20c[0x3];
+	u8         dc_req_scat_data_cqe[0x1];
+	u8         reserved_at_20d[0x2];
 	u8         drain_sigerr[0x1];
 	u8         cmdif_checksum[0x2];
 	u8         sigerr_cqe[0x1];
-- 
cgit v1.2.3


From 3f4c3127d332000530349db4843deece27fe5e0c Mon Sep 17 00:00:00 2001
From: John Fastabend <john.fastabend@gmail.com>
Date: Tue, 16 Oct 2018 10:36:01 -0700
Subject: bpf: sockmap, fix skmsg recvmsg handler to track size correctly

When converting sockmap to new skmsg generic data structures we missed
that the recvmsg handler did not correctly use sg.size and instead was
using individual elements length. The result is if a sock is closed
with outstanding data we omit the call to sk_mem_uncharge() and can
get the warning below.

[   66.728282] WARNING: CPU: 6 PID: 5783 at net/core/stream.c:206 sk_stream_kill_queues+0x1fa/0x210

To fix this correct the redirect handler to xfer the size along with
the scatterlist and also decrement the size from the recvmsg handler.
Now when a sock is closed the remaining 'size' will be decremented
with sk_mem_uncharge().

Signed-off-by: John Fastabend <john.fastabend@gmail.com>
Acked-by: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 include/linux/skmsg.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/skmsg.h b/include/linux/skmsg.h
index 0b919f0bc6d6..31df0d9fa536 100644
--- a/include/linux/skmsg.h
+++ b/include/linux/skmsg.h
@@ -176,6 +176,7 @@ static inline void sk_msg_xfer(struct sk_msg *dst, struct sk_msg *src,
 {
 	dst->sg.data[which] = src->sg.data[which];
 	dst->sg.data[which].length  = size;
+	dst->sg.size		   += size;
 	src->sg.data[which].length -= size;
 	src->sg.data[which].offset += size;
 }
-- 
cgit v1.2.3


From 8734a162c13b1a893e7dff8de0df81fed04c51a6 Mon Sep 17 00:00:00 2001
From: John Fastabend <john.fastabend@gmail.com>
Date: Tue, 16 Oct 2018 11:07:59 -0700
Subject: bpf: skmsg, improve sk_msg_used_element to work in cork context

Currently sk_msg_used_element is only called in zerocopy context where
cork is not possible and if this case happens we fallback to copy
mode. However the helper is more useful if it works in all contexts.

This patch resolved the case where if end == head indicating a full
or empty ring the helper always reports an empty ring. To fix this
add a test for the full ring case to avoid reporting a full ring
has 0 elements. This additional functionality will be used in the
next patches from recvmsg context where end = head with a full ring
is a valid case.

Signed-off-by: John Fastabend <john.fastabend@gmail.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 include/linux/skmsg.h | 13 ++++++++-----
 1 file changed, 8 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/skmsg.h b/include/linux/skmsg.h
index 31df0d9fa536..22347b08e1f8 100644
--- a/include/linux/skmsg.h
+++ b/include/linux/skmsg.h
@@ -187,18 +187,21 @@ static inline void sk_msg_xfer_full(struct sk_msg *dst, struct sk_msg *src)
 	sk_msg_init(src);
 }
 
+static inline bool sk_msg_full(const struct sk_msg *msg)
+{
+	return (msg->sg.end == msg->sg.start) && msg->sg.size;
+}
+
 static inline u32 sk_msg_elem_used(const struct sk_msg *msg)
 {
+	if (sk_msg_full(msg))
+		return MAX_MSG_FRAGS;
+
 	return msg->sg.end >= msg->sg.start ?
 		msg->sg.end - msg->sg.start :
 		msg->sg.end + (MAX_MSG_FRAGS - msg->sg.start);
 }
 
-static inline bool sk_msg_full(const struct sk_msg *msg)
-{
-	return (msg->sg.end == msg->sg.start) && msg->sg.size;
-}
-
 static inline struct scatterlist *sk_msg_elem(struct sk_msg *msg, int which)
 {
 	return &msg->sg.data[which];
-- 
cgit v1.2.3


From b8aee82250b7d90a32b11ba208656f52dbaca342 Mon Sep 17 00:00:00 2001
From: Mark Bloch <markb@mellanox.com>
Date: Tue, 2 Oct 2018 22:57:24 +0000
Subject: net/mlx5: E-Switch, Get counters for offloaded flows from callers

There's no real reason for the e-switch logic to manage the creation of
counters for offloaded flows. The API already has the directive for the
caller to denote they want to attach a counter to the created flow.
As such, we go and move the management of flow counters to the mlx5e
tc offload logic. This also lets us remove an inelegant interface where
the FS layer had to provide a way to retrieve a counter from a flow rule.

Signed-off-by: Mark Bloch <markb@mellanox.com>
Reviewed-by: Or Gerlitz <ogerlitz@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
 include/linux/mlx5/fs.h | 1 -
 1 file changed, 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/mlx5/fs.h b/include/linux/mlx5/fs.h
index b1c026f1c8ba..74d0ea146c9a 100644
--- a/include/linux/mlx5/fs.h
+++ b/include/linux/mlx5/fs.h
@@ -186,7 +186,6 @@ int mlx5_modify_rule_destination(struct mlx5_flow_handle *handler,
 				 struct mlx5_flow_destination *new_dest,
 				 struct mlx5_flow_destination *old_dest);
 
-struct mlx5_fc *mlx5_flow_rule_counter(struct mlx5_flow_handle *handler);
 struct mlx5_fc *mlx5_fc_create(struct mlx5_core_dev *dev, bool aging);
 void mlx5_fc_destroy(struct mlx5_core_dev *dev, struct mlx5_fc *counter);
 void mlx5_fc_query_cached(struct mlx5_fc *counter,
-- 
cgit v1.2.3


From 171c7625bef999848ee6032c6dde96e7330c4d15 Mon Sep 17 00:00:00 2001
From: Mark Bloch <markb@mellanox.com>
Date: Wed, 3 Oct 2018 00:03:35 +0000
Subject: net/mlx5: Use flow counter IDs and not the wrapping cache object

Currently, when a flow rule is created using the FS core layer, the caller
has to pass the entire flow counter object and not just the counter HW
handle (ID). This requires both the FS core and the caller to have
knowledge about the inner implementation of the FS layer flow counters
cache and limits the possible users.

Move to use the counter ID across the place when dealing with flows.

Doing this decoupling, now can we privatize the inner implementation
of the flow counters.

Signed-off-by: Mark Bloch <markb@mellanox.com>
Reviewed-by: Or Gerlitz <ogerlitz@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
 include/linux/mlx5/fs.h | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/mlx5/fs.h b/include/linux/mlx5/fs.h
index 74d0ea146c9a..a5fc62184195 100644
--- a/include/linux/mlx5/fs.h
+++ b/include/linux/mlx5/fs.h
@@ -92,7 +92,7 @@ struct mlx5_flow_destination {
 		u32			tir_num;
 		u32			ft_num;
 		struct mlx5_flow_table	*ft;
-		struct mlx5_fc		*counter;
+		u32			counter_id;
 		struct {
 			u16		num;
 			u16		vhca_id;
@@ -192,6 +192,7 @@ void mlx5_fc_query_cached(struct mlx5_fc *counter,
 			  u64 *bytes, u64 *packets, u64 *lastuse);
 int mlx5_fc_query(struct mlx5_core_dev *dev, struct mlx5_fc *counter,
 		  u64 *packets, u64 *bytes);
+u32 mlx5_fc_id(struct mlx5_fc *counter);
 
 int mlx5_fs_add_rx_underlay_qpn(struct mlx5_core_dev *dev, u32 underlay_qpn);
 int mlx5_fs_remove_rx_underlay_qpn(struct mlx5_core_dev *dev, u32 underlay_qpn);
-- 
cgit v1.2.3


From b9aa0ba17af5afa13605eb6ea91f1974da97a2e2 Mon Sep 17 00:00:00 2001
From: Paul Blakey <paulb@mellanox.com>
Date: Thu, 31 May 2018 11:50:23 +0300
Subject: net/mlx5: Add cap bits for multi fdb encap

If set, the firmware supports creating of flow tables with encap
enabled while VFs are configured, if we already created one
(restriction still applies on the first creation).

Signed-off-by: Paul Blakey <paulb@mellanox.com>
Reviewed-by: Or Gerlitz <ogerlitz@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
 include/linux/mlx5/mlx5_ifc.h | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h
index 15e36198f85f..963611820006 100644
--- a/include/linux/mlx5/mlx5_ifc.h
+++ b/include/linux/mlx5/mlx5_ifc.h
@@ -584,7 +584,9 @@ struct mlx5_ifc_flow_table_nic_cap_bits {
 struct mlx5_ifc_flow_table_eswitch_cap_bits {
 	u8      reserved_at_0[0x1c];
 	u8      fdb_multi_path_to_table[0x1];
-	u8      reserved_at_1d[0x1e3];
+	u8      reserved_at_1d[0x1];
+	u8      multi_fdb_encap[0x1];
+	u8      reserved_at_1e[0x1e1];
 
 	struct mlx5_ifc_flow_table_prop_layout_bits flow_table_properties_nic_esw_fdb;
 
-- 
cgit v1.2.3


From 328edb499f99126946845ece477c9c1afe8631af Mon Sep 17 00:00:00 2001
From: Paul Blakey <paulb@mellanox.com>
Date: Tue, 3 Jul 2018 11:13:00 +0300
Subject: net/mlx5: Split FDB fast path prio to multiple namespaces

Towards supporting multi-chains and priorities, split the FDB fast path
to multiple namespaces (sub namespaces), each with multiple priorities.

This patch adds a new flow steering type, FS_TYPE_PRIO_CHAINS, which is
like current FS_TYPE_PRIO, but may contain only namespaces, and those
will be in parallel to one another in terms of managing of the flow
tables connections inside them. Meaning, while searching for the next
or previous flow table to connect for a new table inside such namespace
we skip the parallel namespaces in the same level under the
FS_TYPE_PRIO_CHAINS prio we originated from.

We use this new type for splitting the fast path prio into multiple
parallel namespaces, each containing normal prios.
The prios inside them (and their tables) will be connected to one
another, but not from one parallel namespace to another, instead the
last prio in each namespace will be connected to the next prio in
the containing FDB namespace, which is the slow path prio.

Signed-off-by: Paul Blakey <paulb@mellanox.com>
Acked-by: Or Gerlitz <ogerlitz@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
 include/linux/mlx5/fs.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/mlx5/fs.h b/include/linux/mlx5/fs.h
index a5fc62184195..f8d00872c7d3 100644
--- a/include/linux/mlx5/fs.h
+++ b/include/linux/mlx5/fs.h
@@ -101,6 +101,8 @@ struct mlx5_flow_destination {
 	};
 };
 
+struct mlx5_flow_namespace *
+mlx5_get_fdb_sub_ns(struct mlx5_core_dev *dev, int n);
 struct mlx5_flow_namespace *
 mlx5_get_flow_namespace(struct mlx5_core_dev *dev,
 			enum mlx5_flow_namespace_type type);
-- 
cgit v1.2.3


From d5634fee245f9e92787e3a34ef621fc12b2cbf16 Mon Sep 17 00:00:00 2001
From: Paul Blakey <paulb@mellanox.com>
Date: Thu, 20 Sep 2018 12:17:48 +0200
Subject: net/mlx5: Add a no-append flow insertion mode

If no-append flag is set, we will add a new FTE, instead of appending
the actions of the inserted rule when the same match already exists.

While here, move the has_flow_tag boolean indicator to be a flag too.

This patch doesn't change any functionality.

Signed-off-by: Paul Blakey <paulb@mellanox.com>
Reviewed-by: Or Gerlitz <ogerlitz@mellanmox.com>
Reviewed-by: Mark Bloch <markb@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
 include/linux/mlx5/fs.h | 14 +++++++++++---
 1 file changed, 11 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/mlx5/fs.h b/include/linux/mlx5/fs.h
index f8d00872c7d3..5660f07d3be0 100644
--- a/include/linux/mlx5/fs.h
+++ b/include/linux/mlx5/fs.h
@@ -158,20 +158,28 @@ struct mlx5_fs_vlan {
 
 #define MLX5_FS_VLAN_DEPTH	2
 
+enum {
+	FLOW_ACT_HAS_TAG   = BIT(0),
+	FLOW_ACT_NO_APPEND = BIT(1),
+};
+
 struct mlx5_flow_act {
 	u32 action;
-	bool has_flow_tag;
 	u32 flow_tag;
 	u32 reformat_id;
 	u32 modify_id;
 	uintptr_t esp_id;
+	u32 flags;
 	struct mlx5_fs_vlan vlan[MLX5_FS_VLAN_DEPTH];
 	struct ib_counters *counters;
 };
 
 #define MLX5_DECLARE_FLOW_ACT(name) \
-	struct mlx5_flow_act name = {MLX5_FLOW_CONTEXT_ACTION_FWD_DEST,\
-				     MLX5_FS_DEFAULT_FLOW_TAG, 0, 0}
+	struct mlx5_flow_act name = { .action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST,\
+				      .flow_tag = MLX5_FS_DEFAULT_FLOW_TAG, \
+				      .reformat_id = 0, \
+				      .modify_id = 0, \
+				      .flags =  0, }
 
 /* Single destination per rule.
  * Group ID is implied by the match criteria.
-- 
cgit v1.2.3


From 82385b0d2d2504aee51aa3fb40ebfb03603f64c3 Mon Sep 17 00:00:00 2001
From: "Gustavo A. R. Silva" <gustavo@embeddedor.com>
Date: Wed, 17 Oct 2018 15:01:37 +0200
Subject: net: skbuff.h: Mark expected switch fall-throughs

In preparation to enabling -Wimplicit-fallthrough, mark switch cases
where we are expecting to fall through.

Acked-by: Kees Cook <keescook@chromium.org>
Signed-off-by: Gustavo A. R. Silva <gustavo@embeddedor.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/skbuff.h | 6 ++++++
 1 file changed, 6 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 119d092c6b13..0ba687454267 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -3505,13 +3505,19 @@ static inline bool __skb_metadata_differs(const struct sk_buff *skb_a,
 #define __it(x, op) (x -= sizeof(u##op))
 #define __it_diff(a, b, op) (*(u##op *)__it(a, op)) ^ (*(u##op *)__it(b, op))
 	case 32: diffs |= __it_diff(a, b, 64);
+		 /* fall through */
 	case 24: diffs |= __it_diff(a, b, 64);
+		 /* fall through */
 	case 16: diffs |= __it_diff(a, b, 64);
+		 /* fall through */
 	case  8: diffs |= __it_diff(a, b, 64);
 		break;
 	case 28: diffs |= __it_diff(a, b, 64);
+		 /* fall through */
 	case 20: diffs |= __it_diff(a, b, 64);
+		 /* fall through */
 	case 12: diffs |= __it_diff(a, b, 64);
+		 /* fall through */
 	case  4: diffs |= __it_diff(a, b, 32);
 		break;
 	}
-- 
cgit v1.2.3


From 4972e6fa3a04032830bc3d6bb343d08ab3546773 Mon Sep 17 00:00:00 2001
From: Tariq Toukan <tariqt@mellanox.com>
Date: Wed, 12 Sep 2018 15:36:41 +0300
Subject: net/mlx5: Refactor fragmented buffer struct fields and init flow

Take struct mlx5_frag_buf out of mlx5_frag_buf_ctrl, as it is not
needed to manage and control the datapath of the fragmented buffers API.

struct mlx5_frag_buf contains control info to manage the allocation
and de-allocation of the fragmented buffer.
Its fields are not relevant for datapath, so here I take them out of the
struct mlx5_frag_buf_ctrl, except for the fragments array itself.

In addition, modified mlx5_fill_fbc to initialise the frags pointers
as well. This implies that the buffer must be allocated before the
function is called.

A set of type-specific *_get_byte_size() functions are replaced by
a generic one.

Signed-off-by: Tariq Toukan <tariqt@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
 include/linux/mlx5/driver.h | 22 ++++++++--------------
 1 file changed, 8 insertions(+), 14 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h
index 94ffd02af7cd..e10f61a1f77d 100644
--- a/include/linux/mlx5/driver.h
+++ b/include/linux/mlx5/driver.h
@@ -357,7 +357,7 @@ struct mlx5_frag_buf {
 };
 
 struct mlx5_frag_buf_ctrl {
-	struct mlx5_frag_buf	frag_buf;
+	struct mlx5_buf_list   *frags;
 	u32			sz_m1;
 	u16			frag_sz_m1;
 	u16			strides_offset;
@@ -994,10 +994,12 @@ static inline u32 mlx5_base_mkey(const u32 key)
 	return key & 0xffffff00u;
 }
 
-static inline void mlx5_fill_fbc_offset(u8 log_stride, u8 log_sz,
+static inline void mlx5_init_fbc_offset(struct mlx5_buf_list *frags,
+					u8 log_stride, u8 log_sz,
 					u16 strides_offset,
 					struct mlx5_frag_buf_ctrl *fbc)
 {
+	fbc->frags      = frags;
 	fbc->log_stride = log_stride;
 	fbc->log_sz     = log_sz;
 	fbc->sz_m1	= (1 << fbc->log_sz) - 1;
@@ -1006,18 +1008,11 @@ static inline void mlx5_fill_fbc_offset(u8 log_stride, u8 log_sz,
 	fbc->strides_offset = strides_offset;
 }
 
-static inline void mlx5_fill_fbc(u8 log_stride, u8 log_sz,
+static inline void mlx5_init_fbc(struct mlx5_buf_list *frags,
+				 u8 log_stride, u8 log_sz,
 				 struct mlx5_frag_buf_ctrl *fbc)
 {
-	mlx5_fill_fbc_offset(log_stride, log_sz, 0, fbc);
-}
-
-static inline void mlx5_core_init_cq_frag_buf(struct mlx5_frag_buf_ctrl *fbc,
-					      void *cqc)
-{
-	mlx5_fill_fbc(6 + MLX5_GET(cqc, cqc, cqe_sz),
-		      MLX5_GET(cqc, cqc, log_cq_size),
-		      fbc);
+	mlx5_init_fbc_offset(frags, log_stride, log_sz, 0, fbc);
 }
 
 static inline void *mlx5_frag_buf_get_wqe(struct mlx5_frag_buf_ctrl *fbc,
@@ -1028,8 +1023,7 @@ static inline void *mlx5_frag_buf_get_wqe(struct mlx5_frag_buf_ctrl *fbc,
 	ix  += fbc->strides_offset;
 	frag = ix >> fbc->log_frag_strides;
 
-	return fbc->frag_buf.frags[frag].buf +
-		((fbc->frag_sz_m1 & ix) << fbc->log_stride);
+	return fbc->frags[frag].buf + ((fbc->frag_sz_m1 & ix) << fbc->log_stride);
 }
 
 int mlx5_cmd_init(struct mlx5_core_dev *dev);
-- 
cgit v1.2.3


From 4b5b9c7d972e8a7b1e7691c7c921ec0d6dec33b9 Mon Sep 17 00:00:00 2001
From: Shay Agroskin <shayag@mellanox.com>
Date: Tue, 9 Oct 2018 14:16:43 +0300
Subject: net/mlx5: Add FEC fields to Port Phy Link Mode (PPLM) reg

Added FEC related fields to PPLM layout.
These fields are needed to set and query FEC policy
for different link speeds.

Signed-off-by: Shay Agroskin <shayag@mellanox.com>
Reviewed-by: Eran Ben Elisha <eranbe@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
 include/linux/mlx5/driver.h   |  1 +
 include/linux/mlx5/mlx5_ifc.h | 39 ++++++++++++++++++++++++++++-----------
 2 files changed, 29 insertions(+), 11 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h
index e10f61a1f77d..696ed3f7f894 100644
--- a/include/linux/mlx5/driver.h
+++ b/include/linux/mlx5/driver.h
@@ -133,6 +133,7 @@ enum {
 	MLX5_REG_PVLC		 = 0x500f,
 	MLX5_REG_PCMR		 = 0x5041,
 	MLX5_REG_PMLP		 = 0x5002,
+	MLX5_REG_PPLM		 = 0x5023,
 	MLX5_REG_PCAM		 = 0x507f,
 	MLX5_REG_NODE_DESC	 = 0x6001,
 	MLX5_REG_HOST_ENDIANNESS = 0x7004,
diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h
index 963611820006..47b09a742ae5 100644
--- a/include/linux/mlx5/mlx5_ifc.h
+++ b/include/linux/mlx5/mlx5_ifc.h
@@ -7828,20 +7828,34 @@ struct mlx5_ifc_pplr_reg_bits {
 
 struct mlx5_ifc_pplm_reg_bits {
 	u8         reserved_at_0[0x8];
-	u8         local_port[0x8];
-	u8         reserved_at_10[0x10];
+	u8	   local_port[0x8];
+	u8	   reserved_at_10[0x10];
 
-	u8         reserved_at_20[0x20];
+	u8	   reserved_at_20[0x20];
 
-	u8         port_profile_mode[0x8];
-	u8         static_port_profile[0x8];
-	u8         active_port_profile[0x8];
-	u8         reserved_at_58[0x8];
+	u8	   port_profile_mode[0x8];
+	u8	   static_port_profile[0x8];
+	u8	   active_port_profile[0x8];
+	u8	   reserved_at_58[0x8];
 
-	u8         retransmission_active[0x8];
-	u8         fec_mode_active[0x18];
+	u8	   retransmission_active[0x8];
+	u8	   fec_mode_active[0x18];
 
-	u8         reserved_at_80[0x20];
+	u8	   rs_fec_correction_bypass_cap[0x4];
+	u8	   reserved_at_84[0x8];
+	u8	   fec_override_cap_56g[0x4];
+	u8	   fec_override_cap_100g[0x4];
+	u8	   fec_override_cap_50g[0x4];
+	u8	   fec_override_cap_25g[0x4];
+	u8	   fec_override_cap_10g_40g[0x4];
+
+	u8	   rs_fec_correction_bypass_admin[0x4];
+	u8	   reserved_at_a4[0x8];
+	u8	   fec_override_admin_56g[0x4];
+	u8	   fec_override_admin_100g[0x4];
+	u8	   fec_override_admin_50g[0x4];
+	u8	   fec_override_admin_25g[0x4];
+	u8	   fec_override_admin_10g_40g[0x4];
 };
 
 struct mlx5_ifc_ppcnt_reg_bits {
@@ -8137,7 +8151,10 @@ struct mlx5_ifc_pcam_enhanced_features_bits {
 struct mlx5_ifc_pcam_regs_5000_to_507f_bits {
 	u8         port_access_reg_cap_mask_127_to_96[0x20];
 	u8         port_access_reg_cap_mask_95_to_64[0x20];
-	u8         port_access_reg_cap_mask_63_to_32[0x20];
+
+	u8         port_access_reg_cap_mask_63_to_36[0x1c];
+	u8         pplm[0x1];
+	u8         port_access_reg_cap_mask_34_to_32[0x3];
 
 	u8         port_access_reg_cap_mask_31_to_13[0x13];
 	u8         pbmc[0x1];
-- 
cgit v1.2.3


From 67daf1186086ad4b2ec09b8078b835936977d06a Mon Sep 17 00:00:00 2001
From: Shay Agroskin <shayag@mellanox.com>
Date: Sun, 30 Sep 2018 09:58:08 +0300
Subject: net/mlx5: Added "per_lane_error_counters" cap bit to PCAM

Added "Per lane raw errors" capability bit in
Ports Capabilities Mask (PCAM) enhanced features
layout.

This bit determines if the fields "phy_raw_errors_laneX"
in "Physical Layer statistical" counters group are supported.

Signed-off-by: Shay Agroskin <shayag@mellanox.com>
Reviewed-by: Eran Ben Elisha <eranbe@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
 include/linux/mlx5/mlx5_ifc.h | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h
index 47b09a742ae5..dbff9ff28f2c 100644
--- a/include/linux/mlx5/mlx5_ifc.h
+++ b/include/linux/mlx5/mlx5_ifc.h
@@ -8140,7 +8140,8 @@ struct mlx5_ifc_pcam_enhanced_features_bits {
 	u8         rx_icrc_encapsulated_counter[0x1];
 	u8	   reserved_at_6e[0x8];
 	u8         pfcc_mask[0x1];
-	u8         reserved_at_77[0x4];
+	u8         reserved_at_77[0x3];
+	u8         per_lane_error_counters[0x1];
 	u8         rx_buffer_fullness_counters[0x1];
 	u8         ptys_connector_type[0x1];
 	u8         reserved_at_7d[0x1];
-- 
cgit v1.2.3


From 144991602e6a14d667b295f1b099e609ce857772 Mon Sep 17 00:00:00 2001
From: Mauricio Vasquez B <mauricio.vasquez@polito.it>
Date: Thu, 18 Oct 2018 15:16:09 +0200
Subject: bpf: rename stack trace map operations

In the following patches queue and stack maps (FIFO and LIFO
datastructures) will be implemented.  In order to avoid confusion and
a possible name clash rename stack_map_ops to stack_trace_map_ops

Signed-off-by: Mauricio Vasquez B <mauricio.vasquez@polito.it>
Acked-by: Song Liu <songliubraving@fb.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 include/linux/bpf_types.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/bpf_types.h b/include/linux/bpf_types.h
index fa48343a5ea1..7bad4e1947ed 100644
--- a/include/linux/bpf_types.h
+++ b/include/linux/bpf_types.h
@@ -51,7 +51,7 @@ BPF_MAP_TYPE(BPF_MAP_TYPE_LRU_HASH, htab_lru_map_ops)
 BPF_MAP_TYPE(BPF_MAP_TYPE_LRU_PERCPU_HASH, htab_lru_percpu_map_ops)
 BPF_MAP_TYPE(BPF_MAP_TYPE_LPM_TRIE, trie_map_ops)
 #ifdef CONFIG_PERF_EVENTS
-BPF_MAP_TYPE(BPF_MAP_TYPE_STACK_TRACE, stack_map_ops)
+BPF_MAP_TYPE(BPF_MAP_TYPE_STACK_TRACE, stack_trace_map_ops)
 #endif
 BPF_MAP_TYPE(BPF_MAP_TYPE_ARRAY_OF_MAPS, array_of_maps_map_ops)
 BPF_MAP_TYPE(BPF_MAP_TYPE_HASH_OF_MAPS, htab_of_maps_map_ops)
-- 
cgit v1.2.3


From 2ea864c58f19bf70a0e2415f9f1c53814e07f1b4 Mon Sep 17 00:00:00 2001
From: Mauricio Vasquez B <mauricio.vasquez@polito.it>
Date: Thu, 18 Oct 2018 15:16:20 +0200
Subject: bpf/verifier: add ARG_PTR_TO_UNINIT_MAP_VALUE

ARG_PTR_TO_UNINIT_MAP_VALUE argument is a pointer to a memory zone
used to save the value of a map.  Basically the same as
ARG_PTR_TO_UNINIT_MEM, but the size has not be passed as an extra
argument.

This will be used in the following patch that implements some new
helpers that receive a pointer to be filled with a map value.

Signed-off-by: Mauricio Vasquez B <mauricio.vasquez@polito.it>
Acked-by: Song Liu <songliubraving@fb.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 include/linux/bpf.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index e60fff48288b..0f8b863e0229 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -138,6 +138,7 @@ enum bpf_arg_type {
 	ARG_CONST_MAP_PTR,	/* const argument used as pointer to bpf_map */
 	ARG_PTR_TO_MAP_KEY,	/* pointer to stack used as map key */
 	ARG_PTR_TO_MAP_VALUE,	/* pointer to stack used as map value */
+	ARG_PTR_TO_UNINIT_MAP_VALUE,	/* pointer to valid memory used to store a map value */
 
 	/* the following constraints used to prototype bpf_memcmp() and other
 	 * functions that access data on eBPF program stack
-- 
cgit v1.2.3


From f1a2e44a3aeccb3ff18d3ccc0b0203e70b95bd92 Mon Sep 17 00:00:00 2001
From: Mauricio Vasquez B <mauricio.vasquez@polito.it>
Date: Thu, 18 Oct 2018 15:16:25 +0200
Subject: bpf: add queue and stack maps

Queue/stack maps implement a FIFO/LIFO data storage for ebpf programs.
These maps support peek, pop and push operations that are exposed to eBPF
programs through the new bpf_map[peek/pop/push] helpers.  Those operations
are exposed to userspace applications through the already existing
syscalls in the following way:

BPF_MAP_LOOKUP_ELEM            -> peek
BPF_MAP_LOOKUP_AND_DELETE_ELEM -> pop
BPF_MAP_UPDATE_ELEM            -> push

Queue/stack maps are implemented using a buffer, tail and head indexes,
hence BPF_F_NO_PREALLOC is not supported.

As opposite to other maps, queue and stack do not use RCU for protecting
maps values, the bpf_map[peek/pop] have a ARG_PTR_TO_UNINIT_MAP_VALUE
argument that is a pointer to a memory zone where to save the value of a
map.  Basically the same as ARG_PTR_TO_UNINIT_MEM, but the size has not
be passed as an extra argument.

Our main motivation for implementing queue/stack maps was to keep track
of a pool of elements, like network ports in a SNAT, however we forsee
other use cases, like for exampling saving last N kernel events in a map
and then analysing from userspace.

Signed-off-by: Mauricio Vasquez B <mauricio.vasquez@polito.it>
Acked-by: Song Liu <songliubraving@fb.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 include/linux/bpf.h       | 6 ++++++
 include/linux/bpf_types.h | 2 ++
 2 files changed, 8 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 0f8b863e0229..33014ae73103 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -39,6 +39,9 @@ struct bpf_map_ops {
 	void *(*map_lookup_elem)(struct bpf_map *map, void *key);
 	int (*map_update_elem)(struct bpf_map *map, void *key, void *value, u64 flags);
 	int (*map_delete_elem)(struct bpf_map *map, void *key);
+	int (*map_push_elem)(struct bpf_map *map, void *value, u64 flags);
+	int (*map_pop_elem)(struct bpf_map *map, void *value);
+	int (*map_peek_elem)(struct bpf_map *map, void *value);
 
 	/* funcs called by prog_array and perf_event_array map */
 	void *(*map_fd_get_ptr)(struct bpf_map *map, struct file *map_file,
@@ -811,6 +814,9 @@ static inline int bpf_fd_reuseport_array_update_elem(struct bpf_map *map,
 extern const struct bpf_func_proto bpf_map_lookup_elem_proto;
 extern const struct bpf_func_proto bpf_map_update_elem_proto;
 extern const struct bpf_func_proto bpf_map_delete_elem_proto;
+extern const struct bpf_func_proto bpf_map_push_elem_proto;
+extern const struct bpf_func_proto bpf_map_pop_elem_proto;
+extern const struct bpf_func_proto bpf_map_peek_elem_proto;
 
 extern const struct bpf_func_proto bpf_get_prandom_u32_proto;
 extern const struct bpf_func_proto bpf_get_smp_processor_id_proto;
diff --git a/include/linux/bpf_types.h b/include/linux/bpf_types.h
index 7bad4e1947ed..44d9ab4809bd 100644
--- a/include/linux/bpf_types.h
+++ b/include/linux/bpf_types.h
@@ -69,3 +69,5 @@ BPF_MAP_TYPE(BPF_MAP_TYPE_XSKMAP, xsk_map_ops)
 BPF_MAP_TYPE(BPF_MAP_TYPE_REUSEPORT_SOCKARRAY, reuseport_array_ops)
 #endif
 #endif
+BPF_MAP_TYPE(BPF_MAP_TYPE_QUEUE, queue_map_ops)
+BPF_MAP_TYPE(BPF_MAP_TYPE_STACK, stack_map_ops)
-- 
cgit v1.2.3


From b39b5f411dcfce28ff954e5d6acb2c11be3cb0ec Mon Sep 17 00:00:00 2001
From: Song Liu <songliubraving@fb.com>
Date: Fri, 19 Oct 2018 09:57:57 -0700
Subject: bpf: add cg_skb_is_valid_access for BPF_PROG_TYPE_CGROUP_SKB

BPF programs of BPF_PROG_TYPE_CGROUP_SKB need to access headers in the
skb. This patch enables direct access of skb for these programs.

Two helper functions bpf_compute_and_save_data_end() and
bpf_restore_data_end() are introduced. There are used in
__cgroup_bpf_run_filter_skb(), to compute proper data_end for the
BPF program, and restore original data afterwards.

Signed-off-by: Song Liu <songliubraving@fb.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 include/linux/filter.h | 21 +++++++++++++++++++++
 1 file changed, 21 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/filter.h b/include/linux/filter.h
index 5771874bc01e..91b4c934f02e 100644
--- a/include/linux/filter.h
+++ b/include/linux/filter.h
@@ -548,6 +548,27 @@ static inline void bpf_compute_data_pointers(struct sk_buff *skb)
 	cb->data_end  = skb->data + skb_headlen(skb);
 }
 
+/* Similar to bpf_compute_data_pointers(), except that save orginal
+ * data in cb->data and cb->meta_data for restore.
+ */
+static inline void bpf_compute_and_save_data_end(
+	struct sk_buff *skb, void **saved_data_end)
+{
+	struct bpf_skb_data_end *cb = (struct bpf_skb_data_end *)skb->cb;
+
+	*saved_data_end = cb->data_end;
+	cb->data_end  = skb->data + skb_headlen(skb);
+}
+
+/* Restore data saved by bpf_compute_data_pointers(). */
+static inline void bpf_restore_data_end(
+	struct sk_buff *skb, void *saved_data_end)
+{
+	struct bpf_skb_data_end *cb = (struct bpf_skb_data_end *)skb->cb;
+
+	cb->data_end = saved_data_end;
+}
+
 static inline u8 *bpf_skb_cb(struct sk_buff *skb)
 {
 	/* eBPF programs may read/write skb->cb[] area to transfer meta
-- 
cgit v1.2.3


From 5032d079909d1ac5c2535acc32d5f01cd245d8ea Mon Sep 17 00:00:00 2001
From: John Fastabend <john.fastabend@gmail.com>
Date: Thu, 18 Oct 2018 13:58:35 -0700
Subject: bpf: skmsg, fix psock create on existing kcm/tls port

Before using the psock returned by sk_psock_get() when adding it to a
sockmap we need to ensure it is actually a sockmap based psock.
Previously we were only checking this after incrementing the reference
counter which was an error. This resulted in a slab-out-of-bounds
error when the psock was not actually a sockmap type.

This moves the check up so the reference counter is only used
if it is a sockmap psock.

Eric reported the following KASAN BUG,

BUG: KASAN: slab-out-of-bounds in atomic_read include/asm-generic/atomic-instrumented.h:21 [inline]
BUG: KASAN: slab-out-of-bounds in refcount_inc_not_zero_checked+0x97/0x2f0 lib/refcount.c:120
Read of size 4 at addr ffff88019548be58 by task syz-executor4/22387

CPU: 1 PID: 22387 Comm: syz-executor4 Not tainted 4.19.0-rc7+ #264
Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011
Call Trace:
 __dump_stack lib/dump_stack.c:77 [inline]
 dump_stack+0x1c4/0x2b4 lib/dump_stack.c:113
 print_address_description.cold.8+0x9/0x1ff mm/kasan/report.c:256
 kasan_report_error mm/kasan/report.c:354 [inline]
 kasan_report.cold.9+0x242/0x309 mm/kasan/report.c:412
 check_memory_region_inline mm/kasan/kasan.c:260 [inline]
 check_memory_region+0x13e/0x1b0 mm/kasan/kasan.c:267
 kasan_check_read+0x11/0x20 mm/kasan/kasan.c:272
 atomic_read include/asm-generic/atomic-instrumented.h:21 [inline]
 refcount_inc_not_zero_checked+0x97/0x2f0 lib/refcount.c:120
 sk_psock_get include/linux/skmsg.h:379 [inline]
 sock_map_link.isra.6+0x41f/0xe30 net/core/sock_map.c:178
 sock_hash_update_common+0x19b/0x11e0 net/core/sock_map.c:669
 sock_hash_update_elem+0x306/0x470 net/core/sock_map.c:738
 map_update_elem+0x819/0xdf0 kernel/bpf/syscall.c:818

Signed-off-by: John Fastabend <john.fastabend@gmail.com>
Reported-by: Eric Dumazet <eric.dumazet@gmail.com>
Fixes: 604326b41a6f ("bpf, sockmap: convert to generic sk_msg interface")
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 include/linux/skmsg.h | 25 ++++++++++++++++++++-----
 1 file changed, 20 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/skmsg.h b/include/linux/skmsg.h
index 22347b08e1f8..84e18863f6a4 100644
--- a/include/linux/skmsg.h
+++ b/include/linux/skmsg.h
@@ -270,11 +270,6 @@ static inline struct sk_psock *sk_psock(const struct sock *sk)
 	return rcu_dereference_sk_user_data(sk);
 }
 
-static inline bool sk_has_psock(struct sock *sk)
-{
-	return sk_psock(sk) != NULL && sk->sk_prot->recvmsg == tcp_bpf_recvmsg;
-}
-
 static inline void sk_psock_queue_msg(struct sk_psock *psock,
 				      struct sk_msg *msg)
 {
@@ -374,6 +369,26 @@ static inline bool sk_psock_test_state(const struct sk_psock *psock,
 	return test_bit(bit, &psock->state);
 }
 
+static inline struct sk_psock *sk_psock_get_checked(struct sock *sk)
+{
+	struct sk_psock *psock;
+
+	rcu_read_lock();
+	psock = sk_psock(sk);
+	if (psock) {
+		if (sk->sk_prot->recvmsg != tcp_bpf_recvmsg) {
+			psock = ERR_PTR(-EBUSY);
+			goto out;
+		}
+
+		if (!refcount_inc_not_zero(&psock->refcnt))
+			psock = ERR_PTR(-EBUSY);
+	}
+out:
+	rcu_read_unlock();
+	return psock;
+}
+
 static inline struct sk_psock *sk_psock_get(struct sock *sk)
 {
 	struct sk_psock *psock;
-- 
cgit v1.2.3


From c9fbd71f73094311b31ee703a918e9e0df502cef Mon Sep 17 00:00:00 2001
From: Debabrata Banerjee <dbanerje@akamai.com>
Date: Thu, 18 Oct 2018 11:18:26 -0400
Subject: netpoll: allow cleanup to be synchronous

This fixes a problem introduced by:
commit 2cde6acd49da ("netpoll: Fix __netpoll_rcu_free so that it can hold the rtnl lock")

When using netconsole on a bond, __netpoll_cleanup can asynchronously
recurse multiple times, each __netpoll_free_async call can result in
more __netpoll_free_async's. This means there is now a race between
cleanup_work queues on multiple netpoll_info's on multiple devices and
the configuration of a new netpoll. For example if a netconsole is set
to enable 0, reconfigured, and enable 1 immediately, this netconsole
will likely not work.

Given the reason for __netpoll_free_async is it can be called when rtnl
is not locked, if it is locked, we should be able to execute
synchronously. It appears to be locked everywhere it's called from.

Generalize the design pattern from the teaming driver for current
callers of __netpoll_free_async.

CC: Neil Horman <nhorman@tuxdriver.com>
CC: "David S. Miller" <davem@davemloft.net>
Signed-off-by: Debabrata Banerjee <dbanerje@akamai.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netpoll.h | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/netpoll.h b/include/linux/netpoll.h
index 3ef82d3a78db..676f1ff161a9 100644
--- a/include/linux/netpoll.h
+++ b/include/linux/netpoll.h
@@ -31,8 +31,6 @@ struct netpoll {
 	bool ipv6;
 	u16 local_port, remote_port;
 	u8 remote_mac[ETH_ALEN];
-
-	struct work_struct cleanup_work;
 };
 
 struct netpoll_info {
@@ -63,7 +61,7 @@ int netpoll_parse_options(struct netpoll *np, char *opt);
 int __netpoll_setup(struct netpoll *np, struct net_device *ndev);
 int netpoll_setup(struct netpoll *np);
 void __netpoll_cleanup(struct netpoll *np);
-void __netpoll_free_async(struct netpoll *np);
+void __netpoll_free(struct netpoll *np);
 void netpoll_cleanup(struct netpoll *np);
 void netpoll_send_skb_on_dev(struct netpoll *np, struct sk_buff *skb,
 			     struct net_device *dev);
-- 
cgit v1.2.3


From bff5b4b3737219195ca0caef4ff7884303cb5dc1 Mon Sep 17 00:00:00 2001
From: Yuiko Oshino <yuiko.oshino@microchip.com>
Date: Thu, 18 Oct 2018 15:06:01 -0400
Subject: net: phy: micrel: add Microchip KSZ9131 initial driver

Add support for Microchip Technology KSZ9131 10/100/1000 Ethernet PHY

Signed-off-by: Yuiko Oshino <yuiko.oshino@microchip.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/micrel_phy.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/micrel_phy.h b/include/linux/micrel_phy.h
index 472fa4d4ea62..7361cd3fddc1 100644
--- a/include/linux/micrel_phy.h
+++ b/include/linux/micrel_phy.h
@@ -31,6 +31,7 @@
 #define PHY_ID_KSZ8081		0x00221560
 #define PHY_ID_KSZ8061		0x00221570
 #define PHY_ID_KSZ9031		0x00221620
+#define PHY_ID_KSZ9131		0x00221640
 
 #define PHY_ID_KSZ886X		0x00221430
 #define PHY_ID_KSZ8863		0x00221435
-- 
cgit v1.2.3


From 6fff607e2f14bd7c63c06c464a6f93b8efbabe28 Mon Sep 17 00:00:00 2001
From: John Fastabend <john.fastabend@gmail.com>
Date: Fri, 19 Oct 2018 19:56:49 -0700
Subject: bpf: sk_msg program helper bpf_msg_push_data

This allows user to push data into a msg using sk_msg program types.
The format is as follows,

	bpf_msg_push_data(msg, offset, len, flags)

this will insert 'len' bytes at offset 'offset'. For example to
prepend 10 bytes at the front of the message the user can,

	bpf_msg_push_data(msg, 0, 10, 0);

This will invalidate data bounds so BPF user will have to then recheck
data bounds after calling this. After this the msg size will have been
updated and the user is free to write into the added bytes. We allow
any offset/len as long as it is within the (data, data_end) range.
However, a copy will be required if the ring is full and its possible
for the helper to fail with ENOMEM or EINVAL errors which need to be
handled by the BPF program.

This can be used similar to XDP metadata to pass data between sk_msg
layer and lower layers.

Signed-off-by: John Fastabend <john.fastabend@gmail.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 include/linux/skmsg.h | 5 +++++
 1 file changed, 5 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/skmsg.h b/include/linux/skmsg.h
index 84e18863f6a4..2a11e9d91dfa 100644
--- a/include/linux/skmsg.h
+++ b/include/linux/skmsg.h
@@ -207,6 +207,11 @@ static inline struct scatterlist *sk_msg_elem(struct sk_msg *msg, int which)
 	return &msg->sg.data[which];
 }
 
+static inline struct scatterlist sk_msg_elem_cpy(struct sk_msg *msg, int which)
+{
+	return msg->sg.data[which];
+}
+
 static inline struct page *sk_msg_page(struct sk_msg *msg, int which)
 {
 	return sg_page(sk_msg_elem(msg, which));
-- 
cgit v1.2.3


From 876dcf2f3aaa0f68d437b368b93a4c4b81521191 Mon Sep 17 00:00:00 2001
From: Olivier Brunel <jjk@jjacky.com>
Date: Sat, 20 Oct 2018 19:39:56 +0200
Subject: umh: Add command line to user mode helpers

User mode helpers were spawned without a command line, and because
an empty command line is used by many tools to identify processes as
kernel threads, this could cause some issues.

Notably during killing spree on shutdown, since such helper would then
be skipped (i.e. not killed) which would result in the process remaining
alive, and thus preventing unmouting of the rootfs (as experienced with
the bpfilter umh).

Fixes: 449325b52b7a ("umh: introduce fork_usermode_blob() helper")
Signed-off-by: Olivier Brunel <jjk@jjacky.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/umh.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/umh.h b/include/linux/umh.h
index 5c812acbb80a..235f51b62c71 100644
--- a/include/linux/umh.h
+++ b/include/linux/umh.h
@@ -44,6 +44,7 @@ struct subprocess_info *call_usermodehelper_setup_file(struct file *file,
 			  int (*init)(struct subprocess_info *info, struct cred *new),
 			  void (*cleanup)(struct subprocess_info *), void *data);
 struct umh_info {
+	const char *cmdline;
 	struct file *pipe_to_umh;
 	struct file *pipe_from_umh;
 	pid_t pid;
-- 
cgit v1.2.3