From 123aff2a789c3975c2235653939ff00107d6156c Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Fri, 17 Apr 2020 11:38:02 -0700 Subject: net: phy: broadcom: Add support for BCM53125 internal PHYs BCM53125 has internal Gigabit PHYs which support interrupts as well as statistics, make it possible to configure both of those features with a PHY driver entry. Signed-off-by: Florian Fainelli Signed-off-by: David S. Miller --- include/linux/brcmphy.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/brcmphy.h b/include/linux/brcmphy.h index 6462c5447872..7e1d857c8468 100644 --- a/include/linux/brcmphy.h +++ b/include/linux/brcmphy.h @@ -15,6 +15,7 @@ #define PHY_ID_BCMAC131 0x0143bc70 #define PHY_ID_BCM5481 0x0143bca0 #define PHY_ID_BCM5395 0x0143bcf0 +#define PHY_ID_BCM53125 0x03625f20 #define PHY_ID_BCM54810 0x03625d00 #define PHY_ID_BCM5482 0x0143bcb0 #define PHY_ID_BCM5411 0x00206070 -- cgit v1.2.3 From bb7fc863729b45f0fbcdea991d0465d855ffd831 Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Sun, 5 Apr 2020 20:57:00 +0300 Subject: net/mlx5: Provide simplified command interfaces Many mlx5_cmd_exec() callers are not interested in the output from that command or have standard in/out structures. Those callers simply allocate those structures on the stack and use sizeof() to provide in/out arguments. In this naive approach provide simplified versions of mlx5_cmd_exec(). 
Reviewed-by: Saeed Mahameed Signed-off-by: Leon Romanovsky --- include/linux/mlx5/driver.h | 13 +++++++++++++ 1 file changed, 13 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index 6f8f79ef829b..1caddfa85c4d 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -903,6 +903,19 @@ int mlx5_cmd_exec_cb(struct mlx5_async_ctx *ctx, void *in, int in_size, int mlx5_cmd_exec(struct mlx5_core_dev *dev, void *in, int in_size, void *out, int out_size); + +#define mlx5_cmd_exec_inout(dev, ifc_cmd, in, out) \ + ({ \ + mlx5_cmd_exec(dev, in, MLX5_ST_SZ_BYTES(ifc_cmd##_in), out, \ + MLX5_ST_SZ_BYTES(ifc_cmd##_out)); \ + }) + +#define mlx5_cmd_exec_in(dev, ifc_cmd, in) \ + ({ \ + u32 _out[MLX5_ST_SZ_DW(ifc_cmd##_out)] = {}; \ + mlx5_cmd_exec_inout(dev, ifc_cmd, in, _out); \ + }) + int mlx5_cmd_exec_polling(struct mlx5_core_dev *dev, void *in, int in_size, void *out, int out_size); void mlx5_cmd_mbox_status(void *out, u8 *status, u32 *syndrome); -- cgit v1.2.3 From 66247fbb280c2a699a8621708c52dae6acd2e4bc Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Fri, 3 Apr 2020 11:28:28 +0300 Subject: net/mlx5: Remove Q counter low level helper APIs mlx5 core users are encouraged to use low level API (mlx5_cmd_exec) without the need of helper functions, do this for q counters, remove helper functions and call mlx5_cmd_exec directly from users. This will help reduce the total amount of code and reduction of the mlx5_core symbol table. 
Reviewed-by: Saeed Mahameed Signed-off-by: Leon Romanovsky --- include/linux/mlx5/qp.h | 4 ---- 1 file changed, 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mlx5/qp.h b/include/linux/mlx5/qp.h index ae63b1ae9004..4d25a3d24182 100644 --- a/include/linux/mlx5/qp.h +++ b/include/linux/mlx5/qp.h @@ -595,10 +595,6 @@ int mlx5_core_create_sq_tracked(struct mlx5_core_dev *dev, u32 *in, int inlen, struct mlx5_core_qp *sq); void mlx5_core_destroy_sq_tracked(struct mlx5_core_dev *dev, struct mlx5_core_qp *sq); -int mlx5_core_alloc_q_counter(struct mlx5_core_dev *dev, u16 *counter_id); -int mlx5_core_dealloc_q_counter(struct mlx5_core_dev *dev, u16 counter_id); -int mlx5_core_query_q_counter(struct mlx5_core_dev *dev, u16 counter_id, - int reset, void *out, int out_size); struct mlx5_core_rsc_common *mlx5_core_res_hold(struct mlx5_core_dev *dev, int res_num, -- cgit v1.2.3 From 333fbaa0255b8d471fc7ae767ef3a1766c732d6d Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Sat, 4 Apr 2020 10:40:24 +0300 Subject: net/mlx5: Move QP logic to mlx5_ib The mlx5_core doesn't need any functionality coded in qp.c, so move that file to drivers/infiniband/ be under mlx5_ib responsibility. Reviewed-by: Saeed Mahameed Signed-off-by: Leon Romanovsky --- include/linux/mlx5/cmd.h | 51 --------------------------------------------- include/linux/mlx5/driver.h | 2 -- include/linux/mlx5/qp.h | 45 --------------------------------------- 3 files changed, 98 deletions(-) delete mode 100644 include/linux/mlx5/cmd.h (limited to 'include/linux') diff --git a/include/linux/mlx5/cmd.h b/include/linux/mlx5/cmd.h deleted file mode 100644 index 68cd08f02c2f..000000000000 --- a/include/linux/mlx5/cmd.h +++ /dev/null @@ -1,51 +0,0 @@ -/* - * Copyright (c) 2013-2015, Mellanox Technologies. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. 
You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * OpenIB.org BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ - -#ifndef MLX5_CMD_H -#define MLX5_CMD_H - -#include - -struct manage_pages_layout { - u64 ptr; - u32 reserved; - u16 num_entries; - u16 func_id; -}; - - -struct mlx5_cmd_alloc_uar_imm_out { - u32 rsvd[3]; - u32 uarn; -}; - -#endif /* MLX5_CMD_H */ diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index 1caddfa85c4d..b60e5ab7906b 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -541,7 +541,6 @@ struct mlx5_priv { struct mlx5_core_health health; /* start: qp staff */ - struct mlx5_qp_table qp_table; struct dentry *qp_debugfs; struct dentry *eq_debugfs; struct dentry *cq_debugfs; @@ -687,7 +686,6 @@ struct mlx5_core_dev { unsigned long intf_state; struct mlx5_priv priv; struct mlx5_profile *profile; - atomic_t num_qps; u32 issi; struct mlx5e_resources mlx5e_res; struct mlx5_dm *dm; diff --git a/include/linux/mlx5/qp.h b/include/linux/mlx5/qp.h index 4d25a3d24182..ef127a156a62 100644 --- a/include/linux/mlx5/qp.h +++ b/include/linux/mlx5/qp.h @@ -553,53 +553,8 @@ struct mlx5_qp_context { u8 rsvd1[24]; }; -static inline struct mlx5_core_qp *__mlx5_qp_lookup(struct mlx5_core_dev *dev, u32 qpn) -{ - return radix_tree_lookup(&dev->priv.qp_table.tree, qpn); -} - -int mlx5_core_create_dct(struct mlx5_core_dev *dev, - struct mlx5_core_dct *qp, - u32 *in, int inlen, - u32 *out, int outlen); -int mlx5_core_create_qp(struct mlx5_core_dev *dev, - struct mlx5_core_qp *qp, - u32 *in, - int inlen); -int mlx5_core_qp_modify(struct mlx5_core_dev *dev, u16 opcode, - u32 opt_param_mask, void *qpc, - struct mlx5_core_qp *qp); -int mlx5_core_destroy_qp(struct mlx5_core_dev *dev, - struct mlx5_core_qp *qp); -int mlx5_core_destroy_dct(struct mlx5_core_dev *dev, - struct mlx5_core_dct *dct); -int mlx5_core_qp_query(struct mlx5_core_dev *dev, struct mlx5_core_qp *qp, - u32 *out, int outlen); -int mlx5_core_dct_query(struct mlx5_core_dev *dev, struct mlx5_core_dct *dct, - u32 *out, int outlen); - -int mlx5_core_set_delay_drop(struct 
mlx5_core_dev *dev, - u32 timeout_usec); - -int mlx5_core_xrcd_alloc(struct mlx5_core_dev *dev, u32 *xrcdn); -int mlx5_core_xrcd_dealloc(struct mlx5_core_dev *dev, u32 xrcdn); -void mlx5_init_qp_table(struct mlx5_core_dev *dev); -void mlx5_cleanup_qp_table(struct mlx5_core_dev *dev); int mlx5_debug_qp_add(struct mlx5_core_dev *dev, struct mlx5_core_qp *qp); void mlx5_debug_qp_remove(struct mlx5_core_dev *dev, struct mlx5_core_qp *qp); -int mlx5_core_create_rq_tracked(struct mlx5_core_dev *dev, u32 *in, int inlen, - struct mlx5_core_qp *rq); -void mlx5_core_destroy_rq_tracked(struct mlx5_core_dev *dev, - struct mlx5_core_qp *rq); -int mlx5_core_create_sq_tracked(struct mlx5_core_dev *dev, u32 *in, int inlen, - struct mlx5_core_qp *sq); -void mlx5_core_destroy_sq_tracked(struct mlx5_core_dev *dev, - struct mlx5_core_qp *sq); - -struct mlx5_core_rsc_common *mlx5_core_res_hold(struct mlx5_core_dev *dev, - int res_num, - enum mlx5_res_type res_type); -void mlx5_core_res_put(struct mlx5_core_rsc_common *res); static inline const char *mlx5_qp_type_str(int type) { -- cgit v1.2.3 From 59e9e8e4fe83f68e599b87c06aaf239dcc64887b Mon Sep 17 00:00:00 2001 From: Mark Zhang Date: Tue, 14 Jan 2020 05:06:25 +0200 Subject: net/mlx5: Enable SW-defined RoCEv2 UDP source port When this is enabled, the UDP source port for RoCEv2 packets is defined by software instead of firmware. 
Signed-off-by: Mark Zhang Reviewed-by: Maor Gottlieb Signed-off-by: Leon Romanovsky --- include/linux/mlx5/mlx5_ifc.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index 69b27c7dfc3e..6fa24918eade 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -74,6 +74,7 @@ enum { MLX5_SET_HCA_CAP_OP_MOD_GENERAL_DEVICE = 0x0, MLX5_SET_HCA_CAP_OP_MOD_ODP = 0x2, MLX5_SET_HCA_CAP_OP_MOD_ATOMIC = 0x3, + MLX5_SET_HCA_CAP_OP_MOD_ROCE = 0x4, }; enum { @@ -903,7 +904,9 @@ struct mlx5_ifc_per_protocol_networking_offload_caps_bits { struct mlx5_ifc_roce_cap_bits { u8 roce_apm[0x1]; - u8 reserved_at_1[0x1f]; + u8 reserved_at_1[0x3]; + u8 sw_r_roce_src_udp_port[0x1]; + u8 reserved_at_5[0x1b]; u8 reserved_at_20[0x60]; -- cgit v1.2.3 From eec517cdb4810b3843eb7707971de3164088bff1 Mon Sep 17 00:00:00 2001 From: Andrew Lunn Date: Mon, 20 Apr 2020 00:11:50 +0200 Subject: net: Add IF_OPER_TESTING RFC 2863 defines the operational state testing. Add support for this state, both as a IF_LINK_MODE_ and __LINK_STATE_. Signed-off-by: Andrew Lunn Signed-off-by: David S. Miller --- include/linux/netdevice.h | 41 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 41 insertions(+) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 130a668049ab..0750b54b3765 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -288,6 +288,7 @@ enum netdev_state_t { __LINK_STATE_NOCARRIER, __LINK_STATE_LINKWATCH_PENDING, __LINK_STATE_DORMANT, + __LINK_STATE_TESTING, }; @@ -3907,6 +3908,46 @@ static inline bool netif_dormant(const struct net_device *dev) } +/** + * netif_testing_on - mark device as under test. + * @dev: network device + * + * Mark device as under test (as per RFC2863). + * + * The testing state indicates that some test(s) must be performed on + * the interface. 
After completion of the test, the interface state + * will change to up, dormant, or down, as appropriate. + */ +static inline void netif_testing_on(struct net_device *dev) +{ + if (!test_and_set_bit(__LINK_STATE_TESTING, &dev->state)) + linkwatch_fire_event(dev); +} + +/** + * netif_testing_off - set device as not under test. + * @dev: network device + * + * Device is not in testing state. + */ +static inline void netif_testing_off(struct net_device *dev) +{ + if (test_and_clear_bit(__LINK_STATE_TESTING, &dev->state)) + linkwatch_fire_event(dev); +} + +/** + * netif_testing - test if device is under test + * @dev: network device + * + * Check if device is under test + */ +static inline bool netif_testing(const struct net_device *dev) +{ + return test_bit(__LINK_STATE_TESTING, &dev->state); +} + + /** * netif_oper_up - test if device is operational * @dev: network device -- cgit v1.2.3 From 1dbd51d0a71a561056579e2d4f406e5ce5343af0 Mon Sep 17 00:00:00 2001 From: Raed Salem Date: Tue, 10 Dec 2019 13:20:55 +0200 Subject: net/mlx5: Refactor mlx5_accel_esp_create_hw_context parameter list Currently the FPGA IPsec is the only hw implementation of the IPsec acceleration api, and so the mlx5_accel_esp_create_hw_context was wrongly made to suit this HW api, among other things in its parameter list and some of its parameter endianness. This implementation might not be suitable for different HW. Refactor by group and pass all function arguments of mlx5_accel_esp_create_hw_context in common mlx5_accel_esp_xfrm_attrs struct field of mlx5_accel_esp_xfrm struct and correct the endianness according to the HW being called. 
Signed-off-by: Raed Salem Reviewed-by: Boris Pismenny Reviewed-by: Huy Nguyen Signed-off-by: Saeed Mahameed --- include/linux/mlx5/accel.h | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mlx5/accel.h b/include/linux/mlx5/accel.h index 5613e677a5f9..b919d143a9a6 100644 --- a/include/linux/mlx5/accel.h +++ b/include/linux/mlx5/accel.h @@ -92,6 +92,18 @@ struct mlx5_accel_esp_xfrm_attrs { union { struct aes_gcm_keymat aes_gcm; } keymat; + + union { + __be32 a4; + __be32 a6[4]; + } saddr; + + union { + __be32 a4; + __be32 a6[4]; + } daddr; + + u8 is_ipv6; }; struct mlx5_accel_esp_xfrm { -- cgit v1.2.3 From 51161bfc66a68d21f13d15a689b3ea7980457790 Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Sun, 19 Apr 2020 18:55:06 +0300 Subject: kernel/module: Hide vermagic header file from general use MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit VERMAGIC* definitions are not supposed to be used by the drivers, see this [1] bug report, so introduce special define to guard inclusion of this header file and define it in kernel/modules.h and in internal script that generates *.mod.c files. 
In-tree module build: ➜ kernel git:(vermagic) ✗ make clean ➜ kernel git:(vermagic) ✗ make M=drivers/infiniband/hw/mlx5 ➜ kernel git:(vermagic) ✗ modinfo drivers/infiniband/hw/mlx5/mlx5_ib.ko filename: /images/leonro/src/kernel/drivers/infiniband/hw/mlx5/mlx5_ib.ko <...> vermagic: 5.6.0+ SMP mod_unload modversions Out-of-tree module build: ➜ mlx5 make -C /images/leonro/src/kernel clean M=/tmp/mlx5 ➜ mlx5 make -C /images/leonro/src/kernel M=/tmp/mlx5 ➜ mlx5 modinfo /tmp/mlx5/mlx5_ib.ko filename: /tmp/mlx5/mlx5_ib.ko <...> vermagic: 5.6.0+ SMP mod_unload modversions [1] https://lore.kernel.org/lkml/20200411155623.GA22175@zn.tnic Reported-by: Borislav Petkov Acked-by: Borislav Petkov Acked-by: Jessica Yu Co-developed-by: Masahiro Yamada Signed-off-by: Masahiro Yamada Signed-off-by: Leon Romanovsky Signed-off-by: David S. Miller --- include/linux/vermagic.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/vermagic.h b/include/linux/vermagic.h index 9aced11e9000..7768d20ada39 100644 --- a/include/linux/vermagic.h +++ b/include/linux/vermagic.h @@ -1,4 +1,9 @@ /* SPDX-License-Identifier: GPL-2.0 */ + +#ifndef INCLUDE_VERMAGIC +#error "This header can be included from kernel/module.c or *.mod.c only" +#endif + #include /* Simply sanity version stamp for modules. */ -- cgit v1.2.3 From 0a32f1ff2a2e41404deaba5fb32f8a0d640c0974 Mon Sep 17 00:00:00 2001 From: Michael Walle Date: Mon, 20 Apr 2020 20:21:11 +0200 Subject: net: phy: broadcom: add helper to write/read RDB registers RDB (Register Data Base) registers are used on newer Broadcom PHYs. Add helper to read, write and modify these registers. Signed-off-by: Michael Walle Reviewed-by: Florian Fainelli Signed-off-by: David S. 
Miller --- include/linux/brcmphy.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/brcmphy.h b/include/linux/brcmphy.h index 7e1d857c8468..897b69309964 100644 --- a/include/linux/brcmphy.h +++ b/include/linux/brcmphy.h @@ -115,6 +115,9 @@ #define MII_BCM54XX_SHD_VAL(x) ((x & 0x1f) << 10) #define MII_BCM54XX_SHD_DATA(x) ((x & 0x3ff) << 0) +#define MII_BCM54XX_RDB_ADDR 0x1e +#define MII_BCM54XX_RDB_DATA 0x1f + /* * AUXILIARY CONTROL SHADOW ACCESS REGISTERS. (PHY REG 0x18) */ -- cgit v1.2.3 From 6937602ed3f9ebd46ed6a6b5e609c0ae4ed99008 Mon Sep 17 00:00:00 2001 From: Michael Walle Date: Mon, 20 Apr 2020 20:21:12 +0200 Subject: net: phy: add Broadcom BCM54140 support The Broadcom BCM54140 is a Quad SGMII/QSGMII Copper/Fiber Gigabit Ethernet transceiver. This also adds support for tunables to set and get downshift and energy detect auto power-down. The PHY has four ports and each port has its own PHY address. There are per-port registers as well as global registers. Unfortunately, the global registers can only be accessed by reading and writing from/to the PHY address of the first port. Further, there is no way to find out what port you actually are by just reading the per-port registers. We therefore, have to scan the bus on the PHY probe to determine the port and thus what address we need to access the global registers. Signed-off-by: Michael Walle Reviewed-by: Andrew Lunn Signed-off-by: David S. 
Miller --- include/linux/brcmphy.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/brcmphy.h b/include/linux/brcmphy.h index 897b69309964..8be150e69c7c 100644 --- a/include/linux/brcmphy.h +++ b/include/linux/brcmphy.h @@ -25,6 +25,7 @@ #define PHY_ID_BCM5461 0x002060c0 #define PHY_ID_BCM54612E 0x03625e60 #define PHY_ID_BCM54616S 0x03625d10 +#define PHY_ID_BCM54140 0xae025019 #define PHY_ID_BCM57780 0x03625d90 #define PHY_ID_BCM89610 0x03625cd0 -- cgit v1.2.3 From 38f961e744840db9044af68f4773ae5feae60a89 Mon Sep 17 00:00:00 2001 From: Heiner Kallweit Date: Mon, 20 Apr 2020 23:29:05 +0200 Subject: net: phy: add device-managed devm_mdiobus_register If there's no special ordering requirement for mdiobus_unregister(), then driver code can be simplified by using a device-managed version of mdiobus_register(). Prerequisite is that bus allocation has been done device-managed too. Else mdiobus_free() may be called whilst bus is still registered, resulting in a BUG_ON(). Therefore let devm_mdiobus_register() return -EPERM if bus was allocated non-managed. Signed-off-by: Heiner Kallweit Signed-off-by: David S. 
Miller --- include/linux/phy.h | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) (limited to 'include/linux') diff --git a/include/linux/phy.h b/include/linux/phy.h index 2432ca463ddc..3941a6bcba10 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -241,6 +241,9 @@ struct mii_bus { int (*reset)(struct mii_bus *bus); struct mdio_bus_stats stats[PHY_MAX_ADDR]; + unsigned int is_managed:1; /* is device-managed */ + unsigned int is_managed_registered:1; + /* * A lock to ensure that only one thing can read/write * the MDIO bus at a time @@ -286,6 +289,20 @@ static inline struct mii_bus *mdiobus_alloc(void) int __mdiobus_register(struct mii_bus *bus, struct module *owner); #define mdiobus_register(bus) __mdiobus_register(bus, THIS_MODULE) +static inline int devm_mdiobus_register(struct mii_bus *bus) +{ + int ret; + + if (!bus->is_managed) + return -EPERM; + + ret = mdiobus_register(bus); + if (!ret) + bus->is_managed_registered = 1; + + return ret; +} + void mdiobus_unregister(struct mii_bus *bus); void mdiobus_free(struct mii_bus *bus); struct mii_bus *devm_mdiobus_alloc_size(struct device *dev, int sizeof_priv); -- cgit v1.2.3 From 5972157c2dde11698d7bcfc55621107d97121c87 Mon Sep 17 00:00:00 2001 From: Oleksij Rempel Date: Wed, 22 Apr 2020 11:24:55 +0200 Subject: net: mdio: of: export part of of_mdiobus_register_phy() This function will be needed in tja11xx driver for secondary PHY support. Signed-off-by: Oleksij Rempel Reviewed-by: Andrew Lunn Signed-off-by: David S. 
Miller --- include/linux/of_mdio.h | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/of_mdio.h b/include/linux/of_mdio.h index 491a2b7e77c1..0f61a4ac6bcf 100644 --- a/include/linux/of_mdio.h +++ b/include/linux/of_mdio.h @@ -30,7 +30,9 @@ extern struct mii_bus *of_mdio_find_bus(struct device_node *mdio_np); extern int of_phy_register_fixed_link(struct device_node *np); extern void of_phy_deregister_fixed_link(struct device_node *np); extern bool of_phy_is_fixed_link(struct device_node *np); - +extern int of_mdiobus_phy_device_register(struct mii_bus *mdio, + struct phy_device *phy, + struct device_node *child, u32 addr); static inline int of_mdio_parse_addr(struct device *dev, const struct device_node *np) @@ -118,6 +120,13 @@ static inline bool of_phy_is_fixed_link(struct device_node *np) { return false; } + +static inline int of_mdiobus_phy_device_register(struct mii_bus *mdio, + struct phy_device *phy, + struct device_node *child, u32 addr) +{ + return -ENOSYS; +} #endif -- cgit v1.2.3 From 5d1c9a114a6efba2c8391e39d4ac3e4e5c7b6d32 Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Tue, 7 Apr 2020 18:59:51 +0300 Subject: net/mlx5: Update vport.c to new cmd interface Do mass update of vport.c to reuse newly introduced mlx5_cmd_exec_in*() interfaces. 
Signed-off-by: Leon Romanovsky --- include/linux/mlx5/vport.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mlx5/vport.h b/include/linux/mlx5/vport.h index 16060fb9b5e5..8170da1e9f70 100644 --- a/include/linux/mlx5/vport.h +++ b/include/linux/mlx5/vport.h @@ -127,8 +127,7 @@ int mlx5_query_vport_down_stats(struct mlx5_core_dev *mdev, u16 vport, u8 other_vport, u64 *rx_discard_vport_down, u64 *tx_discard_vport_down); int mlx5_core_query_vport_counter(struct mlx5_core_dev *dev, u8 other_vport, - int vf, u8 port_num, void *out, - size_t out_sz); + int vf, u8 port_num, void *out); int mlx5_core_modify_hca_vport_context(struct mlx5_core_dev *dev, u8 other_vport, u8 port_num, int vf, -- cgit v1.2.3 From d1f620500cde5c72c7b96a19474733c4c6c67f38 Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Thu, 9 Apr 2020 11:39:14 +0300 Subject: net/mlx5: Update cq.c to new cmd interface Do mass update of cq.c to reuse newly introduced mlx5_cmd_exec_in*() interfaces. 
Reviewed-by: Moshe Shemesh Signed-off-by: Leon Romanovsky --- include/linux/mlx5/cq.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mlx5/cq.h b/include/linux/mlx5/cq.h index 40748fc1b11b..b5a9399e07ee 100644 --- a/include/linux/mlx5/cq.h +++ b/include/linux/mlx5/cq.h @@ -188,7 +188,7 @@ int mlx5_core_create_cq(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq, u32 *in, int inlen, u32 *out, int outlen); int mlx5_core_destroy_cq(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq); int mlx5_core_query_cq(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq, - u32 *out, int outlen); + u32 *out); int mlx5_core_modify_cq(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq, u32 *in, int inlen); int mlx5_core_modify_cq_moderation(struct mlx5_core_dev *dev, -- cgit v1.2.3 From e0b4b4722dfac09658d1519b296cf8dc349a2451 Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Thu, 9 Apr 2020 21:03:33 +0300 Subject: net/mlx5: Update transobj.c new cmd interface Do mass update of transobj.c to reuse newly introduced mlx5_cmd_exec_in*() interfaces. 
Signed-off-by: Leon Romanovsky --- include/linux/mlx5/transobj.h | 19 ++++++------------- 1 file changed, 6 insertions(+), 13 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mlx5/transobj.h b/include/linux/mlx5/transobj.h index dc6b1e7cb8c4..028f442530cf 100644 --- a/include/linux/mlx5/transobj.h +++ b/include/linux/mlx5/transobj.h @@ -39,27 +39,20 @@ int mlx5_core_alloc_transport_domain(struct mlx5_core_dev *dev, u32 *tdn); void mlx5_core_dealloc_transport_domain(struct mlx5_core_dev *dev, u32 tdn); int mlx5_core_create_rq(struct mlx5_core_dev *dev, u32 *in, int inlen, u32 *rqn); -int mlx5_core_modify_rq(struct mlx5_core_dev *dev, u32 rqn, u32 *in, int inlen); +int mlx5_core_modify_rq(struct mlx5_core_dev *dev, u32 rqn, u32 *in); void mlx5_core_destroy_rq(struct mlx5_core_dev *dev, u32 rqn); int mlx5_core_query_rq(struct mlx5_core_dev *dev, u32 rqn, u32 *out); int mlx5_core_create_sq(struct mlx5_core_dev *dev, u32 *in, int inlen, u32 *sqn); -int mlx5_core_modify_sq(struct mlx5_core_dev *dev, u32 sqn, u32 *in, int inlen); +int mlx5_core_modify_sq(struct mlx5_core_dev *dev, u32 sqn, u32 *in); void mlx5_core_destroy_sq(struct mlx5_core_dev *dev, u32 sqn); int mlx5_core_query_sq(struct mlx5_core_dev *dev, u32 sqn, u32 *out); int mlx5_core_query_sq_state(struct mlx5_core_dev *dev, u32 sqn, u8 *state); -int mlx5_core_create_tir(struct mlx5_core_dev *dev, u32 *in, int inlen, - u32 *tirn); -int mlx5_core_create_tir_out(struct mlx5_core_dev *dev, - u32 *in, int inlen, - u32 *out, int outlen); -int mlx5_core_modify_tir(struct mlx5_core_dev *dev, u32 tirn, u32 *in, - int inlen); +int mlx5_core_create_tir(struct mlx5_core_dev *dev, u32 *in, u32 *tirn); +int mlx5_core_modify_tir(struct mlx5_core_dev *dev, u32 tirn, u32 *in); void mlx5_core_destroy_tir(struct mlx5_core_dev *dev, u32 tirn); -int mlx5_core_create_tis(struct mlx5_core_dev *dev, u32 *in, int inlen, - u32 *tisn); -int mlx5_core_modify_tis(struct mlx5_core_dev *dev, u32 tisn, u32 *in, - int 
inlen); +int mlx5_core_create_tis(struct mlx5_core_dev *dev, u32 *in, u32 *tisn); +int mlx5_core_modify_tis(struct mlx5_core_dev *dev, u32 tisn, u32 *in); void mlx5_core_destroy_tis(struct mlx5_core_dev *dev, u32 tisn); int mlx5_core_create_rqt(struct mlx5_core_dev *dev, u32 *in, int inlen, u32 *rqtn); -- cgit v1.2.3 From 6f8b12d661d09b488b9ac879b8eafbd2cc4a1450 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 22 Apr 2020 09:13:27 -0700 Subject: net: napi: add hard irqs deferral feature Back in commit 3b47d30396ba ("net: gro: add a per device gro flush timer") we added the ability to arm one high resolution timer, that we used to keep not-complete packets in GRO engine a bit longer, hoping that further frames might be added to them. Since then, we added the napi_complete_done() interface, and commit 364b6055738b ("net: busy-poll: return busypolling status to drivers") allowed drivers to avoid re-arming NIC interrupts if we made a promise that their NAPI poll() handler would be called in the near future. This infrastructure can be leveraged, thanks to a new device parameter, which allows to arm the napi hrtimer, instead of re-arming the device hard IRQ. We have noticed that on some servers with 32 RX queues or more, the chit-chat between the NIC and the host caused by IRQ delivery and re-arming could hurt throughput by ~20% on 100Gbit NIC. In contrast, hrtimers are using local (percpu) resources and might have lower cost. The new tunable, named napi_defer_hard_irqs, is placed in the same hierarchy as gro_flush_timeout (/sys/class/net/ethX/). By default, both gro_flush_timeout and napi_defer_hard_irqs are zero. This patch does not change the prior behavior of gro_flush_timeout if used alone : NIC hard irqs should be rearmed as before. 
One concrete usage can be : echo 20000 >/sys/class/net/eth1/gro_flush_timeout echo 10 >/sys/class/net/eth1/napi_defer_hard_irqs If at least one packet is retired, then we will reset napi counter to 10 (napi_defer_hard_irqs), ensuring at least 10 periodic scans of the queue. On busy queues, this should avoid NIC hard IRQ, while before this patch IRQ avoidance was only possible if napi->poll() was exhausting its budget and not call napi_complete_done(). This feature also can be used to work around some non-optimal NIC irq coalescing strategies. Having the ability to insert XX usec delays between each napi->poll() can increase cache efficiency, since we increase batch sizes. It also keeps serving cpus not idle too long, reducing tail latencies. Co-developed-by: Luigi Rizzo Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/linux/netdevice.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 0750b54b3765..5a8d40f1ffe2 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -329,6 +329,7 @@ struct napi_struct { unsigned long state; int weight; + int defer_hard_irqs_count; unsigned long gro_bitmask; int (*poll)(struct napi_struct *, int); #ifdef CONFIG_NETPOLL @@ -1995,6 +1996,7 @@ struct net_device { struct bpf_prog __rcu *xdp_prog; unsigned long gro_flush_timeout; + int napi_defer_hard_irqs; rx_handler_func_t __rcu *rx_handler; void __rcu *rx_handler_data; -- cgit v1.2.3 From 9166cc49767a646990a73380480356416b7794eb Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Thu, 26 Mar 2020 15:09:32 +0200 Subject: mac80211: implement Operating Mode Notification extended NSS support Somehow we missed this for a long time, but similar to the extended NSS support in VHT capabilities, we need to have this in Operating Mode notification. 
Implement it by * parsing the 160/80+80 bit there and setting the bandwidth appropriately * having callers of ieee80211_get_vht_max_nss() pass in the current max NSS value as received in the operating mode notification in order to modify it appropriately depending on the extended NSS bits. This updates all drivers that use it, i.e. only iwlwifi/mvm. Signed-off-by: Johannes Berg Signed-off-by: Luca Coelho Link: https://lore.kernel.org/r/iwlwifi.20200326150855.098483728cfa.I4e8c25d3288441759c2793247197229f0696a37d@changeid Signed-off-by: Johannes Berg --- include/linux/ieee80211.h | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h index 16268ef1cbcc..c326aec535c6 100644 --- a/include/linux/ieee80211.h +++ b/include/linux/ieee80211.h @@ -9,7 +9,7 @@ * Copyright (c) 2006, Michael Wu * Copyright (c) 2013 - 2014 Intel Mobile Communications GmbH * Copyright (c) 2016 - 2017 Intel Deutschland GmbH - * Copyright (c) 2018 - 2019 Intel Corporation + * Copyright (c) 2018 - 2020 Intel Corporation */ #ifndef LINUX_IEEE80211_H @@ -859,6 +859,7 @@ enum ieee80211_ht_chanwidth_values { * @IEEE80211_OPMODE_NOTIF_CHANWIDTH_40MHZ: 40 MHz channel width * @IEEE80211_OPMODE_NOTIF_CHANWIDTH_80MHZ: 80 MHz channel width * @IEEE80211_OPMODE_NOTIF_CHANWIDTH_160MHZ: 160 MHz or 80+80 MHz channel width + * @IEEE80211_OPMODE_NOTIF_BW_160_80P80: 160 / 80+80 MHz indicator flag * @IEEE80211_OPMODE_NOTIF_RX_NSS_MASK: number of spatial streams mask * (the NSS value is the value of this field + 1) * @IEEE80211_OPMODE_NOTIF_RX_NSS_SHIFT: number of spatial streams shift @@ -866,11 +867,12 @@ enum ieee80211_ht_chanwidth_values { * using a beamforming steering matrix */ enum ieee80211_vht_opmode_bits { - IEEE80211_OPMODE_NOTIF_CHANWIDTH_MASK = 3, + IEEE80211_OPMODE_NOTIF_CHANWIDTH_MASK = 0x03, IEEE80211_OPMODE_NOTIF_CHANWIDTH_20MHZ = 0, IEEE80211_OPMODE_NOTIF_CHANWIDTH_40MHZ = 1, 
IEEE80211_OPMODE_NOTIF_CHANWIDTH_80MHZ = 2, IEEE80211_OPMODE_NOTIF_CHANWIDTH_160MHZ = 3, + IEEE80211_OPMODE_NOTIF_BW_160_80P80 = 0x04, IEEE80211_OPMODE_NOTIF_RX_NSS_MASK = 0x70, IEEE80211_OPMODE_NOTIF_RX_NSS_SHIFT = 4, IEEE80211_OPMODE_NOTIF_RX_NSS_TYPE_BF = 0x80, @@ -1731,6 +1733,9 @@ struct ieee80211_mu_edca_param_set { * @ext_nss_bw_capable: indicates whether or not the local transmitter * (rate scaling algorithm) can deal with the new logic * (dot11VHTExtendedNSSBWCapable) + * @max_vht_nss: current maximum NSS as advertised by the STA in + * operating mode notification, can be 0 in which case the + * capability data will be used to derive this (from MCS support) * * Due to the VHT Extended NSS Bandwidth Support, the maximum NSS can * vary for a given BW/MCS. This function parses the data. @@ -1739,7 +1744,8 @@ struct ieee80211_mu_edca_param_set { */ int ieee80211_get_vht_max_nss(struct ieee80211_vht_cap *cap, enum ieee80211_vht_chanwidth bw, - int mcs, bool ext_nss_bw_capable); + int mcs, bool ext_nss_bw_capable, + unsigned int max_vht_nss); /* 802.11ax HE MAC capabilities */ #define IEEE80211_HE_MAC_CAP0_HTC_HE 0x01 -- cgit v1.2.3 From 2a392596d8811c6d58c014ec881b159c75a0cf45 Mon Sep 17 00:00:00 2001 From: Ilan Peer Date: Thu, 26 Mar 2020 15:09:35 +0200 Subject: cfg80211: Parse HE membership selector This extends the support for drivers that rebuild IEs in the FW (same as with HT/VHT). 
Signed-off-by: Ilan Peer Signed-off-by: Luca Coelho Link: https://lore.kernel.org/r/iwlwifi.20200326150855.20feaabfb484.I886252639604c8e3e84b8ef97962f1b0e4beec81@changeid Signed-off-by: Johannes Berg --- include/linux/ieee80211.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h index c326aec535c6..38f513ce7528 100644 --- a/include/linux/ieee80211.h +++ b/include/linux/ieee80211.h @@ -1067,6 +1067,7 @@ struct ieee80211_mgmt { /* Supported rates membership selectors */ #define BSS_MEMBERSHIP_SELECTOR_HT_PHY 127 #define BSS_MEMBERSHIP_SELECTOR_VHT_PHY 126 +#define BSS_MEMBERSHIP_SELECTOR_HE_PHY 122 /* mgmt header + 1 byte category code */ #define IEEE80211_MIN_ACTION_SIZE offsetof(struct ieee80211_mgmt, u.action.u) -- cgit v1.2.3 From b572510100165ba037ba43dbbb0f05e8da12c741 Mon Sep 17 00:00:00 2001 From: Thomas Pedersen Date: Wed, 1 Apr 2020 18:18:02 -0700 Subject: ieee80211: share 802.11 unit conversion helpers MHZ_TO_KHZ, and KHZ_TO_MHZ are useful to drivers and elsewhere so export these in the common ieee80211 header. Move the power helpers also because we might as well. 
Signed-off-by: Thomas Pedersen Link: https://lore.kernel.org/r/20200402011810.22947-2-thomas@adapt-ip.com Signed-off-by: Johannes Berg --- include/linux/ieee80211.h | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'include/linux') diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h index 38f513ce7528..a561db435a4b 100644 --- a/include/linux/ieee80211.h +++ b/include/linux/ieee80211.h @@ -3330,6 +3330,16 @@ static inline int ieee80211_get_tdls_action(struct sk_buff *skb, u32 hdr_size) #define TU_TO_JIFFIES(x) (usecs_to_jiffies((x) * 1024)) #define TU_TO_EXP_TIME(x) (jiffies + TU_TO_JIFFIES(x)) +/* convert frequencies */ +#define MHZ_TO_KHZ(freq) ((freq) * 1000) +#define KHZ_TO_MHZ(freq) ((freq) / 1000) + +/* convert powers */ +#define DBI_TO_MBI(gain) ((gain) * 100) +#define MBI_TO_DBI(gain) ((gain) / 100) +#define DBM_TO_MBM(gain) ((gain) * 100) +#define MBM_TO_DBM(gain) ((gain) / 100) + /** * ieee80211_action_contains_tpc - checks if the frame contains TPC element * @skb: the skb containing the frame, length will be checked -- cgit v1.2.3 From 5c05c1dbb177293636a3f5ea4caa872dfcf50ccd Mon Sep 17 00:00:00 2001 From: Russell King Date: Thu, 23 Apr 2020 17:02:56 +0100 Subject: net: phylink, dsa: eliminate phylink_fixed_state_cb() Move the callback into the phylink_config structure, rather than providing a callback to set this up. Signed-off-by: Russell King Tested-by: Florian Fainelli Reviewed-by: Florian Fainelli Signed-off-by: David S. 
Miller --- include/linux/phylink.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/phylink.h b/include/linux/phylink.h index 3f8d37ec5503..cc5b452a184e 100644 --- a/include/linux/phylink.h +++ b/include/linux/phylink.h @@ -67,6 +67,9 @@ struct phylink_config { struct device *dev; enum phylink_op_type type; bool pcs_poll; + bool poll_fixed_state; + void (*get_fixed_state)(struct phylink_config *config, + struct phylink_link_state *state); }; /** @@ -366,9 +369,6 @@ void phylink_destroy(struct phylink *); int phylink_connect_phy(struct phylink *, struct phy_device *); int phylink_of_phy_connect(struct phylink *, struct device_node *, u32 flags); void phylink_disconnect_phy(struct phylink *); -int phylink_fixed_state_cb(struct phylink *, - void (*cb)(struct net_device *dev, - struct phylink_link_state *)); void phylink_mac_change(struct phylink *, bool up); -- cgit v1.2.3 From 3194915486b2bc3f77745774f1731b78f32ff688 Mon Sep 17 00:00:00 2001 From: Heiner Kallweit Date: Thu, 23 Apr 2020 21:35:36 +0200 Subject: net: phy: remove genphy_no_soft_reset Since 6e2d85ec0559 ("net: phy: Stop with excessive soft reset") we don't need genphy_no_soft_reset() any longer. Not setting callback soft_reset results in a no-op now. Signed-off-by: Heiner Kallweit Signed-off-by: David S. 
Miller --- include/linux/phy.h | 4 ---- 1 file changed, 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/phy.h b/include/linux/phy.h index 3941a6bcba10..e2bfb9240587 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -1251,10 +1251,6 @@ static inline int genphy_config_aneg(struct phy_device *phydev) return __genphy_config_aneg(phydev, false); } -static inline int genphy_no_soft_reset(struct phy_device *phydev) -{ - return 0; -} static inline int genphy_no_ack_interrupt(struct phy_device *phydev) { return 0; -- cgit v1.2.3 From 0456ea170cd665ddbb9503be92e39f96055dd5fa Mon Sep 17 00:00:00 2001 From: Stanislav Fomichev Date: Mon, 20 Apr 2020 10:46:10 -0700 Subject: bpf: Enable more helpers for BPF_PROG_TYPE_CGROUP_{DEVICE,SYSCTL,SOCKOPT} Currently the following prog types don't fall back to bpf_base_func_proto() (instead they have cgroup_base_func_proto which has a limited set of helpers from bpf_base_func_proto): * BPF_PROG_TYPE_CGROUP_DEVICE * BPF_PROG_TYPE_CGROUP_SYSCTL * BPF_PROG_TYPE_CGROUP_SOCKOPT I don't see any specific reason why we shouldn't use bpf_base_func_proto(), every other type of program (except bpf-lirc and, understandably, tracing) use it, so let's fall back to bpf_base_func_proto for those prog types as well. This basically boils down to adding access to the following helpers: * BPF_FUNC_get_prandom_u32 * BPF_FUNC_get_smp_processor_id * BPF_FUNC_get_numa_node_id * BPF_FUNC_tail_call * BPF_FUNC_ktime_get_ns * BPF_FUNC_spin_lock (CAP_SYS_ADMIN) * BPF_FUNC_spin_unlock (CAP_SYS_ADMIN) * BPF_FUNC_jiffies64 (CAP_SYS_ADMIN) I've also added bpf_perf_event_output() because it's really handy for logging and debugging. 
Signed-off-by: Stanislav Fomichev Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20200420174610.77494-1-sdf@google.com --- include/linux/bpf.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index fd2b2322412d..25da6ff2a880 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -1523,6 +1523,7 @@ extern const struct bpf_func_proto bpf_strtoul_proto; extern const struct bpf_func_proto bpf_tcp_sock_proto; extern const struct bpf_func_proto bpf_jiffies64_proto; extern const struct bpf_func_proto bpf_get_ns_current_pid_tgid_proto; +extern const struct bpf_func_proto bpf_event_output_data_proto; const struct bpf_func_proto *bpf_tracing_func_proto( enum bpf_func_id func_id, const struct bpf_prog *prog); -- cgit v1.2.3 From 6890896bd765b0504761c61901c9804fca23bfb2 Mon Sep 17 00:00:00 2001 From: Stanislav Fomichev Date: Fri, 24 Apr 2020 16:59:41 -0700 Subject: bpf: Fix missing bpf_base_func_proto in cgroup_base_func_proto for CGROUP_NET=n linux-next build bot reported compile issue [1] with one of its configs. It looks like when we have CONFIG_NET=n and CONFIG_BPF{,_SYSCALL}=y, we are missing the bpf_base_func_proto definition (from net/core/filter.c) in cgroup_base_func_proto. I'm reshuffling the code a bit to make it work. The common helpers are moved into kernel/bpf/helpers.c and the bpf_base_func_proto is exported from there. Also, bpf_get_raw_cpu_id goes into kernel/bpf/core.c akin to existing bpf_user_rnd_u32. 
[1] https://lore.kernel.org/linux-next/CAKH8qBsBvKHswiX1nx40LgO+BGeTmb1NX8tiTttt_0uu6T3dCA@mail.gmail.com/T/#mff8b0c083314c68c2e2ef0211cb11bc20dc13c72 Fixes: 0456ea170cd6 ("bpf: Enable more helpers for BPF_PROG_TYPE_CGROUP_{DEVICE,SYSCTL,SOCKOPT}") Signed-off-by: Stanislav Fomichev Signed-off-by: Alexei Starovoitov Cc: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20200424235941.58382-1-sdf@google.com --- include/linux/bpf.h | 8 ++++++++ include/linux/filter.h | 2 -- 2 files changed, 8 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 25da6ff2a880..5147e11e53ff 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -1215,6 +1215,7 @@ int btf_check_type_match(struct bpf_verifier_env *env, struct bpf_prog *prog, struct bpf_prog *bpf_prog_by_id(u32 id); +const struct bpf_func_proto *bpf_base_func_proto(enum bpf_func_id func_id); #else /* !CONFIG_BPF_SYSCALL */ static inline struct bpf_prog *bpf_prog_get(u32 ufd) { @@ -1365,6 +1366,12 @@ static inline struct bpf_prog *bpf_prog_by_id(u32 id) { return ERR_PTR(-ENOTSUPP); } + +static inline const struct bpf_func_proto * +bpf_base_func_proto(enum bpf_func_id func_id) +{ + return NULL; +} #endif /* CONFIG_BPF_SYSCALL */ static inline struct bpf_prog *bpf_prog_get_type(u32 ufd, @@ -1531,6 +1538,7 @@ const struct bpf_func_proto *bpf_tracing_func_proto( /* Shared helpers among cBPF and eBPF. 
*/ void bpf_user_rnd_init_once(void); u64 bpf_user_rnd_u32(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5); +u64 bpf_get_raw_cpu_id(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5); #if defined(CONFIG_NET) bool bpf_sock_common_is_valid_access(int off, int size, diff --git a/include/linux/filter.h b/include/linux/filter.h index 9b5aa5c483cc..af37318bb1c5 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -863,8 +863,6 @@ int bpf_prog_create(struct bpf_prog **pfp, struct sock_fprog_kern *fprog); int bpf_prog_create_from_user(struct bpf_prog **pfp, struct sock_fprog *fprog, bpf_aux_classic_check_t trans, bool save_orig); void bpf_prog_destroy(struct bpf_prog *fp); -const struct bpf_func_proto * -bpf_base_func_proto(enum bpf_func_id func_id); int sk_attach_filter(struct sock_fprog *fprog, struct sock *sk); int sk_attach_bpf(u32 ufd, struct sock *sk); -- cgit v1.2.3 From 71d19214776e61b33da48f7c1b46e522c7f78221 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Maciej=20=C5=BBenczykowski?= Date: Sun, 26 Apr 2020 09:15:25 -0700 Subject: bpf: add bpf_ktime_get_boot_ns() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit On a device like a cellphone which is constantly suspending and resuming CLOCK_MONOTONIC is not particularly useful for keeping track of or reacting to external network events. Instead you want to use CLOCK_BOOTTIME. Hence add bpf_ktime_get_boot_ns() as a mirror of bpf_ktime_get_ns() based around CLOCK_BOOTTIME instead of CLOCK_MONOTONIC. 
Signed-off-by: Maciej Żenczykowski Signed-off-by: Alexei Starovoitov --- include/linux/bpf.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 5147e11e53ff..10960cfabea4 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -1509,6 +1509,7 @@ extern const struct bpf_func_proto bpf_get_smp_processor_id_proto; extern const struct bpf_func_proto bpf_get_numa_node_id_proto; extern const struct bpf_func_proto bpf_tail_call_proto; extern const struct bpf_func_proto bpf_ktime_get_ns_proto; +extern const struct bpf_func_proto bpf_ktime_get_boot_ns_proto; extern const struct bpf_func_proto bpf_get_current_pid_tgid_proto; extern const struct bpf_func_proto bpf_get_current_uid_gid_proto; extern const struct bpf_func_proto bpf_get_current_comm_proto; -- cgit v1.2.3 From 26363af5643490a817272e1cc6f1d3f1d550a699 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 24 Apr 2020 08:43:35 +0200 Subject: mm: remove watermark_boost_factor_sysctl_handler watermark_boost_factor_sysctl_handler is just a pointless wrapper for proc_dointvec_minmax, so remove it and use proc_dointvec_minmax directly. 
Signed-off-by: Christoph Hellwig Acked-by: David Rientjes Signed-off-by: Al Viro --- include/linux/mmzone.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index 1b9de7d220fb..f37bb8f187fc 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -911,8 +911,6 @@ static inline int is_highmem(struct zone *zone) struct ctl_table; int min_free_kbytes_sysctl_handler(struct ctl_table *, int, void __user *, size_t *, loff_t *); -int watermark_boost_factor_sysctl_handler(struct ctl_table *, int, - void __user *, size_t *, loff_t *); int watermark_scale_factor_sysctl_handler(struct ctl_table *, int, void __user *, size_t *, loff_t *); extern int sysctl_lowmem_reserve_ratio[MAX_NR_ZONES]; -- cgit v1.2.3 From 2374c09b1c8a883bb9b4b2fc3756703eeb618f4a Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 24 Apr 2020 08:43:36 +0200 Subject: sysctl: remove all extern declaration from sysctl.c Extern declarations in .c files are a bad style and can lead to mismatches. Use existing definitions in headers where they exist, and otherwise move the external declarations to suitable header files. 
Signed-off-by: Christoph Hellwig Signed-off-by: Al Viro --- include/linux/coredump.h | 4 ++++ include/linux/file.h | 2 ++ include/linux/mm.h | 2 ++ include/linux/mmzone.h | 2 ++ include/linux/pid.h | 3 +++ include/linux/sysctl.h | 8 ++++++++ 6 files changed, 21 insertions(+) (limited to 'include/linux') diff --git a/include/linux/coredump.h b/include/linux/coredump.h index abf4b4e65dbb..7a899e83835d 100644 --- a/include/linux/coredump.h +++ b/include/linux/coredump.h @@ -22,4 +22,8 @@ extern void do_coredump(const kernel_siginfo_t *siginfo); static inline void do_coredump(const kernel_siginfo_t *siginfo) {} #endif +extern int core_uses_pid; +extern char core_pattern[]; +extern unsigned int core_pipe_limit; + #endif /* _LINUX_COREDUMP_H */ diff --git a/include/linux/file.h b/include/linux/file.h index 142d102f285e..122f80084a3e 100644 --- a/include/linux/file.h +++ b/include/linux/file.h @@ -94,4 +94,6 @@ extern void fd_install(unsigned int fd, struct file *file); extern void flush_delayed_fput(void); extern void __fput_sync(struct file *); +extern unsigned int sysctl_nr_open_min, sysctl_nr_open_max; + #endif /* __LINUX_FILE_H */ diff --git a/include/linux/mm.h b/include/linux/mm.h index 5a323422d783..9c4e7e76dedd 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -3140,5 +3140,7 @@ unsigned long wp_shared_mapping_range(struct address_space *mapping, pgoff_t first_index, pgoff_t nr); #endif +extern int sysctl_nr_trim_pages; + #endif /* __KERNEL__ */ #endif /* _LINUX_MM_H */ diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index f37bb8f187fc..b2af594ef0f7 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -909,6 +909,7 @@ static inline int is_highmem(struct zone *zone) /* These two functions are used to setup the per zone pages min values */ struct ctl_table; + int min_free_kbytes_sysctl_handler(struct ctl_table *, int, void __user *, size_t *, loff_t *); int watermark_scale_factor_sysctl_handler(struct ctl_table *, int, @@ 
-925,6 +926,7 @@ int sysctl_min_slab_ratio_sysctl_handler(struct ctl_table *, int, extern int numa_zonelist_order_handler(struct ctl_table *, int, void __user *, size_t *, loff_t *); +extern int percpu_pagelist_fraction; extern char numa_zonelist_order[]; #define NUMA_ZONELIST_ORDER_LEN 16 diff --git a/include/linux/pid.h b/include/linux/pid.h index cc896f0fc4e3..93543cbc0e6b 100644 --- a/include/linux/pid.h +++ b/include/linux/pid.h @@ -108,6 +108,9 @@ extern void transfer_pid(struct task_struct *old, struct task_struct *new, struct pid_namespace; extern struct pid_namespace init_pid_ns; +extern int pid_max; +extern int pid_max_min, pid_max_max; + /* * look up a PID in the hash table. Must be called with the tasklist_lock * or rcu_read_lock() held. diff --git a/include/linux/sysctl.h b/include/linux/sysctl.h index 02fa84493f23..36143ca40b56 100644 --- a/include/linux/sysctl.h +++ b/include/linux/sysctl.h @@ -207,7 +207,15 @@ void unregister_sysctl_table(struct ctl_table_header * table); extern int sysctl_init(void); +extern int pwrsw_enabled; +extern int unaligned_enabled; +extern int unaligned_dump_stack; +extern int no_unaligned_warning; + extern struct ctl_table sysctl_mount_point[]; +extern struct ctl_table random_table[]; +extern struct ctl_table firmware_config_table[]; +extern struct ctl_table epoll_table[]; #else /* CONFIG_SYSCTL */ static inline struct ctl_table_header *register_sysctl_table(struct ctl_table * table) -- cgit v1.2.3 From 32927393dc1ccd60fb2bdc05b9e8e88753761469 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 24 Apr 2020 08:43:38 +0200 Subject: sysctl: pass kernel pointers to ->proc_handler Instead of having all the sysctl handlers deal with user pointers, which is rather hairy in terms of the BPF interaction, copy the input to and from userspace in common code. This also means that the strings are always NUL-terminated by the common code, making the API a little bit safer. 
As most handlers just pass through the data to one of the common handlers, a lot of the changes are mechanical. Signed-off-by: Christoph Hellwig Acked-by: Andrey Ignatov Signed-off-by: Al Viro --- include/linux/bpf-cgroup.h | 9 ++++---- include/linux/compaction.h | 2 +- include/linux/fs.h | 6 ++--- include/linux/ftrace.h | 3 +-- include/linux/hugetlb.h | 15 ++++++------- include/linux/kprobes.h | 2 +- include/linux/latencytop.h | 4 ++-- include/linux/mm.h | 12 +++++----- include/linux/mmzone.h | 23 +++++++++---------- include/linux/nmi.h | 15 +++++-------- include/linux/perf_event.h | 13 +++++------ include/linux/printk.h | 2 +- include/linux/sched/sysctl.h | 44 +++++++++++++----------------------- include/linux/security.h | 2 +- include/linux/sysctl.h | 53 ++++++++++++++++++-------------------------- include/linux/timer.h | 3 +-- include/linux/vmstat.h | 8 +++---- include/linux/writeback.h | 28 +++++++++-------------- 18 files changed, 101 insertions(+), 143 deletions(-) (limited to 'include/linux') diff --git a/include/linux/bpf-cgroup.h b/include/linux/bpf-cgroup.h index c11b413d5b1a..0b41fd5fc96b 100644 --- a/include/linux/bpf-cgroup.h +++ b/include/linux/bpf-cgroup.h @@ -138,8 +138,7 @@ int __cgroup_bpf_check_dev_permission(short dev_type, u32 major, u32 minor, int __cgroup_bpf_run_filter_sysctl(struct ctl_table_header *head, struct ctl_table *table, int write, - void __user *buf, size_t *pcount, - loff_t *ppos, void **new_buf, + void **buf, size_t *pcount, loff_t *ppos, enum bpf_attach_type type); int __cgroup_bpf_run_filter_setsockopt(struct sock *sock, int *level, @@ -302,12 +301,12 @@ int bpf_percpu_cgroup_storage_update(struct bpf_map *map, void *key, }) -#define BPF_CGROUP_RUN_PROG_SYSCTL(head, table, write, buf, count, pos, nbuf) \ +#define BPF_CGROUP_RUN_PROG_SYSCTL(head, table, write, buf, count, pos) \ ({ \ int __ret = 0; \ if (cgroup_bpf_enabled) \ __ret = __cgroup_bpf_run_filter_sysctl(head, table, write, \ - buf, count, pos, nbuf, \ + buf, count, 
pos, \ BPF_CGROUP_SYSCTL); \ __ret; \ }) @@ -429,7 +428,7 @@ static inline int bpf_percpu_cgroup_storage_update(struct bpf_map *map, #define BPF_CGROUP_RUN_PROG_UDP6_RECVMSG_LOCK(sk, uaddr) ({ 0; }) #define BPF_CGROUP_RUN_PROG_SOCK_OPS(sock_ops) ({ 0; }) #define BPF_CGROUP_RUN_PROG_DEVICE_CGROUP(type,major,minor,access) ({ 0; }) -#define BPF_CGROUP_RUN_PROG_SYSCTL(head,table,write,buf,count,pos,nbuf) ({ 0; }) +#define BPF_CGROUP_RUN_PROG_SYSCTL(head,table,write,buf,count,pos) ({ 0; }) #define BPF_CGROUP_GETSOCKOPT_MAX_OPTLEN(optlen) ({ 0; }) #define BPF_CGROUP_RUN_PROG_GETSOCKOPT(sock, level, optname, optval, \ optlen, max_optlen, retval) ({ retval; }) diff --git a/include/linux/compaction.h b/include/linux/compaction.h index 4b898cdbdf05..a0eabfbeb0e1 100644 --- a/include/linux/compaction.h +++ b/include/linux/compaction.h @@ -86,7 +86,7 @@ static inline unsigned long compact_gap(unsigned int order) #ifdef CONFIG_COMPACTION extern int sysctl_compact_memory; extern int sysctl_compaction_handler(struct ctl_table *table, int write, - void __user *buffer, size_t *length, loff_t *ppos); + void *buffer, size_t *length, loff_t *ppos); extern int sysctl_extfrag_threshold; extern int sysctl_compact_unevictable_allowed; diff --git a/include/linux/fs.h b/include/linux/fs.h index 4f6f59b4f22a..9b028d260649 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -3536,11 +3536,11 @@ ssize_t simple_attr_write(struct file *file, const char __user *buf, struct ctl_table; int proc_nr_files(struct ctl_table *table, int write, - void __user *buffer, size_t *lenp, loff_t *ppos); + void *buffer, size_t *lenp, loff_t *ppos); int proc_nr_dentry(struct ctl_table *table, int write, - void __user *buffer, size_t *lenp, loff_t *ppos); + void *buffer, size_t *lenp, loff_t *ppos); int proc_nr_inodes(struct ctl_table *table, int write, - void __user *buffer, size_t *lenp, loff_t *ppos); + void *buffer, size_t *lenp, loff_t *ppos); int __init get_filesystem_list(char *buf); #define 
__FMODE_EXEC ((__force int) FMODE_EXEC) diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index db95244a62d4..ddfc377de0d2 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -1005,8 +1005,7 @@ extern void disable_trace_on_warning(void); extern int __disable_trace_on_warning; int tracepoint_printk_sysctl(struct ctl_table *table, int write, - void __user *buffer, size_t *lenp, - loff_t *ppos); + void *buffer, size_t *lenp, loff_t *ppos); #else /* CONFIG_TRACING */ static inline void disable_trace_on_warning(void) { } diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h index 43a1cef8f0f1..92c21c5ccc58 100644 --- a/include/linux/hugetlb.h +++ b/include/linux/hugetlb.h @@ -105,14 +105,13 @@ struct hugepage_subpool *hugepage_new_subpool(struct hstate *h, long max_hpages, void hugepage_put_subpool(struct hugepage_subpool *spool); void reset_vma_resv_huge_pages(struct vm_area_struct *vma); -int hugetlb_sysctl_handler(struct ctl_table *, int, void __user *, size_t *, loff_t *); -int hugetlb_overcommit_handler(struct ctl_table *, int, void __user *, size_t *, loff_t *); -int hugetlb_treat_movable_handler(struct ctl_table *, int, void __user *, size_t *, loff_t *); - -#ifdef CONFIG_NUMA -int hugetlb_mempolicy_sysctl_handler(struct ctl_table *, int, - void __user *, size_t *, loff_t *); -#endif +int hugetlb_sysctl_handler(struct ctl_table *, int, void *, size_t *, loff_t *); +int hugetlb_overcommit_handler(struct ctl_table *, int, void *, size_t *, + loff_t *); +int hugetlb_treat_movable_handler(struct ctl_table *, int, void *, size_t *, + loff_t *); +int hugetlb_mempolicy_sysctl_handler(struct ctl_table *, int, void *, size_t *, + loff_t *); int copy_hugetlb_page_range(struct mm_struct *, struct mm_struct *, struct vm_area_struct *); long follow_hugetlb_page(struct mm_struct *, struct vm_area_struct *, diff --git a/include/linux/kprobes.h b/include/linux/kprobes.h index 04bdaf01112c..594265bfd390 100644 --- a/include/linux/kprobes.h +++ 
b/include/linux/kprobes.h @@ -312,7 +312,7 @@ DEFINE_INSN_CACHE_OPS(optinsn); #ifdef CONFIG_SYSCTL extern int sysctl_kprobes_optimization; extern int proc_kprobes_optimization_handler(struct ctl_table *table, - int write, void __user *buffer, + int write, void *buffer, size_t *length, loff_t *ppos); #endif extern void wait_for_kprobe_optimizer(void); diff --git a/include/linux/latencytop.h b/include/linux/latencytop.h index 9022f0c2e2e4..abe3d95f795b 100644 --- a/include/linux/latencytop.h +++ b/include/linux/latencytop.h @@ -38,8 +38,8 @@ account_scheduler_latency(struct task_struct *task, int usecs, int inter) void clear_tsk_latency_tracing(struct task_struct *p); -extern int sysctl_latencytop(struct ctl_table *table, int write, - void __user *buffer, size_t *lenp, loff_t *ppos); +int sysctl_latencytop(struct ctl_table *table, int write, void *buffer, + size_t *lenp, loff_t *ppos); #else diff --git a/include/linux/mm.h b/include/linux/mm.h index 9c4e7e76dedd..a7b1ef8ed970 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -201,10 +201,10 @@ extern int sysctl_overcommit_memory; extern int sysctl_overcommit_ratio; extern unsigned long sysctl_overcommit_kbytes; -extern int overcommit_ratio_handler(struct ctl_table *, int, void __user *, - size_t *, loff_t *); -extern int overcommit_kbytes_handler(struct ctl_table *, int, void __user *, - size_t *, loff_t *); +int overcommit_ratio_handler(struct ctl_table *, int, void *, size_t *, + loff_t *); +int overcommit_kbytes_handler(struct ctl_table *, int, void *, size_t *, + loff_t *); #define nth_page(page,n) pfn_to_page(page_to_pfn((page)) + (n)) @@ -2957,8 +2957,8 @@ extern bool process_shares_mm(struct task_struct *p, struct mm_struct *mm); #ifdef CONFIG_SYSCTL extern int sysctl_drop_caches; -int drop_caches_sysctl_handler(struct ctl_table *, int, - void __user *, size_t *, loff_t *); +int drop_caches_sysctl_handler(struct ctl_table *, int, void *, size_t *, + loff_t *); #endif void drop_slab(void); diff --git 
a/include/linux/mmzone.h b/include/linux/mmzone.h index b2af594ef0f7..93cf20f41e26 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -910,22 +910,21 @@ static inline int is_highmem(struct zone *zone) /* These two functions are used to setup the per zone pages min values */ struct ctl_table; -int min_free_kbytes_sysctl_handler(struct ctl_table *, int, - void __user *, size_t *, loff_t *); -int watermark_scale_factor_sysctl_handler(struct ctl_table *, int, - void __user *, size_t *, loff_t *); +int min_free_kbytes_sysctl_handler(struct ctl_table *, int, void *, size_t *, + loff_t *); +int watermark_scale_factor_sysctl_handler(struct ctl_table *, int, void *, + size_t *, loff_t *); extern int sysctl_lowmem_reserve_ratio[MAX_NR_ZONES]; -int lowmem_reserve_ratio_sysctl_handler(struct ctl_table *, int, - void __user *, size_t *, loff_t *); +int lowmem_reserve_ratio_sysctl_handler(struct ctl_table *, int, void *, + size_t *, loff_t *); int percpu_pagelist_fraction_sysctl_handler(struct ctl_table *, int, - void __user *, size_t *, loff_t *); + void *, size_t *, loff_t *); int sysctl_min_unmapped_ratio_sysctl_handler(struct ctl_table *, int, - void __user *, size_t *, loff_t *); + void *, size_t *, loff_t *); int sysctl_min_slab_ratio_sysctl_handler(struct ctl_table *, int, - void __user *, size_t *, loff_t *); - -extern int numa_zonelist_order_handler(struct ctl_table *, int, - void __user *, size_t *, loff_t *); + void *, size_t *, loff_t *); +int numa_zonelist_order_handler(struct ctl_table *, int, + void *, size_t *, loff_t *); extern int percpu_pagelist_fraction; extern char numa_zonelist_order[]; #define NUMA_ZONELIST_ORDER_LEN 16 diff --git a/include/linux/nmi.h b/include/linux/nmi.h index 9003e29cde46..750c7f395ca9 100644 --- a/include/linux/nmi.h +++ b/include/linux/nmi.h @@ -202,16 +202,11 @@ static inline void watchdog_update_hrtimer_threshold(u64 period) { } #endif struct ctl_table; -extern int proc_watchdog(struct ctl_table *, int , - void 
__user *, size_t *, loff_t *); -extern int proc_nmi_watchdog(struct ctl_table *, int , - void __user *, size_t *, loff_t *); -extern int proc_soft_watchdog(struct ctl_table *, int , - void __user *, size_t *, loff_t *); -extern int proc_watchdog_thresh(struct ctl_table *, int , - void __user *, size_t *, loff_t *); -extern int proc_watchdog_cpumask(struct ctl_table *, int, - void __user *, size_t *, loff_t *); +int proc_watchdog(struct ctl_table *, int, void *, size_t *, loff_t *); +int proc_nmi_watchdog(struct ctl_table *, int , void *, size_t *, loff_t *); +int proc_soft_watchdog(struct ctl_table *, int , void *, size_t *, loff_t *); +int proc_watchdog_thresh(struct ctl_table *, int , void *, size_t *, loff_t *); +int proc_watchdog_cpumask(struct ctl_table *, int, void *, size_t *, loff_t *); #ifdef CONFIG_HAVE_ACPI_APEI_NMI #include diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index 9c3e7619c929..347ea379622a 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -1280,15 +1280,12 @@ extern int sysctl_perf_cpu_time_max_percent; extern void perf_sample_event_took(u64 sample_len_ns); -extern int perf_proc_update_handler(struct ctl_table *table, int write, - void __user *buffer, size_t *lenp, - loff_t *ppos); -extern int perf_cpu_time_max_percent_handler(struct ctl_table *table, int write, - void __user *buffer, size_t *lenp, - loff_t *ppos); - +int perf_proc_update_handler(struct ctl_table *table, int write, + void *buffer, size_t *lenp, loff_t *ppos); +int perf_cpu_time_max_percent_handler(struct ctl_table *table, int write, + void *buffer, size_t *lenp, loff_t *ppos); int perf_event_max_stack_handler(struct ctl_table *table, int write, - void __user *buffer, size_t *lenp, loff_t *ppos); + void *buffer, size_t *lenp, loff_t *ppos); /* Access to perf_event_open(2) syscall. 
*/ #define PERF_SECURITY_OPEN 0 diff --git a/include/linux/printk.h b/include/linux/printk.h index e061635e0409..fcde0772ec98 100644 --- a/include/linux/printk.h +++ b/include/linux/printk.h @@ -189,7 +189,7 @@ extern int printk_delay_msec; extern int dmesg_restrict; extern int -devkmsg_sysctl_set_loglvl(struct ctl_table *table, int write, void __user *buf, +devkmsg_sysctl_set_loglvl(struct ctl_table *table, int write, void *buf, size_t *lenp, loff_t *ppos); extern void wake_up_klogd(void); diff --git a/include/linux/sched/sysctl.h b/include/linux/sched/sysctl.h index d4f6215ee03f..7b4d3a49b6c5 100644 --- a/include/linux/sched/sysctl.h +++ b/include/linux/sched/sysctl.h @@ -12,9 +12,8 @@ extern unsigned int sysctl_hung_task_panic; extern unsigned long sysctl_hung_task_timeout_secs; extern unsigned long sysctl_hung_task_check_interval_secs; extern int sysctl_hung_task_warnings; -extern int proc_dohung_task_timeout_secs(struct ctl_table *table, int write, - void __user *buffer, - size_t *lenp, loff_t *ppos); +int proc_dohung_task_timeout_secs(struct ctl_table *table, int write, + void *buffer, size_t *lenp, loff_t *ppos); #else /* Avoid need for ifdefs elsewhere in the code */ enum { sysctl_hung_task_timeout_secs = 0 }; @@ -43,8 +42,7 @@ extern __read_mostly unsigned int sysctl_sched_migration_cost; extern __read_mostly unsigned int sysctl_sched_nr_migrate; int sched_proc_update_handler(struct ctl_table *table, int write, - void __user *buffer, size_t *length, - loff_t *ppos); + void *buffer, size_t *length, loff_t *ppos); #endif /* @@ -72,33 +70,21 @@ extern unsigned int sysctl_sched_autogroup_enabled; extern int sysctl_sched_rr_timeslice; extern int sched_rr_timeslice; -extern int sched_rr_handler(struct ctl_table *table, int write, - void __user *buffer, size_t *lenp, - loff_t *ppos); - -extern int sched_rt_handler(struct ctl_table *table, int write, - void __user *buffer, size_t *lenp, - loff_t *ppos); - -#ifdef CONFIG_UCLAMP_TASK -extern int 
sysctl_sched_uclamp_handler(struct ctl_table *table, int write, - void __user *buffer, size_t *lenp, - loff_t *ppos); -#endif - -extern int sysctl_numa_balancing(struct ctl_table *table, int write, - void __user *buffer, size_t *lenp, - loff_t *ppos); - -extern int sysctl_schedstats(struct ctl_table *table, int write, - void __user *buffer, size_t *lenp, - loff_t *ppos); +int sched_rr_handler(struct ctl_table *table, int write, void *buffer, + size_t *lenp, loff_t *ppos); +int sched_rt_handler(struct ctl_table *table, int write, void *buffer, + size_t *lenp, loff_t *ppos); +int sysctl_sched_uclamp_handler(struct ctl_table *table, int write, + void *buffer, size_t *lenp, loff_t *ppos); +int sysctl_numa_balancing(struct ctl_table *table, int write, void *buffer, + size_t *lenp, loff_t *ppos); +int sysctl_schedstats(struct ctl_table *table, int write, void *buffer, + size_t *lenp, loff_t *ppos); #if defined(CONFIG_ENERGY_MODEL) && defined(CONFIG_CPU_FREQ_GOV_SCHEDUTIL) extern unsigned int sysctl_sched_energy_aware; -extern int sched_energy_aware_handler(struct ctl_table *table, int write, - void __user *buffer, size_t *lenp, - loff_t *ppos); +int sched_energy_aware_handler(struct ctl_table *table, int write, + void *buffer, size_t *lenp, loff_t *ppos); #endif #endif /* _LINUX_SCHED_SYSCTL_H */ diff --git a/include/linux/security.h b/include/linux/security.h index a8d9310472df..6aa229b252ce 100644 --- a/include/linux/security.h +++ b/include/linux/security.h @@ -211,7 +211,7 @@ struct request_sock; #ifdef CONFIG_MMU extern int mmap_min_addr_handler(struct ctl_table *table, int write, - void __user *buffer, size_t *lenp, loff_t *ppos); + void *buffer, size_t *lenp, loff_t *ppos); #endif /* security_inode_init_security callback function to write xattrs */ diff --git a/include/linux/sysctl.h b/include/linux/sysctl.h index 36143ca40b56..f2401e45a3c2 100644 --- a/include/linux/sysctl.h +++ b/include/linux/sysctl.h @@ -44,35 +44,26 @@ struct ctl_dir; extern const int 
sysctl_vals[]; -typedef int proc_handler (struct ctl_table *ctl, int write, - void __user *buffer, size_t *lenp, loff_t *ppos); - -extern int proc_dostring(struct ctl_table *, int, - void __user *, size_t *, loff_t *); -extern int proc_dointvec(struct ctl_table *, int, - void __user *, size_t *, loff_t *); -extern int proc_douintvec(struct ctl_table *, int, - void __user *, size_t *, loff_t *); -extern int proc_dointvec_minmax(struct ctl_table *, int, - void __user *, size_t *, loff_t *); -extern int proc_douintvec_minmax(struct ctl_table *table, int write, - void __user *buffer, size_t *lenp, - loff_t *ppos); -extern int proc_dointvec_jiffies(struct ctl_table *, int, - void __user *, size_t *, loff_t *); -extern int proc_dointvec_userhz_jiffies(struct ctl_table *, int, - void __user *, size_t *, loff_t *); -extern int proc_dointvec_ms_jiffies(struct ctl_table *, int, - void __user *, size_t *, loff_t *); -extern int proc_doulongvec_minmax(struct ctl_table *, int, - void __user *, size_t *, loff_t *); -extern int proc_doulongvec_ms_jiffies_minmax(struct ctl_table *table, int, - void __user *, size_t *, loff_t *); -extern int proc_do_large_bitmap(struct ctl_table *, int, - void __user *, size_t *, loff_t *); -extern int proc_do_static_key(struct ctl_table *table, int write, - void __user *buffer, size_t *lenp, - loff_t *ppos); +typedef int proc_handler(struct ctl_table *ctl, int write, void *buffer, + size_t *lenp, loff_t *ppos); + +int proc_dostring(struct ctl_table *, int, void *, size_t *, loff_t *); +int proc_dointvec(struct ctl_table *, int, void *, size_t *, loff_t *); +int proc_douintvec(struct ctl_table *, int, void *, size_t *, loff_t *); +int proc_dointvec_minmax(struct ctl_table *, int, void *, size_t *, loff_t *); +int proc_douintvec_minmax(struct ctl_table *table, int write, void *buffer, + size_t *lenp, loff_t *ppos); +int proc_dointvec_jiffies(struct ctl_table *, int, void *, size_t *, loff_t *); +int proc_dointvec_userhz_jiffies(struct ctl_table *, 
int, void *, size_t *, + loff_t *); +int proc_dointvec_ms_jiffies(struct ctl_table *, int, void *, size_t *, + loff_t *); +int proc_doulongvec_minmax(struct ctl_table *, int, void *, size_t *, loff_t *); +int proc_doulongvec_ms_jiffies_minmax(struct ctl_table *table, int, void *, + size_t *, loff_t *); +int proc_do_large_bitmap(struct ctl_table *, int, void *, size_t *, loff_t *); +int proc_do_static_key(struct ctl_table *table, int write, void *buffer, + size_t *lenp, loff_t *ppos); /* * Register a set of sysctl names by calling register_sysctl_table @@ -246,7 +237,7 @@ static inline void setup_sysctl_set(struct ctl_table_set *p, #endif /* CONFIG_SYSCTL */ -int sysctl_max_threads(struct ctl_table *table, int write, - void __user *buffer, size_t *lenp, loff_t *ppos); +int sysctl_max_threads(struct ctl_table *table, int write, void *buffer, + size_t *lenp, loff_t *ppos); #endif /* _LINUX_SYSCTL_H */ diff --git a/include/linux/timer.h b/include/linux/timer.h index 0dc19a8c39c9..07910ae5ddd9 100644 --- a/include/linux/timer.h +++ b/include/linux/timer.h @@ -201,8 +201,7 @@ struct ctl_table; extern unsigned int sysctl_timer_migration; int timer_migration_handler(struct ctl_table *table, int write, - void __user *buffer, size_t *lenp, - loff_t *ppos); + void *buffer, size_t *lenp, loff_t *ppos); #endif unsigned long __round_jiffies(unsigned long j, int cpu); diff --git a/include/linux/vmstat.h b/include/linux/vmstat.h index 292485f3d24d..cb507151710f 100644 --- a/include/linux/vmstat.h +++ b/include/linux/vmstat.h @@ -16,8 +16,8 @@ extern int sysctl_stat_interval; #define DISABLE_NUMA_STAT 0 extern int sysctl_vm_numa_stat; DECLARE_STATIC_KEY_TRUE(vm_numa_stat_key); -extern int sysctl_vm_numa_stat_handler(struct ctl_table *table, - int write, void __user *buffer, size_t *length, loff_t *ppos); +int sysctl_vm_numa_stat_handler(struct ctl_table *table, int write, + void *buffer, size_t *length, loff_t *ppos); #endif struct reclaim_stat { @@ -274,8 +274,8 @@ void 
cpu_vm_stats_fold(int cpu); void refresh_zone_stat_thresholds(void); struct ctl_table; -int vmstat_refresh(struct ctl_table *, int write, - void __user *buffer, size_t *lenp, loff_t *ppos); +int vmstat_refresh(struct ctl_table *, int write, void *buffer, size_t *lenp, + loff_t *ppos); void drain_zonestat(struct zone *zone, struct per_cpu_pageset *); diff --git a/include/linux/writeback.h b/include/linux/writeback.h index a19d845dd7eb..f8a7e1a850fb 100644 --- a/include/linux/writeback.h +++ b/include/linux/writeback.h @@ -362,24 +362,18 @@ extern int vm_highmem_is_dirtyable; extern int block_dump; extern int laptop_mode; -extern int dirty_background_ratio_handler(struct ctl_table *table, int write, - void __user *buffer, size_t *lenp, - loff_t *ppos); -extern int dirty_background_bytes_handler(struct ctl_table *table, int write, - void __user *buffer, size_t *lenp, - loff_t *ppos); -extern int dirty_ratio_handler(struct ctl_table *table, int write, - void __user *buffer, size_t *lenp, - loff_t *ppos); -extern int dirty_bytes_handler(struct ctl_table *table, int write, - void __user *buffer, size_t *lenp, - loff_t *ppos); +int dirty_background_ratio_handler(struct ctl_table *table, int write, + void *buffer, size_t *lenp, loff_t *ppos); +int dirty_background_bytes_handler(struct ctl_table *table, int write, + void *buffer, size_t *lenp, loff_t *ppos); +int dirty_ratio_handler(struct ctl_table *table, int write, + void *buffer, size_t *lenp, loff_t *ppos); +int dirty_bytes_handler(struct ctl_table *table, int write, + void *buffer, size_t *lenp, loff_t *ppos); int dirtytime_interval_handler(struct ctl_table *table, int write, - void __user *buffer, size_t *lenp, loff_t *ppos); - -struct ctl_table; -int dirty_writeback_centisecs_handler(struct ctl_table *, int, - void __user *, size_t *, loff_t *); + void *buffer, size_t *lenp, loff_t *ppos); +int dirty_writeback_centisecs_handler(struct ctl_table *table, int write, + void *buffer, size_t *lenp, loff_t *ppos); void 
global_dirty_limits(unsigned long *pbackground, unsigned long *pdirty); unsigned long wb_calc_thresh(struct bdi_writeback *wb, unsigned long thresh); -- cgit v1.2.3 From 4b8d7d4c599182393421c190bae3604b4db9629a Mon Sep 17 00:00:00 2001 From: Horatiu Vultur Date: Sun, 26 Apr 2020 15:22:00 +0200 Subject: bridge: mrp: Extend bridge interface To integrate MRP into the bridge, first the bridge needs to be aware of ports that are part of an MRP ring and which rings are on the bridge. Therefore extend bridge interface with the following: - add new flag(BR_MRP_AWARE) to the net bridge ports, this bit will be set when the port is added to an MRP instance. In this way it knows if the frame was received on MRP ring port - add new flag(BR_MRP_LOST_CONT) to the net bridge ports, this bit will be set when the port lost the continuity of MRP Test frames. - add a list of MRP instances Reviewed-by: Nikolay Aleksandrov Signed-off-by: Horatiu Vultur Signed-off-by: David S. Miller --- include/linux/if_bridge.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/if_bridge.h b/include/linux/if_bridge.h index 9e57c4411734..b3a8d3054af0 100644 --- a/include/linux/if_bridge.h +++ b/include/linux/if_bridge.h @@ -47,6 +47,8 @@ struct br_ip_list { #define BR_BCAST_FLOOD BIT(14) #define BR_NEIGH_SUPPRESS BIT(15) #define BR_ISOLATED BIT(16) +#define BR_MRP_AWARE BIT(17) +#define BR_MRP_LOST_CONT BIT(18) #define BR_DEFAULT_AGEING_TIME (300 * HZ) -- cgit v1.2.3 From d65dbedfd298344747033f17c1efd2afc8082bc7 Mon Sep 17 00:00:00 2001 From: Huy Nguyen Date: Fri, 24 Apr 2020 12:45:02 -0700 Subject: net/mlx5: Add support for COPY steering action Add COPY type to modify_header action. IPsec feature is the first feature that needs COPY steering action. 
Signed-off-by: Huy Nguyen Signed-off-by: Raed Salem Signed-off-by: Saeed Mahameed Acked-by: Leon Romanovsky --- include/linux/mlx5/mlx5_ifc.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index 6fa24918eade..3ad2c51ccde9 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -5670,9 +5670,9 @@ struct mlx5_ifc_copy_action_in_bits { u8 reserved_at_38[0x8]; }; -union mlx5_ifc_set_action_in_add_action_in_auto_bits { - struct mlx5_ifc_set_action_in_bits set_action_in; - struct mlx5_ifc_add_action_in_bits add_action_in; +union mlx5_ifc_set_add_copy_action_in_auto_bits { + struct mlx5_ifc_set_action_in_bits set_action_in; + struct mlx5_ifc_add_action_in_bits add_action_in; struct mlx5_ifc_copy_action_in_bits copy_action_in; u8 reserved_at_0[0x40]; }; @@ -5746,7 +5746,7 @@ struct mlx5_ifc_alloc_modify_header_context_in_bits { u8 reserved_at_68[0x10]; u8 num_of_actions[0x8]; - union mlx5_ifc_set_action_in_add_action_in_auto_bits actions[0]; + union mlx5_ifc_set_add_copy_action_in_auto_bits actions[0]; }; struct mlx5_ifc_dealloc_modify_header_context_out_bits { -- cgit v1.2.3 From 2b58f6d9df50f534fe465113b69de60a2ef0e74a Mon Sep 17 00:00:00 2001 From: Raed Salem Date: Fri, 24 Apr 2020 12:45:03 -0700 Subject: net/mlx5: Introduce IPsec Connect-X offload hardware bits and structures Add IPsec offload related IFC structs, layouts and enumerations. 
Signed-off-by: Raed Salem Reviewed-by: Tariq Toukan Signed-off-by: Saeed Mahameed --- include/linux/mlx5/device.h | 4 +++ include/linux/mlx5/mlx5_ifc.h | 78 +++++++++++++++++++++++++++++++++++++++++-- 2 files changed, 79 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mlx5/device.h b/include/linux/mlx5/device.h index 2b90097a6cf9..7b57877e501e 100644 --- a/include/linux/mlx5/device.h +++ b/include/linux/mlx5/device.h @@ -1107,6 +1107,7 @@ enum mlx5_cap_type { MLX5_CAP_TLS, MLX5_CAP_VDPA_EMULATION = 0x13, MLX5_CAP_DEV_EVENT = 0x14, + MLX5_CAP_IPSEC, /* NUM OF CAP Types */ MLX5_CAP_NUM }; @@ -1324,6 +1325,9 @@ enum mlx5_qcam_feature_groups { MLX5_GET64(device_virtio_emulation_cap, \ (mdev)->caps.hca_cur[MLX5_CAP_VDPA_EMULATION], cap) +#define MLX5_CAP_IPSEC(mdev, cap)\ + MLX5_GET(ipsec_cap, (mdev)->caps.hca_cur[MLX5_CAP_IPSEC], cap) + enum { MLX5_CMD_STAT_OK = 0x0, MLX5_CMD_STAT_INT_ERR = 0x1, diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index 3ad2c51ccde9..cf971d341189 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -886,7 +886,8 @@ struct mlx5_ifc_per_protocol_networking_offload_caps_bits { u8 tunnel_stateless_vxlan_gpe[0x1]; u8 tunnel_stateless_ipv4_over_vxlan[0x1]; u8 tunnel_stateless_ip_over_ip[0x1]; - u8 reserved_at_2a[0x6]; + u8 insert_trailer[0x1]; + u8 reserved_at_2b[0x5]; u8 max_vxlan_udp_ports[0x8]; u8 reserved_at_38[0x6]; u8 max_geneve_opt_len[0x1]; @@ -1100,6 +1101,23 @@ struct mlx5_ifc_tls_cap_bits { u8 reserved_at_20[0x7e0]; }; +struct mlx5_ifc_ipsec_cap_bits { + u8 ipsec_full_offload[0x1]; + u8 ipsec_crypto_offload[0x1]; + u8 ipsec_esn[0x1]; + u8 ipsec_crypto_esp_aes_gcm_256_encrypt[0x1]; + u8 ipsec_crypto_esp_aes_gcm_128_encrypt[0x1]; + u8 ipsec_crypto_esp_aes_gcm_256_decrypt[0x1]; + u8 ipsec_crypto_esp_aes_gcm_128_decrypt[0x1]; + u8 reserved_at_7[0x4]; + u8 log_max_ipsec_offload[0x5]; + u8 reserved_at_10[0x10]; + + u8 
min_log_ipsec_full_replay_window[0x8]; + u8 max_log_ipsec_full_replay_window[0x8]; + u8 reserved_at_30[0x7d0]; +}; + enum { MLX5_WQ_TYPE_LINKED_LIST = 0x0, MLX5_WQ_TYPE_CYCLIC = 0x1, @@ -1464,7 +1482,8 @@ struct mlx5_ifc_cmd_hca_cap_bits { u8 reserved_at_460[0x3]; u8 log_max_uctx[0x5]; - u8 reserved_at_468[0x3]; + u8 reserved_at_468[0x2]; + u8 ipsec_offload[0x1]; u8 log_max_umem[0x5]; u8 max_num_eqs[0x10]; @@ -4143,7 +4162,8 @@ enum { MLX5_SET_FTE_MODIFY_ENABLE_MASK_ACTION = 0x0, MLX5_SET_FTE_MODIFY_ENABLE_MASK_FLOW_TAG = 0x1, MLX5_SET_FTE_MODIFY_ENABLE_MASK_DESTINATION_LIST = 0x2, - MLX5_SET_FTE_MODIFY_ENABLE_MASK_FLOW_COUNTERS = 0x3 + MLX5_SET_FTE_MODIFY_ENABLE_MASK_FLOW_COUNTERS = 0x3, + MLX5_SET_FTE_MODIFY_ENABLE_MASK_IPSEC_OBJ_ID = 0x4 }; struct mlx5_ifc_set_fte_out_bits { @@ -10468,10 +10488,62 @@ struct mlx5_ifc_affiliated_event_header_bits { enum { MLX5_HCA_CAP_GENERAL_OBJECT_TYPES_ENCRYPTION_KEY = BIT(0xc), + MLX5_HCA_CAP_GENERAL_OBJECT_TYPES_IPSEC = BIT(0x13), }; enum { MLX5_GENERAL_OBJECT_TYPES_ENCRYPTION_KEY = 0xc, + MLX5_GENERAL_OBJECT_TYPES_IPSEC = 0x13, +}; + +enum { + MLX5_IPSEC_OBJECT_ICV_LEN_16B, + MLX5_IPSEC_OBJECT_ICV_LEN_12B, + MLX5_IPSEC_OBJECT_ICV_LEN_8B, +}; + +struct mlx5_ifc_ipsec_obj_bits { + u8 modify_field_select[0x40]; + u8 full_offload[0x1]; + u8 reserved_at_41[0x1]; + u8 esn_en[0x1]; + u8 esn_overlap[0x1]; + u8 reserved_at_44[0x2]; + u8 icv_length[0x2]; + u8 reserved_at_48[0x4]; + u8 aso_return_reg[0x4]; + u8 reserved_at_50[0x10]; + + u8 esn_msb[0x20]; + + u8 reserved_at_80[0x8]; + u8 dekn[0x18]; + + u8 salt[0x20]; + + u8 implicit_iv[0x40]; + + u8 reserved_at_100[0x700]; +}; + +struct mlx5_ifc_create_ipsec_obj_in_bits { + struct mlx5_ifc_general_obj_in_cmd_hdr_bits general_obj_in_cmd_hdr; + struct mlx5_ifc_ipsec_obj_bits ipsec_object; +}; + +enum { + MLX5_MODIFY_IPSEC_BITMASK_ESN_OVERLAP = BIT(0), + MLX5_MODIFY_IPSEC_BITMASK_ESN_MSB = BIT(1), +}; + +struct mlx5_ifc_query_ipsec_obj_out_bits { + struct 
mlx5_ifc_general_obj_out_cmd_hdr_bits general_obj_out_cmd_hdr; + struct mlx5_ifc_ipsec_obj_bits ipsec_object; +}; + +struct mlx5_ifc_modify_ipsec_obj_in_bits { + struct mlx5_ifc_general_obj_in_cmd_hdr_bits general_obj_in_cmd_hdr; + struct mlx5_ifc_ipsec_obj_bits ipsec_object; }; struct mlx5_ifc_encryption_key_obj_bits { -- cgit v1.2.3 From dff8e2d15283dd92582ddeec25ca86e4cf2618c7 Mon Sep 17 00:00:00 2001 From: Erez Shitrit Date: Fri, 24 Apr 2020 12:45:04 -0700 Subject: net/mlx5: Use aligned variable while allocating ICM memory The alignment value is part of the input structure, so use it and spare extra memory allocation when is not needed. Now, using the new ability when allocating icm for Direct-Rule insertion. Signed-off-by: Ariel Levkovich Signed-off-by: Erez Shitrit Signed-off-by: Saeed Mahameed --- include/linux/mlx5/driver.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index b60e5ab7906b..b46537a81703 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -1080,7 +1080,8 @@ int mlx5_lag_query_cong_counters(struct mlx5_core_dev *dev, struct mlx5_uars_page *mlx5_get_uars_page(struct mlx5_core_dev *mdev); void mlx5_put_uars_page(struct mlx5_core_dev *mdev, struct mlx5_uars_page *up); int mlx5_dm_sw_icm_alloc(struct mlx5_core_dev *dev, enum mlx5_sw_icm_type type, - u64 length, u16 uid, phys_addr_t *addr, u32 *obj_id); + u64 length, u32 log_alignment, u16 uid, + phys_addr_t *addr, u32 *obj_id); int mlx5_dm_sw_icm_dealloc(struct mlx5_core_dev *dev, enum mlx5_sw_icm_type type, u64 length, u16 uid, phys_addr_t addr, u32 obj_id); -- cgit v1.2.3 From 244faedfd4d8e8c8e9f3c628d29bb74196b49743 Mon Sep 17 00:00:00 2001 From: Raed Salem Date: Fri, 24 Apr 2020 12:45:05 -0700 Subject: net/mlx5: Refactor imm_inval_pkey field in cqe struct The imm_inval_pkey field can hold four different types of data, depends on the usage, the data could be one 
of the below: - Immediate field of the received message - Invalidate rkey - Pkey of the packet - Flow table metadata Current implementation doesn't reflect the intended usage of the field at usage time. Reflect the different types by replacing this field with a union, and modify the code where this field is used to reflect its intended usage. Signed-off-by: Raed Salem Reviewed-by: Huy Nguyen Reviewed-by: Tariq Toukan Signed-off-by: Saeed Mahameed --- include/linux/mlx5/device.h | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mlx5/device.h b/include/linux/mlx5/device.h index 7b57877e501e..746e17473d72 100644 --- a/include/linux/mlx5/device.h +++ b/include/linux/mlx5/device.h @@ -767,7 +767,12 @@ struct mlx5_cqe64 { u8 l4_l3_hdr_type; __be16 vlan_info; __be32 srqn; /* [31:24]: lro_num_seg, [23:0]: srqn */ - __be32 imm_inval_pkey; + union { + __be32 immediate; + __be32 inval_rkey; + __be32 pkey; + __be32 ft_metadata; + }; u8 rsvd40[4]; __be32 byte_cnt; __be32 timestamp_h; -- cgit v1.2.3 From 06939536263d684073a30543930622eede633af1 Mon Sep 17 00:00:00 2001 From: Moshe Shemesh Date: Fri, 24 Apr 2020 12:45:06 -0700 Subject: net/mlx5: Add structure layout and defines for MFRL register Add needed structure layouts and defines for MFRL (Management Firmware Reset Level) register. This structure will be used for the firmware upgrade and reset flow in the downstream patches. 
Signed-off-by: Moshe Shemesh Reviewed-by: Tariq Toukan Signed-off-by: Saeed Mahameed --- include/linux/mlx5/driver.h | 1 + include/linux/mlx5/mlx5_ifc.h | 24 ++++++++++++++++++++++++ 2 files changed, 25 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index b46537a81703..d82dbbab8179 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -130,6 +130,7 @@ enum { MLX5_REG_NODE_DESC = 0x6001, MLX5_REG_HOST_ENDIANNESS = 0x7004, MLX5_REG_MCIA = 0x9014, + MLX5_REG_MFRL = 0x9028, MLX5_REG_MLCR = 0x902b, MLX5_REG_MTRC_CAP = 0x9040, MLX5_REG_MTRC_CONF = 0x9041, diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index cf971d341189..9e6a3cec1e32 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -9703,6 +9703,29 @@ struct mlx5_ifc_mcda_reg_bits { u8 data[0][0x20]; }; +enum { + MLX5_MFRL_REG_RESET_TYPE_FULL_CHIP = BIT(0), + MLX5_MFRL_REG_RESET_TYPE_NET_PORT_ALIVE = BIT(1), +}; + +enum { + MLX5_MFRL_REG_RESET_LEVEL0 = BIT(0), + MLX5_MFRL_REG_RESET_LEVEL3 = BIT(3), + MLX5_MFRL_REG_RESET_LEVEL6 = BIT(6), +}; + +struct mlx5_ifc_mfrl_reg_bits { + u8 reserved_at_0[0x20]; + + u8 reserved_at_20[0x2]; + u8 pci_sync_for_fw_update_start[0x1]; + u8 pci_sync_for_fw_update_resp[0x2]; + u8 rst_type_sel[0x3]; + u8 reserved_at_28[0x8]; + u8 reset_type[0x8]; + u8 reset_level[0x8]; +}; + struct mlx5_ifc_mirc_reg_bits { u8 reserved_at_0[0x18]; u8 status_code[0x8]; @@ -9766,6 +9789,7 @@ union mlx5_ifc_ports_control_registers_document_bits { struct mlx5_ifc_mcc_reg_bits mcc_reg; struct mlx5_ifc_mcda_reg_bits mcda_reg; struct mlx5_ifc_mirc_reg_bits mirc_reg; + struct mlx5_ifc_mfrl_reg_bits mfrl_reg; u8 reserved_at_0[0x60e0]; }; -- cgit v1.2.3 From 3df0107784ceb388039b1fe510a8c7b8816de8f0 Mon Sep 17 00:00:00 2001 From: Moshe Shemesh Date: Fri, 24 Apr 2020 12:45:07 -0700 Subject: net/mlx5: Add structure and defines for pci sync for fw update event Add needed 
structure layouts and defines for pci sync for fw update event. The downstream patches will include event handlers for this event type. Signed-off-by: Moshe Shemesh Reviewed-by: Tariq Toukan Signed-off-by: Saeed Mahameed --- include/linux/mlx5/device.h | 15 +++++++++++++++ include/linux/mlx5/mlx5_ifc.h | 4 +++- 2 files changed, 18 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mlx5/device.h b/include/linux/mlx5/device.h index 746e17473d72..de93f0b67973 100644 --- a/include/linux/mlx5/device.h +++ b/include/linux/mlx5/device.h @@ -364,6 +364,7 @@ enum { enum { MLX5_GENERAL_SUBTYPE_DELAY_DROP_TIMEOUT = 0x1, MLX5_GENERAL_SUBTYPE_PCI_POWER_CHANGE_EVENT = 0x5, + MLX5_GENERAL_SUBTYPE_PCI_SYNC_FOR_FW_UPDATE_EVENT = 0x8, }; enum { @@ -689,6 +690,19 @@ struct mlx5_eqe_temp_warning { __be64 sensor_warning_lsb; } __packed; +#define SYNC_RST_STATE_MASK 0xf + +enum sync_rst_state_type { + MLX5_SYNC_RST_STATE_RESET_REQUEST = 0x0, + MLX5_SYNC_RST_STATE_RESET_NOW = 0x1, + MLX5_SYNC_RST_STATE_RESET_ABORT = 0x2, +}; + +struct mlx5_eqe_sync_fw_update { + u8 reserved_at_0[3]; + u8 sync_rst_state; +}; + union ev_data { __be32 raw[7]; struct mlx5_eqe_cmd cmd; @@ -707,6 +721,7 @@ union ev_data { struct mlx5_eqe_dct dct; struct mlx5_eqe_temp_warning temp_warning; struct mlx5_eqe_xrq_err xrq_err; + struct mlx5_eqe_sync_fw_update sync_fw_update; } __packed; struct mlx5_eqe { diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index 9e6a3cec1e32..058ded202b65 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -1317,7 +1317,9 @@ struct mlx5_ifc_cmd_hca_cap_bits { u8 wol_p[0x1]; u8 stat_rate_support[0x10]; - u8 reserved_at_1f0[0xc]; + u8 reserved_at_1f0[0x1]; + u8 pci_sync_for_fw_update_event[0x1]; + u8 reserved_at_1f2[0xa]; u8 cqe_version[0x4]; u8 compact_address_vector[0x1]; -- cgit v1.2.3 From ee5cdf7a5e8945372c7496e98de2b364e095b60b Mon Sep 17 00:00:00 2001 From: Tariq Toukan Date: Fri, 24 Apr 2020 
12:45:08 -0700 Subject: net/mlx5: Introduce TLS RX offload hardware bits Add TLS RX offload related IFC hardware fields and enumerations. Signed-off-by: Tariq Toukan Reviewed-by: Maxim Mikityanskiy Reviewed-by: Boris Pismenny Signed-off-by: Saeed Mahameed --- include/linux/mlx5/device.h | 18 ++++++++++++++++-- include/linux/mlx5/mlx5_ifc.h | 5 +++-- 2 files changed, 19 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mlx5/device.h b/include/linux/mlx5/device.h index de93f0b67973..1bc27aca648b 100644 --- a/include/linux/mlx5/device.h +++ b/include/linux/mlx5/device.h @@ -450,10 +450,12 @@ enum { enum { MLX5_OPC_MOD_TLS_TIS_STATIC_PARAMS = 0x1, + MLX5_OPC_MOD_TLS_TIR_STATIC_PARAMS = 0x2, }; enum { MLX5_OPC_MOD_TLS_TIS_PROGRESS_PARAMS = 0x1, + MLX5_OPC_MOD_TLS_TIR_PROGRESS_PARAMS = 0x2, }; enum { @@ -764,7 +766,7 @@ struct mlx5_err_cqe { }; struct mlx5_cqe64 { - u8 outer_l3_tunneled; + u8 tls_outer_l3_tunneled; u8 rsvd0; __be16 wqe_id; u8 lro_tcppsh_abort_dupack; @@ -854,7 +856,12 @@ static inline u8 get_cqe_l3_hdr_type(struct mlx5_cqe64 *cqe) static inline bool cqe_is_tunneled(struct mlx5_cqe64 *cqe) { - return cqe->outer_l3_tunneled & 0x1; + return cqe->tls_outer_l3_tunneled & 0x1; +} + +static inline u8 get_cqe_tls_offload(struct mlx5_cqe64 *cqe) +{ + return (cqe->tls_outer_l3_tunneled >> 3) & 0x3; } static inline bool cqe_has_vlan(struct mlx5_cqe64 *cqe) @@ -942,6 +949,13 @@ enum { CQE_L4_OK = 1 << 2, }; +enum { + CQE_TLS_OFFLOAD_NOT_DECRYPTED = 0x0, + CQE_TLS_OFFLOAD_DECRYPTED = 0x1, + CQE_TLS_OFFLOAD_RESYNC = 0x2, + CQE_TLS_OFFLOAD_ERROR = 0x3, +}; + struct mlx5_sig_err_cqe { u8 rsvd0[16]; __be32 expected_trans_sig; diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index 058ded202b65..6a6bb5dc7916 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -1491,7 +1491,7 @@ struct mlx5_ifc_cmd_hca_cap_bits { u8 reserved_at_480[0x1]; u8 tls_tx[0x1]; - u8 reserved_at_482[0x1]; + 
u8 tls_rx[0x1]; u8 log_max_l2_table[0x5]; u8 reserved_at_488[0x8]; u8 log_uar_page_sz[0x10]; @@ -3136,7 +3136,8 @@ struct mlx5_ifc_tirc_bits { u8 reserved_at_0[0x20]; u8 disp_type[0x4]; - u8 reserved_at_24[0x1c]; + u8 tls_en[0x1]; + u8 reserved_at_25[0x1b]; u8 reserved_at_40[0x40]; -- cgit v1.2.3 From 0e1533bb9cce2c6b2aecdfddfcc0de3beeaddc7b Mon Sep 17 00:00:00 2001 From: Eran Ben Elisha Date: Fri, 24 Apr 2020 12:45:09 -0700 Subject: net/mlx5: Add release all pages capability bit Add a bit in HCA capabilities layout to indicate if release all pages is supported. Signed-off-by: Eran Ben Elisha Reviewed-by: Moshe Shemesh Signed-off-by: Saeed Mahameed --- include/linux/mlx5/mlx5_ifc.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index 6a6bb5dc7916..fb243848132d 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -1244,7 +1244,9 @@ struct mlx5_ifc_cmd_hca_cap_bits { u8 reserved_at_130[0xa]; u8 log_max_ra_res_dc[0x6]; - u8 reserved_at_140[0x9]; + u8 reserved_at_140[0x6]; + u8 release_all_pages[0x1]; + u8 reserved_at_147[0x2]; u8 roce_accl[0x1]; u8 log_max_ra_req_qp[0x6]; u8 reserved_at_150[0xa]; -- cgit v1.2.3 From 2dc8b5246d2c94f732c02e7a688d8a9c0c65361f Mon Sep 17 00:00:00 2001 From: Raed Salem Date: Fri, 24 Apr 2020 12:45:10 -0700 Subject: net/mlx5: TX WQE Add trailer insertion field Add new TX WQE field for Connect-X6DX trailer insertion support, when set, the HW adds a trailer to the packet, the WQE trailer association flags are used to set to HW the header which the trailer belongs. 
Signed-off-by: Raed Salem Signed-off-by: Saeed Mahameed --- include/linux/mlx5/qp.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mlx5/qp.h b/include/linux/mlx5/qp.h index ef127a156a62..f23eb18526fe 100644 --- a/include/linux/mlx5/qp.h +++ b/include/linux/mlx5/qp.h @@ -229,6 +229,11 @@ enum { enum { MLX5_ETH_WQE_SVLAN = 1 << 0, + MLX5_ETH_WQE_TRAILER_HDR_OUTER_IP_ASSOC = 1 << 26, + MLX5_ETH_WQE_TRAILER_HDR_OUTER_L4_ASSOC = 1 << 27, + MLX5_ETH_WQE_TRAILER_HDR_INNER_IP_ASSOC = 3 << 26, + MLX5_ETH_WQE_TRAILER_HDR_INNER_L4_ASSOC = 1 << 28, + MLX5_ETH_WQE_INSERT_TRAILER = 1 << 30, MLX5_ETH_WQE_INSERT_VLAN = 1 << 15, }; @@ -257,6 +262,7 @@ struct mlx5_wqe_eth_seg { __be16 type; __be16 vlan_tci; } insert; + __be32 trailer; }; }; -- cgit v1.2.3 From f9d041271cf44ca02eed0cc82e1a6d8c814c53ed Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Tue, 28 Apr 2020 17:16:05 -0700 Subject: bpf: Refactor bpf_link update handling Make bpf_link update support more generic by making it into another bpf_link_ops method. This allows generic syscall handling code to be agnostic to various conditionally compiled features (e.g., the case of CONFIG_CGROUP_BPF). This also allows link type-specific code to remain static within its respective code base. Refactor existing bpf_cgroup_link code and take advantage of this. 
Signed-off-by: Andrii Nakryiko Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20200429001614.1544-2-andriin@fb.com --- include/linux/bpf-cgroup.h | 12 ------------ include/linux/bpf.h | 3 ++- 2 files changed, 2 insertions(+), 13 deletions(-) (limited to 'include/linux') diff --git a/include/linux/bpf-cgroup.h b/include/linux/bpf-cgroup.h index 0b41fd5fc96b..a9cb9a5bf8e9 100644 --- a/include/linux/bpf-cgroup.h +++ b/include/linux/bpf-cgroup.h @@ -100,8 +100,6 @@ int __cgroup_bpf_attach(struct cgroup *cgrp, int __cgroup_bpf_detach(struct cgroup *cgrp, struct bpf_prog *prog, struct bpf_cgroup_link *link, enum bpf_attach_type type); -int __cgroup_bpf_replace(struct cgroup *cgrp, struct bpf_cgroup_link *link, - struct bpf_prog *new_prog); int __cgroup_bpf_query(struct cgroup *cgrp, const union bpf_attr *attr, union bpf_attr __user *uattr); @@ -112,8 +110,6 @@ int cgroup_bpf_attach(struct cgroup *cgrp, u32 flags); int cgroup_bpf_detach(struct cgroup *cgrp, struct bpf_prog *prog, enum bpf_attach_type type); -int cgroup_bpf_replace(struct bpf_link *link, struct bpf_prog *old_prog, - struct bpf_prog *new_prog); int cgroup_bpf_query(struct cgroup *cgrp, const union bpf_attr *attr, union bpf_attr __user *uattr); @@ -353,7 +349,6 @@ int cgroup_bpf_prog_query(const union bpf_attr *attr, #else struct bpf_prog; -struct bpf_link; struct cgroup_bpf {}; static inline int cgroup_bpf_inherit(struct cgroup *cgrp) { return 0; } static inline void cgroup_bpf_offline(struct cgroup *cgrp) {} @@ -377,13 +372,6 @@ static inline int cgroup_bpf_link_attach(const union bpf_attr *attr, return -EINVAL; } -static inline int cgroup_bpf_replace(struct bpf_link *link, - struct bpf_prog *old_prog, - struct bpf_prog *new_prog) -{ - return -EINVAL; -} - static inline int cgroup_bpf_prog_query(const union bpf_attr *attr, union bpf_attr __user *uattr) { diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 10960cfabea4..81c8620cb4c4 100644 --- a/include/linux/bpf.h +++ 
b/include/linux/bpf.h @@ -1093,7 +1093,8 @@ struct bpf_link { struct bpf_link_ops { void (*release)(struct bpf_link *link); void (*dealloc)(struct bpf_link *link); - + int (*update_prog)(struct bpf_link *link, struct bpf_prog *new_prog, + struct bpf_prog *old_prog); }; void bpf_link_init(struct bpf_link *link, const struct bpf_link_ops *ops, -- cgit v1.2.3 From a3b80e1078943dc12553166fb08e258463dec013 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Tue, 28 Apr 2020 17:16:06 -0700 Subject: bpf: Allocate ID for bpf_link Generate ID for each bpf_link using IDR, similarly to bpf_map and bpf_prog. bpf_link creation, initialization, attachment, and exposing to user-space through FD and ID is a complicated multi-step process, abstract it away through bpf_link_primer and bpf_link_prime(), bpf_link_settle(), and bpf_link_cleanup() internal API. They guarantee that until bpf_link is properly attached, user-space won't be able to access partially-initialized bpf_link either from FD or ID. All this allows to simplify bpf_link attachment and error handling code. 
Signed-off-by: Andrii Nakryiko Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20200429001614.1544-3-andriin@fb.com --- include/linux/bpf.h | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 81c8620cb4c4..875d1f0af803 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -1085,11 +1085,19 @@ int bpf_prog_new_fd(struct bpf_prog *prog); struct bpf_link { atomic64_t refcnt; + u32 id; const struct bpf_link_ops *ops; struct bpf_prog *prog; struct work_struct work; }; +struct bpf_link_primer { + struct bpf_link *link; + struct file *file; + int fd; + u32 id; +}; + struct bpf_link_ops { void (*release)(struct bpf_link *link); void (*dealloc)(struct bpf_link *link); @@ -1097,10 +1105,11 @@ struct bpf_link_ops { struct bpf_prog *old_prog); }; -void bpf_link_init(struct bpf_link *link, const struct bpf_link_ops *ops, - struct bpf_prog *prog); -void bpf_link_cleanup(struct bpf_link *link, struct file *link_file, - int link_fd); +void bpf_link_init(struct bpf_link *link, + const struct bpf_link_ops *ops, struct bpf_prog *prog); +int bpf_link_prime(struct bpf_link *link, struct bpf_link_primer *primer); +int bpf_link_settle(struct bpf_link_primer *primer); +void bpf_link_cleanup(struct bpf_link_primer *primer); void bpf_link_inc(struct bpf_link *link); void bpf_link_put(struct bpf_link *link); int bpf_link_new_fd(struct bpf_link *link); -- cgit v1.2.3 From f2e10bff16a0fdd41ba278c84da9813700e356af Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Tue, 28 Apr 2020 17:16:08 -0700 Subject: bpf: Add support for BPF_OBJ_GET_INFO_BY_FD for bpf_link Add ability to fetch bpf_link details through BPF_OBJ_GET_INFO_BY_FD command. Also enhance show_fdinfo to potentially include bpf_link type-specific information (similarly to obj_info). Also introduce enum bpf_link_type stored in bpf_link itself and expose it in UAPI. 
bpf_link_tracing also now will store and return bpf_attach_type. Signed-off-by: Andrii Nakryiko Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20200429001614.1544-5-andriin@fb.com --- include/linux/bpf-cgroup.h | 2 -- include/linux/bpf.h | 8 +++++++- include/linux/bpf_types.h | 6 ++++++ 3 files changed, 13 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/bpf-cgroup.h b/include/linux/bpf-cgroup.h index a9cb9a5bf8e9..272626cc3fc9 100644 --- a/include/linux/bpf-cgroup.h +++ b/include/linux/bpf-cgroup.h @@ -57,8 +57,6 @@ struct bpf_cgroup_link { enum bpf_attach_type type; }; -extern const struct bpf_link_ops bpf_cgroup_link_lops; - struct bpf_prog_list { struct list_head node; struct bpf_prog *prog; diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 875d1f0af803..c07b1d2f3824 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -1026,9 +1026,11 @@ extern const struct file_operations bpf_prog_fops; extern const struct bpf_verifier_ops _name ## _verifier_ops; #define BPF_MAP_TYPE(_id, _ops) \ extern const struct bpf_map_ops _ops; +#define BPF_LINK_TYPE(_id, _name) #include #undef BPF_PROG_TYPE #undef BPF_MAP_TYPE +#undef BPF_LINK_TYPE extern const struct bpf_prog_ops bpf_offload_prog_ops; extern const struct bpf_verifier_ops tc_cls_act_analyzer_ops; @@ -1086,6 +1088,7 @@ int bpf_prog_new_fd(struct bpf_prog *prog); struct bpf_link { atomic64_t refcnt; u32 id; + enum bpf_link_type type; const struct bpf_link_ops *ops; struct bpf_prog *prog; struct work_struct work; @@ -1103,9 +1106,12 @@ struct bpf_link_ops { void (*dealloc)(struct bpf_link *link); int (*update_prog)(struct bpf_link *link, struct bpf_prog *new_prog, struct bpf_prog *old_prog); + void (*show_fdinfo)(const struct bpf_link *link, struct seq_file *seq); + int (*fill_link_info)(const struct bpf_link *link, + struct bpf_link_info *info); }; -void bpf_link_init(struct bpf_link *link, +void bpf_link_init(struct bpf_link *link, enum 
bpf_link_type type, const struct bpf_link_ops *ops, struct bpf_prog *prog); int bpf_link_prime(struct bpf_link *link, struct bpf_link_primer *primer); int bpf_link_settle(struct bpf_link_primer *primer); diff --git a/include/linux/bpf_types.h b/include/linux/bpf_types.h index ba0c2d56f8a3..8345cdf553b8 100644 --- a/include/linux/bpf_types.h +++ b/include/linux/bpf_types.h @@ -118,3 +118,9 @@ BPF_MAP_TYPE(BPF_MAP_TYPE_STACK, stack_map_ops) #if defined(CONFIG_BPF_JIT) BPF_MAP_TYPE(BPF_MAP_TYPE_STRUCT_OPS, bpf_struct_ops_map_ops) #endif + +BPF_LINK_TYPE(BPF_LINK_TYPE_RAW_TRACEPOINT, raw_tracepoint) +BPF_LINK_TYPE(BPF_LINK_TYPE_TRACING, tracing) +#ifdef CONFIG_CGROUP_BPF +BPF_LINK_TYPE(BPF_LINK_TYPE_CGROUP, cgroup) +#endif -- cgit v1.2.3 From 6e3a401fc8af01828bcdc92d713195d318b36e7e Mon Sep 17 00:00:00 2001 From: Dmitry Yakunin Date: Thu, 30 Apr 2020 18:51:14 +0300 Subject: inet_diag: add cgroup id attribute This patch adds cgroup v2 ID to common inet diag message attributes. Cgroup v2 ID is kernfs ID (ino or ino+gen). This attribute allows filtering inet diag output by cgroup ID obtained by name_to_handle_at() syscall. When net_cls or net_prio cgroup is activated this ID is equal to 1 (root cgroup ID) for newly created sockets. Some notes about this ID: 1) gets initialized in socket() syscall 2) incoming socket gets ID from listening socket (not during accept() syscall) 3) not changed when the process gets moved to another cgroup 4) can point to deleted cgroup (refcounting) v2: - use CONFIG_SOCK_CGROUP_DATA instead of CONFIG_CGROUPS v3: - fix attr size by using nla_total_size_64bit() (Eric Dumazet) - more detailed commit message (Konstantin Khlebnikov) Signed-off-by: Dmitry Yakunin Reviewed-by: Konstantin Khlebnikov Acked-By: Tejun Heo Signed-off-by: David S.
Miller --- include/linux/inet_diag.h | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/inet_diag.h b/include/linux/inet_diag.h index ce9ed1c0602f..0ef2d800fda7 100644 --- a/include/linux/inet_diag.h +++ b/include/linux/inet_diag.h @@ -71,7 +71,11 @@ static inline size_t inet_diag_msg_attrs_size(void) + nla_total_size(1) /* INET_DIAG_SKV6ONLY */ #endif + nla_total_size(4) /* INET_DIAG_MARK */ - + nla_total_size(4); /* INET_DIAG_CLASS_ID */ + + nla_total_size(4) /* INET_DIAG_CLASS_ID */ +#ifdef CONFIG_SOCK_CGROUP_DATA + + nla_total_size_64bit(sizeof(u64)) /* INET_DIAG_CGROUP_ID */ +#endif + ; } int inet_diag_msg_attrs_fill(struct sock *sk, struct sk_buff *skb, struct inet_diag_msg *r, int ext, -- cgit v1.2.3 From ea5bacaa2cec6967ed337f4d0ad6034123ca737b Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Thu, 30 Apr 2020 18:04:03 +0200 Subject: docs: networking: convert netdev-features.txt to ReST Not much to be done here: - add SPDX header; - adjust titles and chapters, adding proper markups; - add to networking/index.rst. Signed-off-by: Mauro Carvalho Chehab Signed-off-by: David S. Miller --- include/linux/netdev_features.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/netdev_features.h b/include/linux/netdev_features.h index 9d53c5ad272c..2cc3cf80b49a 100644 --- a/include/linux/netdev_features.h +++ b/include/linux/netdev_features.h @@ -89,7 +89,7 @@ enum { * Add your fresh new feature above and remember to update * netdev_features_strings[] in net/core/ethtool.c and maybe * some feature mask #defines below. Please also describe it - * in Documentation/networking/netdev-features.txt. + * in Documentation/networking/netdev-features.rst. 
*/ /**/NETDEV_FEATURE_COUNT -- cgit v1.2.3 From 2b195850128f5bafde177b12489d9fa27962cc1e Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 30 Apr 2020 10:35:41 -0700 Subject: tcp: add tp->dup_ack_counter In commit 86de5921a3d5 ("tcp: defer SACK compression after DupThresh") I added a TCP_FASTRETRANS_THRESH bias to tp->compressed_ack in order to enable sack compression only after 3 dupacks. Since we plan to relax this rule for flows that involve stacks not requiring this old rule, this patch adds a distinct tp->dup_ack_counter. This means the TCP_FASTRETRANS_THRESH value is now used in a single location that a future patch can adjust: if (tp->dup_ack_counter < TCP_FASTRETRANS_THRESH) { tp->dup_ack_counter++; goto send_now; } This patch also introduces tcp_sack_compress_send_ack() helper to ease following patch comprehension. This patch refines LINUX_MIB_TCPACKCOMPRESSED to not count the acks that we had to send if the timer expires or tcp_sack_compress_send_ack() is sending an ack. Signed-off-by: Eric Dumazet Acked-by: Soheil Hassas Yeganeh Acked-by: Neal Cardwell Signed-off-by: David S. Miller --- include/linux/tcp.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/tcp.h b/include/linux/tcp.h index 421c99c12291..2c6f87e9f0cf 100644 --- a/include/linux/tcp.h +++ b/include/linux/tcp.h @@ -268,6 +268,7 @@ struct tcp_sock { } rack; u16 advmss; /* Advertised MSS */ u8 compressed_ack; + u8 dup_ack_counter; u32 chrono_start; /* Start time in jiffies of a TCP chrono */ u32 chrono_stat[3]; /* Time in jiffies for chrono_stat stats */ u8 chrono_type:2, /* current chronograph type */ -- cgit v1.2.3 From f256356f65e6449a9fcf6089ea25882c91768665 Mon Sep 17 00:00:00 2001 From: Yangbo Lu Date: Mon, 27 Apr 2020 11:39:03 +0800 Subject: ptp_qoriq: output PPS signal on FIPER2 in default Output PPS signal on FIPER2 (Fixed Period Interval Pulse) in default which is more desired by user. Signed-off-by: Yangbo Lu Signed-off-by: David S. 
Miller --- include/linux/fsl/ptp_qoriq.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/fsl/ptp_qoriq.h b/include/linux/fsl/ptp_qoriq.h index 75884563059f..884b8f8ca06d 100644 --- a/include/linux/fsl/ptp_qoriq.h +++ b/include/linux/fsl/ptp_qoriq.h @@ -135,7 +135,7 @@ struct ptp_qoriq_registers { #define DEFAULT_CKSEL 1 #define DEFAULT_TMR_PRSC 2 #define DEFAULT_FIPER1_PERIOD 1000000000 -#define DEFAULT_FIPER2_PERIOD 100000 +#define DEFAULT_FIPER2_PERIOD 1000000000 struct ptp_qoriq { void __iomem *base; -- cgit v1.2.3 From e4e51da66dc812176cca16b0f8a5b87b173deb5d Mon Sep 17 00:00:00 2001 From: Michael Walle Date: Wed, 29 Apr 2020 01:06:59 +0200 Subject: net: phy: bcm54140: add second PHY ID This PHY has two PHY IDs depending on its mode. Adjust the mask so that it includes both IDs. Signed-off-by: Michael Walle Reviewed-by: Andrew Lunn Reviewed-by: Florian Fainelli Signed-off-by: David S. Miller --- include/linux/brcmphy.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/brcmphy.h b/include/linux/brcmphy.h index 8be150e69c7c..58d0150acc3e 100644 --- a/include/linux/brcmphy.h +++ b/include/linux/brcmphy.h @@ -25,7 +25,7 @@ #define PHY_ID_BCM5461 0x002060c0 #define PHY_ID_BCM54612E 0x03625e60 #define PHY_ID_BCM54616S 0x03625d10 -#define PHY_ID_BCM54140 0xae025019 +#define PHY_ID_BCM54140 0xae025009 #define PHY_ID_BCM57780 0x03625d90 #define PHY_ID_BCM89610 0x03625cd0 -- cgit v1.2.3 From d46edd671a147032e22cfeb271a5734703093649 Mon Sep 17 00:00:00 2001 From: Song Liu Date: Thu, 30 Apr 2020 00:15:04 -0700 Subject: bpf: Sharing bpf runtime stats with BPF_ENABLE_STATS Currently, sysctl kernel.bpf_stats_enabled controls BPF runtime stats. Typical userspace tools use kernel.bpf_stats_enabled as follows: 1. Enable kernel.bpf_stats_enabled; 2. Check program run_time_ns; 3. Sleep for the monitoring period; 4. 
Check program run_time_ns again, calculate the difference; 5. Disable kernel.bpf_stats_enabled. The problem with this approach is that only one userspace tool can toggle this sysctl. If multiple tools toggle the sysctl at the same time, the measurement may be inaccurate. To fix this problem while keeping backward compatibility, introduce a new bpf command BPF_ENABLE_STATS. On success, this command enables stats and returns a valid fd. BPF_ENABLE_STATS takes argument "type". Currently, only one type, BPF_STATS_RUN_TIME, is supported. We can extend the command to support other types of stats in the future. With BPF_ENABLE_STATS, a user space tool would have the following flow: 1. Get a fd with BPF_ENABLE_STATS, and make sure it is valid; 2. Check program run_time_ns; 3. Sleep for the monitoring period; 4. Check program run_time_ns again, calculate the difference; 5. Close the fd. Signed-off-by: Song Liu Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20200430071506.1408910-2-songliubraving@fb.com --- include/linux/bpf.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index c07b1d2f3824..1262ec460ab3 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -987,6 +987,7 @@ _out: \ #ifdef CONFIG_BPF_SYSCALL DECLARE_PER_CPU(int, bpf_prog_active); +extern struct mutex bpf_stats_enabled_mutex; /* * Block execution of BPF programs attached to instrumentation (perf, -- cgit v1.2.3 From cff9f12b18915d957a2130885a00f8ab15cff7e4 Mon Sep 17 00:00:00 2001 From: Maor Gottlieb Date: Thu, 30 Apr 2020 22:21:31 +0300 Subject: net/core: Introduce netdev_get_xmit_slave Add a new ndo to get the xmit slave of the master device. The reference counters are not incremented so the caller must be careful with locks. The user can ask to get the xmit slave assuming all the slaves can transmit by setting the all_slaves arg to true.
Signed-off-by: Maor Gottlieb Reviewed-by: Jiri Pirko Reviewed-by: David Ahern Acked-by: David S. Miller Signed-off-by: Saeed Mahameed --- include/linux/netdevice.h | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 130a668049ab..26bc0f11b7ad 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1146,6 +1146,12 @@ struct netdev_net_notifier { * int (*ndo_del_slave)(struct net_device *dev, struct net_device *slave_dev); * Called to release previously enslaved netdev. * + * struct net_device *(*ndo_get_xmit_slave)(struct net_device *dev, + * struct sk_buff *skb, + * bool all_slaves); + * Get the xmit slave of master device. If all_slaves is true, function + * assume all the slaves can transmit. + * * Feature/offload setting functions. * netdev_features_t (*ndo_fix_features)(struct net_device *dev, * netdev_features_t features); @@ -1389,6 +1395,9 @@ struct net_device_ops { struct netlink_ext_ack *extack); int (*ndo_del_slave)(struct net_device *dev, struct net_device *slave_dev); + struct net_device* (*ndo_get_xmit_slave)(struct net_device *dev, + struct sk_buff *skb, + bool all_slaves); netdev_features_t (*ndo_fix_features)(struct net_device *dev, netdev_features_t features); int (*ndo_set_features)(struct net_device *dev, @@ -2731,6 +2740,9 @@ void netdev_freemem(struct net_device *dev); void synchronize_net(void); int init_dummy_netdev(struct net_device *dev); +struct net_device *netdev_get_xmit_slave(struct net_device *dev, + struct sk_buff *skb, + bool all_slaves); struct net_device *dev_get_by_index(struct net *net, int ifindex); struct net_device *__dev_get_by_index(struct net *net, int ifindex); struct net_device *dev_get_by_index_rcu(struct net *net, int ifindex); -- cgit v1.2.3 From c6bc6041b10f70b617f2d13894311fe62027d292 Mon Sep 17 00:00:00 2001 From: Maor Gottlieb Date: Thu, 30 Apr 2020 22:21:41 +0300 Subject: net/mlx5: Add support to 
get lag physical port Add function to get the device physical port of the lag slave. Signed-off-by: Maor Gottlieb Reviewed-by: Leon Romanovsky Acked-by: David S. Miller Signed-off-by: Saeed Mahameed --- include/linux/mlx5/driver.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index d82dbbab8179..267dfcc5493e 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -1074,6 +1074,8 @@ bool mlx5_lag_is_sriov(struct mlx5_core_dev *dev); bool mlx5_lag_is_multipath(struct mlx5_core_dev *dev); bool mlx5_lag_is_active(struct mlx5_core_dev *dev); struct net_device *mlx5_lag_get_roce_netdev(struct mlx5_core_dev *dev); +u8 mlx5_lag_get_slave_port(struct mlx5_core_dev *dev, + struct net_device *slave); int mlx5_lag_query_cong_counters(struct mlx5_core_dev *dev, u64 *values, int num_counters, -- cgit v1.2.3 From 184ecc9eb260d5a3bcdddc5bebd18f285ac004e9 Mon Sep 17 00:00:00 2001 From: Vincent Cheng Date: Fri, 1 May 2020 23:35:36 -0400 Subject: ptp: Add adjphase function to support phase offset control. Adds adjust phase function to take advantage of a PHC clock's hardware filtering capability that uses phase offset control word instead of frequency offset control word. Signed-off-by: Vincent Cheng Reviewed-by: Richard Cochran Signed-off-by: David S. Miller --- include/linux/ptp_clock_kernel.h | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/ptp_clock_kernel.h b/include/linux/ptp_clock_kernel.h index 121a7eda4593..31144d954d89 100644 --- a/include/linux/ptp_clock_kernel.h +++ b/include/linux/ptp_clock_kernel.h @@ -36,7 +36,7 @@ struct ptp_system_timestamp { }; /** - * struct ptp_clock_info - decribes a PTP hardware clock + * struct ptp_clock_info - describes a PTP hardware clock * * @owner: The clock driver should set to THIS_MODULE. 
* @name: A short "friendly name" to identify the clock and to @@ -65,6 +65,9 @@ struct ptp_system_timestamp { * parameter delta: Desired frequency offset from nominal frequency * in parts per billion * + * @adjphase: Adjusts the phase offset of the hardware clock. + * parameter delta: Desired change in nanoseconds. + * * @adjtime: Shifts the time of the hardware clock. * parameter delta: Desired change in nanoseconds. * @@ -128,6 +131,7 @@ struct ptp_clock_info { struct ptp_pin_desc *pin_config; int (*adjfine)(struct ptp_clock_info *ptp, long scaled_ppm); int (*adjfreq)(struct ptp_clock_info *ptp, s32 delta); + int (*adjphase)(struct ptp_clock_info *ptp, s32 phase); int (*adjtime)(struct ptp_clock_info *ptp, s64 delta); int (*gettime64)(struct ptp_clock_info *ptp, struct timespec64 *ts); int (*gettimex64)(struct ptp_clock_info *ptp, struct timespec64 *ts, -- cgit v1.2.3 From b86cd700edd3bfe27f631649727b7796067bb3fd Mon Sep 17 00:00:00 2001 From: Heiner Kallweit Date: Mon, 4 May 2020 19:27:00 +0200 Subject: net: add helper eth_hw_addr_crc Several drivers use the same code as basis for filter hashes. Therefore let's factor it out to a helper. This way drivers don't have to access struct netdev_hw_addr internals. Signed-off-by: Heiner Kallweit Signed-off-by: David S. Miller --- include/linux/etherdevice.h | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'include/linux') diff --git a/include/linux/etherdevice.h b/include/linux/etherdevice.h index 8801f1f986e5..2e5debc0373c 100644 --- a/include/linux/etherdevice.h +++ b/include/linux/etherdevice.h @@ -20,6 +20,7 @@ #include #include #include +#include #include #include @@ -265,6 +266,17 @@ static inline void eth_hw_addr_random(struct net_device *dev) eth_random_addr(dev->dev_addr); } +/** + * eth_hw_addr_crc - Calculate CRC from netdev_hw_addr + * @ha: pointer to hardware address + * + * Calculate CRC from a hardware address as basis for filter hashes. 
+ */ +static inline u32 eth_hw_addr_crc(struct netdev_hw_addr *ha) +{ + return ether_crc(ETH_ALEN, ha->addr); +} + /** * ether_addr_copy - Copy an Ethernet address * @dst: Pointer to a six-byte array Ethernet address destination -- cgit v1.2.3 From 1a33e10e4a95cb109ff1145098175df3113313ef Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Sat, 2 May 2020 22:22:19 -0700 Subject: net: partially revert dynamic lockdep key changes This patch reverts the following commits: commit 064ff66e2bef84f1153087612032b5b9eab005bd "bonding: add missing netdev_update_lockdep_key()" commit 53d374979ef147ab51f5d632dfe20b14aebeccd0 "net: avoid updating qdisc_xmit_lock_key in netdev_update_lockdep_key()" commit 1f26c0d3d24125992ab0026b0dab16c08df947c7 "net: fix kernel-doc warning in <linux/netdevice.h>" commit ab92d68fc22f9afab480153bd82a20f6e2533769 "net: core: add generic lockdep keys" but keeps the addr_list_lock_key because we still lock addr_list_lock nestedly on stack devices, unlike xmit_lock, this is safe because we don't take addr_list_lock on any fast path. Reported-and-tested-by: syzbot+aaa6fa4949cc5d9b7b25@syzkaller.appspotmail.com Cc: Dmitry Vyukov Cc: Taehee Yoo Signed-off-by: Cong Wang Acked-by: Taehee Yoo Signed-off-by: David S. Miller --- include/linux/netdevice.h | 27 +++++++++++++++++++-------- 1 file changed, 19 insertions(+), 8 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 5a8d40f1ffe2..7725efd6e48a 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1805,13 +1805,11 @@ enum netdev_priv_flags { * @phydev: Physical device may attach itself * for hardware timestamping * @sfp_bus: attached &struct sfp_bus structure.
- * @qdisc_tx_busylock_key: lockdep class annotating Qdisc->busylock - * spinlock - * @qdisc_running_key: lockdep class annotating Qdisc->running seqcount - * @qdisc_xmit_lock_key: lockdep class annotating - * netdev_queue->_xmit_lock spinlock + * * @addr_list_lock_key: lockdep class annotating * net_device->addr_list_lock spinlock + * @qdisc_tx_busylock: lockdep class annotating Qdisc->busylock spinlock + * @qdisc_running_key: lockdep class annotating Qdisc->running seqcount * * @proto_down: protocol port state information can be sent to the * switch driver and used to set the phys state of the @@ -2112,10 +2110,9 @@ struct net_device { #endif struct phy_device *phydev; struct sfp_bus *sfp_bus; - struct lock_class_key qdisc_tx_busylock_key; - struct lock_class_key qdisc_running_key; - struct lock_class_key qdisc_xmit_lock_key; struct lock_class_key addr_list_lock_key; + struct lock_class_key *qdisc_tx_busylock; + struct lock_class_key *qdisc_running_key; bool proto_down; unsigned wol_enabled:1; @@ -2200,6 +2197,20 @@ static inline void netdev_for_each_tx_queue(struct net_device *dev, f(dev, &dev->_tx[i], arg); } +#define netdev_lockdep_set_classes(dev) \ +{ \ + static struct lock_class_key qdisc_tx_busylock_key; \ + static struct lock_class_key qdisc_running_key; \ + static struct lock_class_key qdisc_xmit_lock_key; \ + unsigned int i; \ + \ + (dev)->qdisc_tx_busylock = &qdisc_tx_busylock_key; \ + (dev)->qdisc_running_key = &qdisc_running_key; \ + for (i = 0; i < (dev)->num_tx_queues; i++) \ + lockdep_set_class(&(dev)->_tx[i]._xmit_lock, \ + &qdisc_xmit_lock_key); \ +} + u16 netdev_pick_tx(struct net_device *dev, struct sk_buff *skb, struct net_device *sb_dev); struct netdev_queue *netdev_core_pick_tx(struct net_device *dev, -- cgit v1.2.3 From d26c0cc53950464a24adfa76867f1d71f0cbbea6 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Thu, 30 Apr 2020 23:30:47 +0200 Subject: bpf: Avoid gcc-10 stringop-overflow warning in struct bpf_prog gcc-10 warns about accesses 
to zero-length arrays: kernel/bpf/core.c: In function 'bpf_patch_insn_single': cc1: warning: writing 8 bytes into a region of size 0 [-Wstringop-overflow=] In file included from kernel/bpf/core.c:21: include/linux/filter.h:550:20: note: at offset 0 to object 'insnsi' with size 0 declared here 550 | struct bpf_insn insnsi[0]; | ^~~~~~ In this case, we really want to have two flexible-array members, but that is not possible. Removing the union to make insnsi a flexible-array member while leaving insns as a zero-length array fixes the warning, as nothing writes to the other one in that way. This trick only works on linux-3.18 or higher, as older versions had additional members in the union. Fixes: 60a3b2253c41 ("net: bpf: make eBPF interpreter images read-only") Signed-off-by: Arnd Bergmann Signed-off-by: Daniel Borkmann Link: https://lore.kernel.org/bpf/20200430213101.135134-6-arnd@arndb.de --- include/linux/filter.h | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/filter.h b/include/linux/filter.h index af37318bb1c5..73d06a39e2d6 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -545,10 +545,8 @@ struct bpf_prog { unsigned int (*bpf_func)(const void *ctx, const struct bpf_insn *insn); /* Instructions for interpreter */ - union { - struct sock_filter insns[0]; - struct bpf_insn insnsi[0]; - }; + struct sock_filter insns[0]; + struct bpf_insn insnsi[]; }; struct sk_filter { -- cgit v1.2.3 From 57a29df341466b5cca43ba3d2d7064426727d7c3 Mon Sep 17 00:00:00 2001 From: Kai-Heng Feng Date: Sat, 25 Apr 2020 02:49:14 +0800 Subject: iopoll: Introduce read_poll_timeout_atomic macro Like read_poll_timeout, an atomic variant for multiple parameter read function can be useful. Will be used by a later patch. 
Signed-off-by: Kai-Heng Feng Signed-off-by: Kalle Valo Link: https://lore.kernel.org/r/20200424184918.30360-1-kai.heng.feng@canonical.com --- include/linux/iopoll.h | 62 ++++++++++++++++++++++++++++++++++---------------- 1 file changed, 43 insertions(+), 19 deletions(-) (limited to 'include/linux') diff --git a/include/linux/iopoll.h b/include/linux/iopoll.h index cb20c733b15a..bc89ac625f26 100644 --- a/include/linux/iopoll.h +++ b/include/linux/iopoll.h @@ -57,6 +57,48 @@ (cond) ? 0 : -ETIMEDOUT; \ }) +/** + * read_poll_timeout_atomic - Periodically poll an address until a condition is + * met or a timeout occurs + * @op: accessor function (takes @addr as its only argument) + * @addr: Address to poll + * @val: Variable to read the value into + * @cond: Break condition (usually involving @val) + * @delay_us: Time to udelay between reads in us (0 tight-loops). Should + * be less than ~10us since udelay is used (see + * Documentation/timers/timers-howto.rst). + * @timeout_us: Timeout in us, 0 means never timeout + * @delay_before_read: if it is true, delay @delay_us before read. + * + * Returns 0 on success and -ETIMEDOUT upon a timeout. In either + * case, the last read value at @args is stored in @val. + * + * When available, you'll probably want to use one of the specialized + * macros defined below rather than this macro directly. + */ +#define read_poll_timeout_atomic(op, val, cond, delay_us, timeout_us, \ + delay_before_read, args...) \ +({ \ + u64 __timeout_us = (timeout_us); \ + unsigned long __delay_us = (delay_us); \ + ktime_t __timeout = ktime_add_us(ktime_get(), __timeout_us); \ + if (delay_before_read && __delay_us) \ + udelay(__delay_us); \ + for (;;) { \ + (val) = op(args); \ + if (cond) \ + break; \ + if (__timeout_us && \ + ktime_compare(ktime_get(), __timeout) > 0) { \ + (val) = op(args); \ + break; \ + } \ + if (__delay_us) \ + udelay(__delay_us); \ + } \ + (cond) ? 
0 : -ETIMEDOUT; \ +}) + /** * readx_poll_timeout - Periodically poll an address until a condition is met or a timeout occurs * @op: accessor function (takes @addr as its only argument) @@ -96,25 +138,7 @@ * macros defined below rather than this macro directly. */ #define readx_poll_timeout_atomic(op, addr, val, cond, delay_us, timeout_us) \ -({ \ - u64 __timeout_us = (timeout_us); \ - unsigned long __delay_us = (delay_us); \ - ktime_t __timeout = ktime_add_us(ktime_get(), __timeout_us); \ - for (;;) { \ - (val) = op(addr); \ - if (cond) \ - break; \ - if (__timeout_us && \ - ktime_compare(ktime_get(), __timeout) > 0) { \ - (val) = op(addr); \ - break; \ - } \ - if (__delay_us) \ - udelay(__delay_us); \ - } \ - (cond) ? 0 : -ETIMEDOUT; \ -}) - + read_poll_timeout_atomic(op, val, cond, delay_us, timeout_us, false, addr) #define readb_poll_timeout(addr, val, cond, delay_us, timeout_us) \ readx_poll_timeout(readb, addr, val, cond, delay_us, timeout_us) -- cgit v1.2.3 From 6349084746ff4f5f7ebc748e4b2a890f8c57b129 Mon Sep 17 00:00:00 2001 From: Michael Walle Date: Wed, 6 May 2020 16:53:13 +0200 Subject: net: phy: add concept of shared storage for PHYs There are packages which contain multiple PHY devices, eg. a quad PHY transceiver. Provide functions to allocate and free shared storage. Usually, a quad PHY contains global registers, which don't belong to any PHY. Provide convenience functions to access these registers. Signed-off-by: Michael Walle Reviewed-by: Andrew Lunn Reviewed-by: Florian Fainelli Signed-off-by: David S. 
Miller --- include/linux/phy.h | 89 +++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 89 insertions(+) (limited to 'include/linux') diff --git a/include/linux/phy.h b/include/linux/phy.h index e2bfb9240587..1d36ac608159 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -25,6 +25,7 @@ #include #include #include +#include #include @@ -227,6 +228,28 @@ struct mdio_bus_stats { struct u64_stats_sync syncp; }; +/* Represents a shared structure between different phydev's in the same + * package, for example a quad PHY. See phy_package_join() and + * phy_package_leave(). + */ +struct phy_package_shared { + int addr; + refcount_t refcnt; + unsigned long flags; + size_t priv_size; + + /* private data pointer */ + /* note that this pointer is shared between different phydevs and + * the user has to take care of appropriate locking. It is allocated + * and freed automatically by phy_package_join() and + * phy_package_leave(). + */ + void *priv; +}; + +/* used as bit number in atomic bitops */ +#define PHY_SHARED_F_INIT_DONE 0 + /* * The Bus class for PHYs. Devices which provide access to * PHYs should register using this structure @@ -278,6 +301,12 @@ struct mii_bus { int reset_delay_us; /* RESET GPIO descriptor pointer */ struct gpio_desc *reset_gpiod; + + /* protect access to the shared element */ + struct mutex shared_lock; + + /* shared state across different PHYs */ + struct phy_package_shared *shared[PHY_MAX_ADDR]; }; #define to_mii_bus(d) container_of(d, struct mii_bus, dev) @@ -478,6 +507,10 @@ struct phy_device { /* For use by PHYs to maintain extra state */ void *priv; + /* shared data pointer */ + /* For use by PHYs inside the same package that need a shared state. 
*/ + struct phy_package_shared *shared; + /* Interrupt and Polling infrastructure */ struct delayed_work state_queue; @@ -1354,6 +1387,10 @@ int phy_ethtool_get_link_ksettings(struct net_device *ndev, int phy_ethtool_set_link_ksettings(struct net_device *ndev, const struct ethtool_link_ksettings *cmd); int phy_ethtool_nway_reset(struct net_device *ndev); +int phy_package_join(struct phy_device *phydev, int addr, size_t priv_size); +void phy_package_leave(struct phy_device *phydev); +int devm_phy_package_join(struct device *dev, struct phy_device *phydev, + int addr, size_t priv_size); #if IS_ENABLED(CONFIG_PHYLIB) int __init mdio_bus_init(void); @@ -1406,6 +1443,58 @@ static inline int phy_ethtool_get_stats(struct phy_device *phydev, return 0; } +static inline int phy_package_read(struct phy_device *phydev, u32 regnum) +{ + struct phy_package_shared *shared = phydev->shared; + + if (!shared) + return -EIO; + + return mdiobus_read(phydev->mdio.bus, shared->addr, regnum); +} + +static inline int __phy_package_read(struct phy_device *phydev, u32 regnum) +{ + struct phy_package_shared *shared = phydev->shared; + + if (!shared) + return -EIO; + + return __mdiobus_read(phydev->mdio.bus, shared->addr, regnum); +} + +static inline int phy_package_write(struct phy_device *phydev, + u32 regnum, u16 val) +{ + struct phy_package_shared *shared = phydev->shared; + + if (!shared) + return -EIO; + + return mdiobus_write(phydev->mdio.bus, shared->addr, regnum, val); +} + +static inline int __phy_package_write(struct phy_device *phydev, + u32 regnum, u16 val) +{ + struct phy_package_shared *shared = phydev->shared; + + if (!shared) + return -EIO; + + return __mdiobus_write(phydev->mdio.bus, shared->addr, regnum, val); +} + +static inline bool phy_package_init_once(struct phy_device *phydev) +{ + struct phy_package_shared *shared = phydev->shared; + + if (!shared) + return false; + + return !test_and_set_bit(PHY_SHARED_F_INIT_DONE, &shared->flags); +} + extern struct bus_type 
mdio_bus_type; struct mdio_board_info { -- cgit v1.2.3 From c6af13d334759c33c14b6fad4c676c6d1dbf9564 Mon Sep 17 00:00:00 2001 From: Heiner Kallweit Date: Fri, 1 May 2020 23:27:21 +0200 Subject: timer: add fsleep for flexible sleeping Sleeping for a certain amount of time requires use of different functions, depending on the time period. Documentation/timers/timers-howto.rst explains when to use which function, and also checkpatch checks for some potentially problematic cases. So let's create a helper that automatically chooses the appropriate sleep function -> fsleep(), for flexible sleeping If the delay is a constant, then the compiler should be able to ensure that the new helper doesn't create overhead. If the delay is not constant, then the new helper can save some code. Signed-off-by: Heiner Kallweit Signed-off-by: David S. Miller --- include/linux/delay.h | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'include/linux') diff --git a/include/linux/delay.h b/include/linux/delay.h index 8e6828094c1e..5e016a4029d9 100644 --- a/include/linux/delay.h +++ b/include/linux/delay.h @@ -65,4 +65,15 @@ static inline void ssleep(unsigned int seconds) msleep(seconds * 1000); } +/* see Documentation/timers/timers-howto.rst for the thresholds */ +static inline void fsleep(unsigned long usecs) +{ + if (usecs <= 10) + udelay(usecs); + else if (usecs <= 20000) + usleep_range(usecs, 2 * usecs); + else + msleep(DIV_ROUND_UP(usecs, 1000)); +} + #endif /* defined(_LINUX_DELAY_H) */ -- cgit v1.2.3 From bdbdac7649fac05f88c9f7ab18121a17fb591687 Mon Sep 17 00:00:00 2001 From: Oleksij Rempel Date: Tue, 5 May 2020 08:35:05 +0200 Subject: ethtool: provide UAPI for PHY master/slave configuration. This UAPI is needed for BroadR-Reach 100BASE-T1 devices. Due to lack of auto-negotiation support, we needed to be able to configure the MASTER-SLAVE role of the port manually or from an application in user space. 
The same UAPI can be used for 1000BASE-T or MultiGBASE-T devices to force MASTER or SLAVE role. See IEEE 802.3-2018: 22.2.4.3.7 MASTER-SLAVE control register (Register 9) 22.2.4.3.8 MASTER-SLAVE status register (Register 10) 40.5.2 MASTER-SLAVE configuration resolution 45.2.1.185.1 MASTER-SLAVE config value (1.2100.14) 45.2.7.10 MultiGBASE-T AN control 1 register (Register 7.32) The MASTER-SLAVE role affects the clock configuration: ------------------------------------------------------------------------------- When the PHY is configured as MASTER, the PMA Transmit function shall source TX_TCLK from a local clock source. When configured as SLAVE, the PMA Transmit function shall source TX_TCLK from the clock recovered from data stream provided by MASTER. iMX6Q KSZ9031 XXX ------\ /-----------\ /------------\ | | | | | MAC |<----RGMII----->| PHY Slave |<------>| PHY Master | |<--- 125 MHz ---+-<------/ | | \ | ------/ \-----------/ \------------/ ^ \-TX_TCLK ------------------------------------------------------------------------------- Since some clock or link related issues are only reproducible in a specific MASTER-SLAVE-role, MAC and PHY configuration, it is beneficial to provide generic (not 100BASE-T1 specific) interface to the user space for configuration flexibility and trouble shooting. Signed-off-by: Oleksij Rempel Reviewed-by: Andrew Lunn Signed-off-by: David S. 
Miller --- include/linux/phy.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/phy.h b/include/linux/phy.h index 1d36ac608159..a2b91b5f9d0a 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -477,6 +477,9 @@ struct phy_device { int duplex; int pause; int asym_pause; + u8 master_slave_get; + u8 master_slave_set; + u8 master_slave_state; /* Union of PHY and Attached devices' supported link modes */ /* See ethtool.h for more info */ -- cgit v1.2.3 From 307f660d056b5eb8f5bb2328fac3915ab75b5007 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 7 May 2020 09:32:18 -0700 Subject: netpoll: remove dev argument from netpoll_send_skb_on_dev() netpoll_send_skb_on_dev() can get the device pointer directly from np->dev Rename it to __netpoll_send_skb() Following patch will move netpoll_send_skb() out-of-line. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/linux/netpoll.h | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netpoll.h b/include/linux/netpoll.h index 676f1ff161a9..00e0bae3d402 100644 --- a/include/linux/netpoll.h +++ b/include/linux/netpoll.h @@ -63,13 +63,12 @@ int netpoll_setup(struct netpoll *np); void __netpoll_cleanup(struct netpoll *np); void __netpoll_free(struct netpoll *np); void netpoll_cleanup(struct netpoll *np); -void netpoll_send_skb_on_dev(struct netpoll *np, struct sk_buff *skb, - struct net_device *dev); +void __netpoll_send_skb(struct netpoll *np, struct sk_buff *skb); static inline void netpoll_send_skb(struct netpoll *np, struct sk_buff *skb) { unsigned long flags; local_irq_save(flags); - netpoll_send_skb_on_dev(np, skb, np->dev); + __netpoll_send_skb(np, skb); local_irq_restore(flags); } -- cgit v1.2.3 From fb1eee476b0d3be3e58dac1a3a96f726c6278bed Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 7 May 2020 09:32:19 -0700 Subject: netpoll: move netpoll_send_skb() out of line There is no need 
to inline this helper, as we intend to add more code in this function. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/linux/netpoll.h | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netpoll.h b/include/linux/netpoll.h index 00e0bae3d402..e466ddffef61 100644 --- a/include/linux/netpoll.h +++ b/include/linux/netpoll.h @@ -63,14 +63,7 @@ int netpoll_setup(struct netpoll *np); void __netpoll_cleanup(struct netpoll *np); void __netpoll_free(struct netpoll *np); void netpoll_cleanup(struct netpoll *np); -void __netpoll_send_skb(struct netpoll *np, struct sk_buff *skb); -static inline void netpoll_send_skb(struct netpoll *np, struct sk_buff *skb) -{ - unsigned long flags; - local_irq_save(flags); - __netpoll_send_skb(np, skb); - local_irq_restore(flags); -} +void netpoll_send_skb(struct netpoll *np, struct sk_buff *skb); #ifdef CONFIG_NETPOLL static inline void *netpoll_poll_lock(struct napi_struct *napi) -- cgit v1.2.3 From 1ddabdfaf70c202b88925edd74c66f4707dbd92e Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 7 May 2020 09:32:20 -0700 Subject: netpoll: netpoll_send_skb() returns transmit status Some callers want to know if the packet has been sent or dropped, to inform upper stacks. Signed-off-by: Eric Dumazet Signed-off-by: David S. 
Miller --- include/linux/netpoll.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/netpoll.h b/include/linux/netpoll.h index e466ddffef61..f47af135bd56 100644 --- a/include/linux/netpoll.h +++ b/include/linux/netpoll.h @@ -63,7 +63,7 @@ int netpoll_setup(struct netpoll *np); void __netpoll_cleanup(struct netpoll *np); void __netpoll_free(struct netpoll *np); void netpoll_cleanup(struct netpoll *np); -void netpoll_send_skb(struct netpoll *np, struct sk_buff *skb); +netdev_tx_t netpoll_send_skb(struct netpoll *np, struct sk_buff *skb); #ifdef CONFIG_NETPOLL static inline void *netpoll_poll_lock(struct napi_struct *napi) -- cgit v1.2.3 From f78ed2204db9fc35b545d693865bddbe0149aa1f Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 7 May 2020 09:32:21 -0700 Subject: netpoll: accept NULL np argument in netpoll_send_skb() netpoll_send_skb() callers seem to leak skb if the np pointer is NULL. While this should not happen, we can make the code more robust. Signed-off-by: Eric Dumazet Signed-off-by: David S. 
Miller --- include/linux/if_team.h | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/if_team.h b/include/linux/if_team.h index ec7e4bd07f82..537dc2b8c879 100644 --- a/include/linux/if_team.h +++ b/include/linux/if_team.h @@ -102,10 +102,7 @@ static inline bool team_port_dev_txable(const struct net_device *port_dev) static inline void team_netpoll_send_skb(struct team_port *port, struct sk_buff *skb) { - struct netpoll *np = port->np; - - if (np) - netpoll_send_skb(np, skb); + netpoll_send_skb(port->np, skb); } #else static inline void team_netpoll_send_skb(struct team_port *port, -- cgit v1.2.3 From ae24345da54e452880808b011fa2d8a0bbd191ba Mon Sep 17 00:00:00 2001 From: Yonghong Song Date: Sat, 9 May 2020 10:58:59 -0700 Subject: bpf: Implement an interface to register bpf_iter targets The target can call bpf_iter_reg_target() to register itself. The needed information: target: target name seq_ops: the seq_file operations for the target init_seq_private target callback to initialize seq_priv during file open fini_seq_private target callback to clean up seq_priv during file release seq_priv_size: the private_data size needed by the seq_file operations The target name represents a target which provides a seq_ops for iterating objects. The target can provide two callback functions, init_seq_private and fini_seq_private, called during file open/release time. For example, /proc/net/{tcp6, ipv6_route, netlink, ...}, net name space needs to be setup properly during file open and released properly during file release. Function bpf_iter_unreg_target() is also implemented to unregister a particular target. 
Signed-off-by: Yonghong Song Signed-off-by: Alexei Starovoitov Acked-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20200509175859.2474669-1-yhs@fb.com --- include/linux/bpf.h | 15 +++++++++++++++ 1 file changed, 15 insertions(+) (limited to 'include/linux') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 1262ec460ab3..40c78b86fe38 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -31,6 +31,7 @@ struct seq_file; struct btf; struct btf_type; struct exception_table_entry; +struct seq_operations; extern struct idr btf_idr; extern spinlock_t btf_idr_lock; @@ -1126,6 +1127,20 @@ struct bpf_link *bpf_link_get_from_fd(u32 ufd); int bpf_obj_pin_user(u32 ufd, const char __user *pathname); int bpf_obj_get_user(const char __user *pathname, int flags); +typedef int (*bpf_iter_init_seq_priv_t)(void *private_data); +typedef void (*bpf_iter_fini_seq_priv_t)(void *private_data); + +struct bpf_iter_reg { + const char *target; + const struct seq_operations *seq_ops; + bpf_iter_init_seq_priv_t init_seq_private; + bpf_iter_fini_seq_priv_t fini_seq_private; + u32 seq_priv_size; +}; + +int bpf_iter_reg_target(struct bpf_iter_reg *reg_info); +void bpf_iter_unreg_target(const char *target); + int bpf_percpu_hash_copy(struct bpf_map *map, void *key, void *value); int bpf_percpu_array_copy(struct bpf_map *map, void *key, void *value); int bpf_percpu_hash_update(struct bpf_map *map, void *key, void *value, -- cgit v1.2.3 From 15d83c4d7cef5c067a8b075ce59e97df4f60706e Mon Sep 17 00:00:00 2001 From: Yonghong Song Date: Sat, 9 May 2020 10:59:00 -0700 Subject: bpf: Allow loading of a bpf_iter program A bpf_iter program is a tracing program with attach type BPF_TRACE_ITER. The load attribute attach_btf_id is used by the verifier against a particular kernel function, which represents a target, e.g., __bpf_iter__bpf_map for target bpf_map which is implemented later. The program return value must be 0 or 1 for now. 
0 : successful, except potential seq_file buffer overflow which is handled by seq_file reader. 1 : request to restart the same object In the future, other return values may be used for filtering or terminating the iterator. Signed-off-by: Yonghong Song Signed-off-by: Alexei Starovoitov Acked-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20200509175900.2474947-1-yhs@fb.com --- include/linux/bpf.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 40c78b86fe38..f28bdd714754 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -1127,6 +1127,8 @@ struct bpf_link *bpf_link_get_from_fd(u32 ufd); int bpf_obj_pin_user(u32 ufd, const char __user *pathname); int bpf_obj_get_user(const char __user *pathname, int flags); +#define BPF_ITER_FUNC_PREFIX "__bpf_iter__" + typedef int (*bpf_iter_init_seq_priv_t)(void *private_data); typedef void (*bpf_iter_fini_seq_priv_t)(void *private_data); @@ -1140,6 +1142,7 @@ struct bpf_iter_reg { int bpf_iter_reg_target(struct bpf_iter_reg *reg_info); void bpf_iter_unreg_target(const char *target); +bool bpf_iter_prog_supported(struct bpf_prog *prog); int bpf_percpu_hash_copy(struct bpf_map *map, void *key, void *value); int bpf_percpu_array_copy(struct bpf_map *map, void *key, void *value); -- cgit v1.2.3 From de4e05cac46d206f9090051ef09930514bff73e4 Mon Sep 17 00:00:00 2001 From: Yonghong Song Date: Sat, 9 May 2020 10:59:01 -0700 Subject: bpf: Support bpf tracing/iter programs for BPF_LINK_CREATE Given a bpf program, the step to create an anonymous bpf iterator is: - create a bpf_iter_link, which combines bpf program and the target. In the future, there could be more information recorded in the link. A link_fd will be returned to the user space. - create an anonymous bpf iterator with the given link_fd. The bpf_iter_link can be pinned to bpffs mount file system to create a file based bpf iterator as well.
The benefits of using bpf_iter_link: - using bpf link simplifies design and implementation as bpf link is used for other tracing bpf programs. - for file based bpf iterator, bpf_iter_link provides a standard way to replace underlying bpf programs. - for both anonymous and file based iterators, bpf link query capability can be leveraged. The patch added support of tracing/iter programs for BPF_LINK_CREATE. A new link type BPF_LINK_TYPE_ITER is added to facilitate link querying. Currently, only prog_id is needed, so no additional in-kernel show_fdinfo() and fill_link_info() hook is needed for BPF_LINK_TYPE_ITER link. Signed-off-by: Yonghong Song Signed-off-by: Alexei Starovoitov Acked-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20200509175901.2475084-1-yhs@fb.com --- include/linux/bpf.h | 1 + include/linux/bpf_types.h | 1 + 2 files changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index f28bdd714754..e93d2d33c82c 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -1143,6 +1143,7 @@ struct bpf_iter_reg { int bpf_iter_reg_target(struct bpf_iter_reg *reg_info); void bpf_iter_unreg_target(const char *target); bool bpf_iter_prog_supported(struct bpf_prog *prog); +int bpf_iter_link_attach(const union bpf_attr *attr, struct bpf_prog *prog); int bpf_percpu_hash_copy(struct bpf_map *map, void *key, void *value); int bpf_percpu_array_copy(struct bpf_map *map, void *key, void *value); diff --git a/include/linux/bpf_types.h b/include/linux/bpf_types.h index 8345cdf553b8..29d22752fc87 100644 --- a/include/linux/bpf_types.h +++ b/include/linux/bpf_types.h @@ -124,3 +124,4 @@ BPF_LINK_TYPE(BPF_LINK_TYPE_TRACING, tracing) #ifdef CONFIG_CGROUP_BPF BPF_LINK_TYPE(BPF_LINK_TYPE_CGROUP, cgroup) #endif +BPF_LINK_TYPE(BPF_LINK_TYPE_ITER, iter) -- cgit v1.2.3 From ac51d99bf81caac8d8881fe52098948110d0de68 Mon Sep 17 00:00:00 2001 From: Yonghong Song Date: Sat, 9 May 2020 10:59:05 -0700 Subject: bpf:
Create anonymous bpf iterator A new bpf command BPF_ITER_CREATE is added. The anonymous bpf iterator is seq_file based. The seq_file private data are referenced by targets. The bpf_iter infrastructure allocated additional space at seq_file->private before the space used by targets to store some meta data, e.g., prog: prog to run session_id: a unique id for each opened seq_file seq_num: how many times bpf programs are queried in this session done_stop: an internal state to decide whether bpf program should be called in seq_ops->stop() or not The seq_num will start from 0 for valid objects. The bpf program may see the same seq_num more than once if - seq_file buffer overflow happens and the same object is retried by bpf_seq_read(), or - the bpf program explicitly requests a retry of the same object Since module is not supported for bpf_iter, all target registration happens at __init time, so there is no need to change bpf_iter_unreg_target() as it is used mostly in error path of the init function at which time no bpf iterators have been created yet.
Signed-off-by: Yonghong Song Signed-off-by: Alexei Starovoitov Acked-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20200509175905.2475770-1-yhs@fb.com --- include/linux/bpf.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index e93d2d33c82c..80b1b9d8a638 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -1144,6 +1144,7 @@ int bpf_iter_reg_target(struct bpf_iter_reg *reg_info); void bpf_iter_unreg_target(const char *target); bool bpf_iter_prog_supported(struct bpf_prog *prog); int bpf_iter_link_attach(const union bpf_attr *attr, struct bpf_prog *prog); +int bpf_iter_new_fd(struct bpf_link *link); int bpf_percpu_hash_copy(struct bpf_map *map, void *key, void *value); int bpf_percpu_array_copy(struct bpf_map *map, void *key, void *value); -- cgit v1.2.3 From 367ec3e4834cbd611401c2c40a23c22c825474f1 Mon Sep 17 00:00:00 2001 From: Yonghong Song Date: Sat, 9 May 2020 10:59:06 -0700 Subject: bpf: Create file bpf iterator To produce a file bpf iterator, the fd must correspond to a link_fd associated with a trace/iter program. When the pinned file is opened, a seq_file will be generated.
Signed-off-by: Yonghong Song Signed-off-by: Alexei Starovoitov Acked-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20200509175906.2475893-1-yhs@fb.com --- include/linux/bpf.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 80b1b9d8a638..b06653ab3476 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -1022,6 +1022,7 @@ static inline void bpf_enable_instrumentation(void) extern const struct file_operations bpf_map_fops; extern const struct file_operations bpf_prog_fops; +extern const struct file_operations bpf_iter_fops; #define BPF_PROG_TYPE(_id, _name, prog_ctx_type, kern_ctx_type) \ extern const struct bpf_prog_ops _name ## _prog_ops; \ @@ -1145,6 +1146,7 @@ void bpf_iter_unreg_target(const char *target); bool bpf_iter_prog_supported(struct bpf_prog *prog); int bpf_iter_link_attach(const union bpf_attr *attr, struct bpf_prog *prog); int bpf_iter_new_fd(struct bpf_link *link); +bool bpf_link_is_iter(struct bpf_link *link); int bpf_percpu_hash_copy(struct bpf_map *map, void *key, void *value); int bpf_percpu_array_copy(struct bpf_map *map, void *key, void *value); -- cgit v1.2.3 From e5158d987b72c3f318b4b52a01ac6f3997bd0c00 Mon Sep 17 00:00:00 2001 From: Yonghong Song Date: Sat, 9 May 2020 10:59:07 -0700 Subject: bpf: Implement common macros/helpers for target iterators Macro DEFINE_BPF_ITER_FUNC is implemented so target can define an init function to capture the BTF type which represents the target. The bpf_iter_meta is a structure holding meta data, common to all targets in the bpf program. Additional marker functions are called before or after bpf_seq_read() show()/next()/stop() callback functions to help calculate precise seq_num and whether call bpf_prog inside stop(). Two functions, bpf_iter_get_info() and bpf_iter_run_prog(), are implemented so target can get needed information from bpf_iter infrastructure and can run the program. 
Signed-off-by: Yonghong Song Signed-off-by: Alexei Starovoitov Acked-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20200509175907.2475956-1-yhs@fb.com --- include/linux/bpf.h | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'include/linux') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index b06653ab3476..ffe0b9b669bf 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -1129,6 +1129,9 @@ int bpf_obj_pin_user(u32 ufd, const char __user *pathname); int bpf_obj_get_user(const char __user *pathname, int flags); #define BPF_ITER_FUNC_PREFIX "__bpf_iter__" +#define DEFINE_BPF_ITER_FUNC(target, args...) \ + extern int __bpf_iter__ ## target(args); \ + int __init __bpf_iter__ ## target(args) { return 0; } typedef int (*bpf_iter_init_seq_priv_t)(void *private_data); typedef void (*bpf_iter_fini_seq_priv_t)(void *private_data); @@ -1141,12 +1144,20 @@ struct bpf_iter_reg { u32 seq_priv_size; }; +struct bpf_iter_meta { + __bpf_md_ptr(struct seq_file *, seq); + u64 session_id; + u64 seq_num; +}; + int bpf_iter_reg_target(struct bpf_iter_reg *reg_info); void bpf_iter_unreg_target(const char *target); bool bpf_iter_prog_supported(struct bpf_prog *prog); int bpf_iter_link_attach(const union bpf_attr *attr, struct bpf_prog *prog); int bpf_iter_new_fd(struct bpf_link *link); bool bpf_link_is_iter(struct bpf_link *link); +struct bpf_prog *bpf_iter_get_info(struct bpf_iter_meta *meta, bool in_stop); +int bpf_iter_run_prog(struct bpf_prog *prog, void *ctx); int bpf_percpu_hash_copy(struct bpf_map *map, void *key, void *value); int bpf_percpu_array_copy(struct bpf_map *map, void *key, void *value); -- cgit v1.2.3 From 6086d29def80edd78f9832ea6eafa74e3818f6a7 Mon Sep 17 00:00:00 2001 From: Yonghong Song Date: Sat, 9 May 2020 10:59:09 -0700 Subject: bpf: Add bpf_map iterator Implement seq_file operations to traverse all bpf_maps. 
Signed-off-by: Yonghong Song Signed-off-by: Alexei Starovoitov Acked-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20200509175909.2476096-1-yhs@fb.com --- include/linux/bpf.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index ffe0b9b669bf..363ab0751967 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -1082,6 +1082,7 @@ int generic_map_update_batch(struct bpf_map *map, int generic_map_delete_batch(struct bpf_map *map, const union bpf_attr *attr, union bpf_attr __user *uattr); +struct bpf_map *bpf_map_get_curr_or_next(u32 *id); extern int sysctl_unprivileged_bpf_disabled; -- cgit v1.2.3 From 138d0be35b141e09f6b267c6ae4094318d4e4491 Mon Sep 17 00:00:00 2001 From: Yonghong Song Date: Sat, 9 May 2020 10:59:10 -0700 Subject: net: bpf: Add netlink and ipv6_route bpf_iter targets This patch added netlink and ipv6_route targets, using the same seq_ops (except show() and minor changes for stop()) for /proc/net/{netlink,ipv6_route}. The net namespace for these targets are the current net namespace at file open stage, similar to /proc/net/{netlink,ipv6_route} reference counting the net namespace at seq_file open stage. Since module is not supported for now, ipv6_route is supported only if the IPV6 is built-in, i.e., not compiled as a module. The restriction can be lifted once module is properly supported for bpf_iter. 
Signed-off-by: Yonghong Song Signed-off-by: Alexei Starovoitov Acked-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20200509175910.2476329-1-yhs@fb.com --- include/linux/proc_fs.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/proc_fs.h b/include/linux/proc_fs.h index 45c05fd9c99d..03953c59807d 100644 --- a/include/linux/proc_fs.h +++ b/include/linux/proc_fs.h @@ -105,6 +105,9 @@ struct proc_dir_entry *proc_create_net_single_write(const char *name, umode_t mo void *data); extern struct pid *tgid_pidfd_to_pid(const struct file *file); +extern int bpf_iter_init_seq_net(void *priv_data); +extern void bpf_iter_fini_seq_net(void *priv_data); + #ifdef CONFIG_PROC_PID_ARCH_STATUS /* * The architecture which selects CONFIG_PROC_PID_ARCH_STATUS must -- cgit v1.2.3 From b121b341e5983bdccf7a5d6cf9236a45c965a31f Mon Sep 17 00:00:00 2001 From: Yonghong Song Date: Sat, 9 May 2020 10:59:12 -0700 Subject: bpf: Add PTR_TO_BTF_ID_OR_NULL support Add bpf_reg_type PTR_TO_BTF_ID_OR_NULL support. For tracing/iter program, the bpf program context definition, e.g., for previous bpf_map target, looks like struct bpf_iter__bpf_map { struct bpf_iter_meta *meta; struct bpf_map *map; }; The kernel guarantees that meta is not NULL, but map pointer maybe NULL. The NULL map indicates that all objects have been traversed, so bpf program can take proper action, e.g., do final aggregation and/or send final report to user space. Add btf_id_or_null_non0_off to prog->aux structure, to indicate that if the context access offset is not 0, set to PTR_TO_BTF_ID_OR_NULL instead of PTR_TO_BTF_ID. This bit is set for tracing/iter program. 
Signed-off-by: Yonghong Song Signed-off-by: Alexei Starovoitov Acked-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20200509175912.2476576-1-yhs@fb.com --- include/linux/bpf.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 363ab0751967..cf4b6e44f2bc 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -320,6 +320,7 @@ enum bpf_reg_type { PTR_TO_TP_BUFFER, /* reg points to a writable raw tp's buffer */ PTR_TO_XDP_SOCK, /* reg points to struct xdp_sock */ PTR_TO_BTF_ID, /* reg points to kernel struct */ + PTR_TO_BTF_ID_OR_NULL, /* reg points to kernel struct or NULL */ }; /* The information passed from prog-specific *_is_valid_access @@ -658,6 +659,7 @@ struct bpf_prog_aux { bool offload_requested; bool attach_btf_trace; /* true if attaching to BTF-enabled raw tp */ bool func_proto_unreliable; + bool btf_id_or_null_non0_off; enum bpf_tramp_prog_type trampoline_prog_type; struct bpf_trampoline *trampoline; struct hlist_node tramp_hlist; -- cgit v1.2.3 From e7bb7ecefa817543e11fa3c1c3e55deb90b02e6c Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Thu, 7 May 2020 13:59:21 -0500 Subject: IB/mlx4: Replace zero-length array with flexible-array The current codebase makes use of the zero-length array language extension to the C90 standard, but the preferred mechanism to declare variable-length types such as these ones is a flexible array member[1][2], introduced in C99: struct foo { int stuff; struct boo array[]; }; By making use of the mechanism above, we will get a compiler warning in case the flexible array does not occur last in the structure, which will help us prevent some kind of undefined behavior bugs from being inadvertently introduced[3] to the codebase from now on. Also, notice that, dynamic memory allocations won't be affected by this change: "Flexible array members have incomplete type, and so the sizeof operator may not be applied. 
As a quirk of the original implementation of zero-length arrays, sizeof evaluates to zero."[1] sizeof(flexible-array-member) triggers a warning because flexible array members have incomplete type[1]. There are some instances of code in which the sizeof operator is being incorrectly/erroneously applied to zero-length arrays and the result is zero. Such instances may be hiding some bugs. So, this work (flexible-array member conversions) will also help to get completely rid of those sorts of issues. This issue was found with the help of Coccinelle. [1] https://gcc.gnu.org/onlinedocs/gcc/Zero-Length.html [2] https://github.com/KSPP/linux/issues/21 [3] commit 76497732932f ("cxgb3/l2t: Fix undefined behaviour") Signed-off-by: Gustavo A. R. Silva Signed-off-by: Jakub Kicinski --- include/linux/mlx4/qp.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mlx4/qp.h b/include/linux/mlx4/qp.h index 8e2828d48d7f..9db93e487496 100644 --- a/include/linux/mlx4/qp.h +++ b/include/linux/mlx4/qp.h @@ -362,7 +362,7 @@ struct mlx4_wqe_datagram_seg { struct mlx4_wqe_lso_seg { __be32 mss_hdr_size; - __be32 header[0]; + __be32 header[]; }; enum mlx4_wqe_bind_seg_flags2 { -- cgit v1.2.3 From a68a813836e12b15715d9101309899123c250302 Mon Sep 17 00:00:00 2001 From: Andrew Lunn Date: Sun, 10 May 2020 21:12:30 +0200 Subject: net: phy: Add cable test support to state machine Running a cable test is disruptive to normal operation of the PHY and can take 5 to 10 seconds to complete. The RTNL lock cannot be held for this amount of time, so add a new state to the state machine for running a cable test. The driver is expected to implement two functions. The first is used to start a cable test. Once the test has started, it should return. The second function is called once per second, or on interrupt to check if the cable test is complete, and to allow the PHY to report the status.
v2: Rename phy_cable_test_abort to phy_abort_cable_test Return different extack when already running test Use phy_init_hw() to reset the PHY Reviewed-by: Florian Fainelli Signed-off-by: Andrew Lunn Signed-off-by: Jakub Kicinski --- include/linux/phy.h | 28 ++++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) (limited to 'include/linux') diff --git a/include/linux/phy.h b/include/linux/phy.h index a2b91b5f9d0a..632403fc34f4 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -15,6 +15,7 @@ #include #include #include +#include #include #include #include @@ -372,6 +373,12 @@ struct phy_device *mdiobus_scan(struct mii_bus *bus, int addr); * - irq or timer will set NOLINK if link goes down * - phy_stop moves to HALTED * + * CABLETEST: PHY is performing a cable test. Packet reception/sending + * is not expected to work, carrier will be indicated as down. PHY will be + * poll once per second, or on interrupt for it current state. + * Once complete, move to UP to restart the PHY. + * - phy_stop aborts the running test and moves to HALTED + * * HALTED: PHY is up, but no polling or interrupts are done. Or * PHY is in an error state. * - phy_start moves to UP @@ -383,6 +390,7 @@ enum phy_state { PHY_UP, PHY_RUNNING, PHY_NOLINK, + PHY_CABLETEST, }; /** @@ -689,6 +697,13 @@ struct phy_driver { int (*module_eeprom)(struct phy_device *dev, struct ethtool_eeprom *ee, u8 *data); + /* Start a cable test */ + int (*cable_test_start)(struct phy_device *dev); + /* Once per second, or on interrupt, request the status of the + * test. 
+ */ + int (*cable_test_get_status)(struct phy_device *dev, bool *finished); + /* Get statistics from the phy using ethtool */ int (*get_sset_count)(struct phy_device *dev); void (*get_strings)(struct phy_device *dev, u8 *data); @@ -1227,6 +1242,19 @@ int phy_speed_up(struct phy_device *phydev); int phy_restart_aneg(struct phy_device *phydev); int phy_reset_after_clk_enable(struct phy_device *phydev); +#if IS_ENABLED(CONFIG_PHYLIB) +int phy_start_cable_test(struct phy_device *phydev, + struct netlink_ext_ack *extack); +#else +static inline +int phy_start_cable_test(struct phy_device *phydev, + struct netlink_ext_ack *extack) +{ + NL_SET_ERR_MSG(extack, "Kernel not compiled with PHYLIB support"); + return -EOPNOTSUPP; +} +#endif + static inline void phy_device_reset(struct phy_device *phydev, int value) { mdio_device_reset(&phydev->mdio, value); -- cgit v1.2.3 From 97c22438963a7484c05c59ab6654e30f0a3e9288 Mon Sep 17 00:00:00 2001 From: Andrew Lunn Date: Sun, 10 May 2020 21:12:32 +0200 Subject: net: phy: Add support for polling cable test Some PHYs are not capable of generating interrupts when a cable test finished. They do however support interrupts for normal operations, like link up/down. As such, the PHY state machine would normally not poll the PHY. Add support for indicating the PHY state machine must poll the PHY when performing a cable test. 
Signed-off-by: Andrew Lunn Reviewed-by: Florian Fainelli Signed-off-by: Jakub Kicinski --- include/linux/phy.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/phy.h b/include/linux/phy.h index 632403fc34f4..f58eee735a45 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -79,6 +79,7 @@ extern const int phy_10gbit_features_array[1]; #define PHY_IS_INTERNAL 0x00000001 #define PHY_RST_AFTER_CLK_EN 0x00000002 +#define PHY_POLL_CABLE_TEST 0x00000004 #define MDIO_DEVICE_IS_PHY 0x80000000 /* Interface Mode definitions */ @@ -1061,6 +1062,10 @@ static inline bool phy_interrupt_is_valid(struct phy_device *phydev) */ static inline bool phy_polling_mode(struct phy_device *phydev) { + if (phydev->state == PHY_CABLETEST) + if (phydev->drv->flags & PHY_POLL_CABLE_TEST) + return true; + return phydev->irq == PHY_POLL; } -- cgit v1.2.3 From 1dd3f212af30b42c90ba252c165f2f6d2ddf5230 Mon Sep 17 00:00:00 2001 From: Andrew Lunn Date: Sun, 10 May 2020 21:12:36 +0200 Subject: net: ethtool: Add infrastructure for reporting cable test results Provide infrastructure for PHY drivers to report the cable test results. A netlink skb is associated to the phydev. Helpers will be added which can add results to this skb. Once the test has finished the results are sent to user space. When netlink ethtool is not part of the kernel configuration stubs are provided. It is also impossible to trigger a cable test, so the error code returned by the alloc function is of no consequence. 
v2: Include the status complete in the netlink notification message v4: Replace -EINVAL with -EMSGSIZE Signed-off-by: Andrew Lunn Reviewed-by: Michal Kubecek Signed-off-by: Jakub Kicinski --- include/linux/ethtool_netlink.h | 20 ++++++++++++++++++++ include/linux/phy.h | 5 +++++ 2 files changed, 25 insertions(+) (limited to 'include/linux') diff --git a/include/linux/ethtool_netlink.h b/include/linux/ethtool_netlink.h index d01b77887f82..7d763ba22f6f 100644 --- a/include/linux/ethtool_netlink.h +++ b/include/linux/ethtool_netlink.h @@ -14,4 +14,24 @@ enum ethtool_multicast_groups { ETHNL_MCGRP_MONITOR, }; +struct phy_device; + +#if IS_ENABLED(CONFIG_ETHTOOL_NETLINK) +int ethnl_cable_test_alloc(struct phy_device *phydev); +void ethnl_cable_test_free(struct phy_device *phydev); +void ethnl_cable_test_finished(struct phy_device *phydev); +#else +static inline int ethnl_cable_test_alloc(struct phy_device *phydev) +{ + return -ENOTSUPP; +} + +static inline void ethnl_cable_test_free(struct phy_device *phydev) +{ +} + +static inline void ethnl_cable_test_finished(struct phy_device *phydev) +{ +} +#endif /* IS_ENABLED(ETHTOOL_NETLINK) */ #endif /* _LINUX_ETHTOOL_NETLINK_H_ */ diff --git a/include/linux/phy.h b/include/linux/phy.h index f58eee735a45..169fae4249a9 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -523,6 +523,11 @@ struct phy_device { /* For use by PHYs inside the same package that need a shared state. */ struct phy_package_shared *shared; + /* Reporting cable test results */ + struct sk_buff *skb; + void *ehdr; + struct nlattr *nest; + /* Interrupt and Polling infrastructure */ struct delayed_work state_queue; -- cgit v1.2.3 From 1e2dc14509fd072739e4bab98ac42317267dbad6 Mon Sep 17 00:00:00 2001 From: Andrew Lunn Date: Sun, 10 May 2020 21:12:37 +0200 Subject: net: ethtool: Add helpers for reporting test results The PHY drivers can use these helpers for reporting the results. 
The results get translated into netlink attributes which are added to the pre-allocated skbuf. v3: Poison phydev->skb Return -EMSGSIZE when ethnl_bcastmsg_put() fails Return valid error code when nla_nest_start() fails Use u8 for results Actually put u32 length into message v4: s/ENOTSUPP/EOPNOTSUPP/g Signed-off-by: Andrew Lunn Reviewed-by: Florian Fainelli Reviewed-by: Michal Kubecek Signed-off-by: Jakub Kicinski --- include/linux/ethtool_netlink.h | 15 ++++++++++++++- include/linux/phy.h | 4 ++++ 2 files changed, 18 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/ethtool_netlink.h b/include/linux/ethtool_netlink.h index 7d763ba22f6f..e317fc99565e 100644 --- a/include/linux/ethtool_netlink.h +++ b/include/linux/ethtool_netlink.h @@ -20,10 +20,12 @@ struct phy_device; int ethnl_cable_test_alloc(struct phy_device *phydev); void ethnl_cable_test_free(struct phy_device *phydev); void ethnl_cable_test_finished(struct phy_device *phydev); +int ethnl_cable_test_result(struct phy_device *phydev, u8 pair, u8 result); +int ethnl_cable_test_fault_length(struct phy_device *phydev, u8 pair, u32 cm); #else static inline int ethnl_cable_test_alloc(struct phy_device *phydev) { - return -ENOTSUPP; + return -EOPNOTSUPP; } static inline void ethnl_cable_test_free(struct phy_device *phydev) @@ -33,5 +35,16 @@ static inline void ethnl_cable_test_free(struct phy_device *phydev) static inline void ethnl_cable_test_finished(struct phy_device *phydev) { } +static inline int ethnl_cable_test_result(struct phy_device *phydev, u8 pair, + u8 result) +{ + return -EOPNOTSUPP; +} + +static inline int ethnl_cable_test_fault_length(struct phy_device *phydev, + u8 pair, u32 cm) +{ + return -EOPNOTSUPP; +} #endif /* IS_ENABLED(ETHTOOL_NETLINK) */ #endif /* _LINUX_ETHTOOL_NETLINK_H_ */ diff --git a/include/linux/phy.h b/include/linux/phy.h index 169fae4249a9..5d8ff5428010 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -1265,6 +1265,10 @@ int 
phy_start_cable_test(struct phy_device *phydev, } #endif +int phy_cable_test_result(struct phy_device *phydev, u8 pair, u16 result); +int phy_cable_test_fault_length(struct phy_device *phydev, u8 pair, + u16 cm); + static inline void phy_device_reset(struct phy_device *phydev, int value) { mdio_device_reset(&phydev->mdio, value); -- cgit v1.2.3 From b6ca09cb156d349e6fdde8a8466ec15b902d1419 Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Thu, 7 May 2020 13:59:35 -0500 Subject: net/mlx5: Replace zero-length array with flexible-array The current codebase makes use of the zero-length array language extension to the C90 standard, but the preferred mechanism to declare variable-length types such as these ones is a flexible array member[1][2], introduced in C99: struct foo { int stuff; struct boo array[]; }; By making use of the mechanism above, we will get a compiler warning in case the flexible array does not occur last in the structure, which will help us prevent some kind of undefined behavior bugs from being inadvertently introduced[3] to the codebase from now on. Also, notice that, dynamic memory allocations won't be affected by this change: "Flexible array members have incomplete type, and so the sizeof operator may not be applied. As a quirk of the original implementation of zero-length arrays, sizeof evaluates to zero."[1] sizeof(flexible-array-member) triggers a warning because flexible array members have incomplete type[1]. There are some instances of code in which the sizeof operator is being incorrectly/erroneously applied to zero-length arrays and the result is zero. Such instances may be hiding some bugs. So, this work (flexible-array member conversions) will also help to get completely rid of those sorts of issues. This issue was found with the help of Coccinelle. 
[1] https://gcc.gnu.org/onlinedocs/gcc/Zero-Length.html [2] https://github.com/KSPP/linux/issues/21 [3] commit 76497732932f ("cxgb3/l2t: Fix undefined behaviour") Signed-off-by: Gustavo A. R. Silva Signed-off-by: Saeed Mahameed --- include/linux/mlx5/driver.h | 2 +- include/linux/mlx5/mlx5_ifc.h | 66 +++++++++++++++++++++---------------------- include/linux/mlx5/qp.h | 2 +- 3 files changed, 35 insertions(+), 35 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index 267dfcc5493e..24e04901f92e 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -201,7 +201,7 @@ struct mlx5_rsc_debug { void *object; enum dbg_rsc_type type; struct dentry *root; - struct mlx5_field_desc fields[0]; + struct mlx5_field_desc fields[]; }; enum mlx5_dev_event { diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index fb243848132d..c9dd6e99ad56 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -1703,7 +1703,7 @@ struct mlx5_ifc_wq_bits { u8 reserved_at_140[0x4c0]; - struct mlx5_ifc_cmd_pas_bits pas[0]; + struct mlx5_ifc_cmd_pas_bits pas[]; }; struct mlx5_ifc_rq_num_bits { @@ -1921,7 +1921,7 @@ struct mlx5_ifc_resource_dump_menu_segment_bits { u8 reserved_at_20[0x10]; u8 num_of_records[0x10]; - struct mlx5_ifc_resource_dump_menu_record_bits record[0]; + struct mlx5_ifc_resource_dump_menu_record_bits record[]; }; struct mlx5_ifc_resource_dump_resource_segment_bits { @@ -1933,7 +1933,7 @@ struct mlx5_ifc_resource_dump_resource_segment_bits { u8 index2[0x20]; - u8 payload[0][0x20]; + u8 payload[][0x20]; }; struct mlx5_ifc_resource_dump_terminate_segment_bits { @@ -3010,7 +3010,7 @@ struct mlx5_ifc_flow_context_bits { u8 reserved_at_1200[0x600]; - union mlx5_ifc_dest_format_struct_flow_counter_list_auto_bits destination[0]; + union mlx5_ifc_dest_format_struct_flow_counter_list_auto_bits destination[]; }; enum { @@ -3303,7 +3303,7 @@ struct 
mlx5_ifc_rqtc_bits { u8 reserved_at_e0[0x6a0]; - struct mlx5_ifc_rq_num_bits rq_num[0]; + struct mlx5_ifc_rq_num_bits rq_num[]; }; enum { @@ -3415,7 +3415,7 @@ struct mlx5_ifc_nic_vport_context_bits { u8 reserved_at_7e0[0x20]; - u8 current_uc_mac_address[0][0x40]; + u8 current_uc_mac_address[][0x40]; }; enum { @@ -4338,7 +4338,7 @@ struct mlx5_ifc_query_xrc_srq_out_bits { u8 reserved_at_280[0x600]; - u8 pas[0][0x40]; + u8 pas[][0x40]; }; struct mlx5_ifc_query_xrc_srq_in_bits { @@ -4616,7 +4616,7 @@ struct mlx5_ifc_query_srq_out_bits { u8 reserved_at_280[0x600]; - u8 pas[0][0x40]; + u8 pas[][0x40]; }; struct mlx5_ifc_query_srq_in_bits { @@ -4827,7 +4827,7 @@ struct mlx5_ifc_query_qp_out_bits { u8 reserved_at_800[0x80]; - u8 pas[0][0x40]; + u8 pas[][0x40]; }; struct mlx5_ifc_query_qp_in_bits { @@ -5160,7 +5160,7 @@ struct mlx5_ifc_query_hca_vport_pkey_out_bits { u8 reserved_at_40[0x40]; - struct mlx5_ifc_pkey_bits pkey[0]; + struct mlx5_ifc_pkey_bits pkey[]; }; struct mlx5_ifc_query_hca_vport_pkey_in_bits { @@ -5196,7 +5196,7 @@ struct mlx5_ifc_query_hca_vport_gid_out_bits { u8 gids_num[0x10]; u8 reserved_at_70[0x10]; - struct mlx5_ifc_array128_auto_bits gid[0]; + struct mlx5_ifc_array128_auto_bits gid[]; }; struct mlx5_ifc_query_hca_vport_gid_in_bits { @@ -5464,7 +5464,7 @@ struct mlx5_ifc_query_flow_counter_out_bits { u8 reserved_at_40[0x40]; - struct mlx5_ifc_traffic_counter_bits flow_statistics[0]; + struct mlx5_ifc_traffic_counter_bits flow_statistics[]; }; struct mlx5_ifc_query_flow_counter_in_bits { @@ -5558,7 +5558,7 @@ struct mlx5_ifc_query_eq_out_bits { u8 reserved_at_300[0x580]; - u8 pas[0][0x40]; + u8 pas[][0x40]; }; struct mlx5_ifc_query_eq_in_bits { @@ -5583,7 +5583,7 @@ struct mlx5_ifc_packet_reformat_context_in_bits { u8 reserved_at_20[0x10]; u8 reformat_data[2][0x8]; - u8 more_reformat_data[0][0x8]; + u8 more_reformat_data[][0x8]; }; struct mlx5_ifc_query_packet_reformat_context_out_bits { @@ -5594,7 +5594,7 @@ struct 
mlx5_ifc_query_packet_reformat_context_out_bits { u8 reserved_at_40[0xa0]; - struct mlx5_ifc_packet_reformat_context_in_bits packet_reformat_context[0]; + struct mlx5_ifc_packet_reformat_context_in_bits packet_reformat_context[]; }; struct mlx5_ifc_query_packet_reformat_context_in_bits { @@ -5833,7 +5833,7 @@ struct mlx5_ifc_query_cq_out_bits { u8 reserved_at_280[0x600]; - u8 pas[0][0x40]; + u8 pas[][0x40]; }; struct mlx5_ifc_query_cq_in_bits { @@ -6440,7 +6440,7 @@ struct mlx5_ifc_modify_cq_in_bits { u8 reserved_at_300[0x580]; - u8 pas[0][0x40]; + u8 pas[][0x40]; }; struct mlx5_ifc_modify_cong_status_out_bits { @@ -6504,7 +6504,7 @@ struct mlx5_ifc_manage_pages_out_bits { u8 reserved_at_60[0x20]; - u8 pas[0][0x40]; + u8 pas[][0x40]; }; enum { @@ -6526,7 +6526,7 @@ struct mlx5_ifc_manage_pages_in_bits { u8 input_num_entries[0x20]; - u8 pas[0][0x40]; + u8 pas[][0x40]; }; struct mlx5_ifc_mad_ifc_out_bits { @@ -7481,7 +7481,7 @@ struct mlx5_ifc_create_xrc_srq_in_bits { u8 reserved_at_300[0x580]; - u8 pas[0][0x40]; + u8 pas[][0x40]; }; struct mlx5_ifc_create_tis_out_bits { @@ -7557,7 +7557,7 @@ struct mlx5_ifc_create_srq_in_bits { u8 reserved_at_280[0x600]; - u8 pas[0][0x40]; + u8 pas[][0x40]; }; struct mlx5_ifc_create_sq_out_bits { @@ -7718,7 +7718,7 @@ struct mlx5_ifc_create_qp_in_bits { u8 wq_umem_valid[0x1]; u8 reserved_at_861[0x1f]; - u8 pas[0][0x40]; + u8 pas[][0x40]; }; struct mlx5_ifc_create_psv_out_bits { @@ -7789,7 +7789,7 @@ struct mlx5_ifc_create_mkey_in_bits { u8 reserved_at_320[0x560]; - u8 klm_pas_mtt[0][0x20]; + u8 klm_pas_mtt[][0x20]; }; enum { @@ -7922,7 +7922,7 @@ struct mlx5_ifc_create_eq_in_bits { u8 reserved_at_3c0[0x4c0]; - u8 pas[0][0x40]; + u8 pas[][0x40]; }; struct mlx5_ifc_create_dct_out_bits { @@ -7979,7 +7979,7 @@ struct mlx5_ifc_create_cq_in_bits { u8 cq_umem_valid[0x1]; u8 reserved_at_2e1[0x59f]; - u8 pas[0][0x40]; + u8 pas[][0x40]; }; struct mlx5_ifc_config_int_moderation_out_bits { @@ -8335,7 +8335,7 @@ struct 
mlx5_ifc_access_register_out_bits { u8 reserved_at_40[0x40]; - u8 register_data[0][0x20]; + u8 register_data[][0x20]; }; enum { @@ -8355,7 +8355,7 @@ struct mlx5_ifc_access_register_in_bits { u8 argument[0x20]; - u8 register_data[0][0x20]; + u8 register_data[][0x20]; }; struct mlx5_ifc_sltp_reg_bits { @@ -9372,7 +9372,7 @@ struct mlx5_ifc_cmd_in_bits { u8 reserved_at_20[0x10]; u8 op_mod[0x10]; - u8 command[0][0x20]; + u8 command[][0x20]; }; struct mlx5_ifc_cmd_if_box_bits { @@ -9666,7 +9666,7 @@ struct mlx5_ifc_mcqi_reg_bits { u8 reserved_at_a0[0x10]; u8 data_size[0x10]; - union mlx5_ifc_mcqi_reg_data_bits data[0]; + union mlx5_ifc_mcqi_reg_data_bits data[]; }; struct mlx5_ifc_mcc_reg_bits { @@ -10252,7 +10252,7 @@ struct mlx5_ifc_umem_bits { u8 num_of_mtt[0x40]; - struct mlx5_ifc_mtt_bits mtt[0]; + struct mlx5_ifc_mtt_bits mtt[]; }; struct mlx5_ifc_uctx_bits { @@ -10377,7 +10377,7 @@ struct mlx5_ifc_mtrc_stdb_bits { u8 reserved_at_4[0x4]; u8 read_size[0x18]; u8 start_offset[0x20]; - u8 string_db_data[0]; + u8 string_db_data[]; }; struct mlx5_ifc_mtrc_ctrl_bits { @@ -10431,7 +10431,7 @@ struct mlx5_ifc_query_esw_functions_out_bits { struct mlx5_ifc_host_params_context_bits host_params_context; u8 reserved_at_280[0x180]; - u8 host_sf_enable[0][0x40]; + u8 host_sf_enable[][0x40]; }; struct mlx5_ifc_sf_partition_bits { @@ -10451,7 +10451,7 @@ struct mlx5_ifc_query_sf_partitions_out_bits { u8 reserved_at_60[0x20]; - struct mlx5_ifc_sf_partition_bits sf_partition[0]; + struct mlx5_ifc_sf_partition_bits sf_partition[]; }; struct mlx5_ifc_query_sf_partitions_in_bits { diff --git a/include/linux/mlx5/qp.h b/include/linux/mlx5/qp.h index f23eb18526fe..1af5e460b5f6 100644 --- a/include/linux/mlx5/qp.h +++ b/include/linux/mlx5/qp.h @@ -408,7 +408,7 @@ struct mlx5_wqe_signature_seg { struct mlx5_wqe_inline_seg { __be32 byte_count; - __be32 data[0]; + __be32 data[]; }; enum mlx5_sig_type { -- cgit v1.2.3 From ac02a451a6148bb9c395b39783ce7299eddf4f31 Mon Sep 17 00:00:00 2001 
From: Vladimir Oltean Date: Sun, 10 May 2020 19:37:43 +0300 Subject: net: dsa: sja1105: implement cross-chip bridging operations sja1105 uses dsa_8021q for DSA tagging, a format which is VLAN at heart and which is compatible with cascading. A complete description of this tagging format is in net/dsa/tag_8021q.c, but a quick summary is that each external-facing port tags incoming frames with a unique pvid, and this special VLAN is transmitted as tagged towards the inside of the system, and as untagged towards the exterior. The tag encodes the switch id and the source port index. This means that cross-chip bridging for dsa_8021q only entails adding the dsa_8021q pvids of one switch to the RX filter of the other switches. Everything else falls naturally into place, as long as the bottom-end of ports (the leaves in the tree) is comprised exclusively of dsa_8021q-compatible (i.e. sja1105 switches). Otherwise, there would be a chance that a front-panel switch transmits a packet tagged with a dsa_8021q header, header which it wouldn't be able to remove, and which would hence "leak" out. The only use case I tested (due to lack of board availability) was when the sja1105 switches are part of disjoint trees (however, this doesn't change the fact that multiple sja1105 switches still need unique switch identifiers in such a system). But in principle, even "true" single-tree setups (with DSA links) should work just as fine, except for a small change which I can't test: dsa_towards_port should be used instead of dsa_upstream_port (I made the assumption that the routing port that any sja1105 should use towards its neighbours is the CPU port. That might not hold true in other setups). 
Signed-off-by: Vladimir Oltean Reviewed-by: Florian Fainelli Signed-off-by: Jakub Kicinski --- include/linux/dsa/8021q.h | 45 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 45 insertions(+) (limited to 'include/linux') diff --git a/include/linux/dsa/8021q.h b/include/linux/dsa/8021q.h index c620d9139c28..b8daaec0896e 100644 --- a/include/linux/dsa/8021q.h +++ b/include/linux/dsa/8021q.h @@ -12,11 +12,33 @@ struct sk_buff; struct net_device; struct packet_type; +struct dsa_8021q_crosschip_link { + struct list_head list; + int port; + struct dsa_switch *other_ds; + int other_port; + refcount_t refcount; +}; + #if IS_ENABLED(CONFIG_NET_DSA_TAG_8021Q) int dsa_port_setup_8021q_tagging(struct dsa_switch *ds, int index, bool enabled); +int dsa_8021q_crosschip_link_apply(struct dsa_switch *ds, int port, + struct dsa_switch *other_ds, + int other_port, bool enabled); + +int dsa_8021q_crosschip_bridge_join(struct dsa_switch *ds, int port, + struct dsa_switch *other_ds, + int other_port, struct net_device *br, + struct list_head *crosschip_links); + +int dsa_8021q_crosschip_bridge_leave(struct dsa_switch *ds, int port, + struct dsa_switch *other_ds, + int other_port, struct net_device *br, + struct list_head *crosschip_links); + struct sk_buff *dsa_8021q_xmit(struct sk_buff *skb, struct net_device *netdev, u16 tpid, u16 tci); @@ -36,6 +58,29 @@ int dsa_port_setup_8021q_tagging(struct dsa_switch *ds, int index, return 0; } +int dsa_8021q_crosschip_link_apply(struct dsa_switch *ds, int port, + struct dsa_switch *other_ds, + int other_port, bool enabled) +{ + return 0; +} + +int dsa_8021q_crosschip_bridge_join(struct dsa_switch *ds, int port, + struct dsa_switch *other_ds, + int other_port, struct net_device *br, + struct list_head *crosschip_links) +{ + return 0; +} + +int dsa_8021q_crosschip_bridge_leave(struct dsa_switch *ds, int port, + struct dsa_switch *other_ds, + int other_port, struct net_device *br, + struct list_head *crosschip_links) +{ + return 0; +} 
+ struct sk_buff *dsa_8021q_xmit(struct sk_buff *skb, struct net_device *netdev, u16 tpid, u16 tci) { -- cgit v1.2.3 From 9c8255c888bac9221739c822132b405d4196bdd8 Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Thu, 7 May 2020 14:25:07 -0500 Subject: team: Replace zero-length array with flexible-array The current codebase makes use of the zero-length array language extension to the C90 standard, but the preferred mechanism to declare variable-length types such as these ones is a flexible array member[1][2], introduced in C99: struct foo { int stuff; struct boo array[]; }; By making use of the mechanism above, we will get a compiler warning in case the flexible array does not occur last in the structure, which will help us prevent some kind of undefined behavior bugs from being inadvertently introduced[3] to the codebase from now on. Also, notice that, dynamic memory allocations won't be affected by this change: "Flexible array members have incomplete type, and so the sizeof operator may not be applied. As a quirk of the original implementation of zero-length arrays, sizeof evaluates to zero."[1] sizeof(flexible-array-member) triggers a warning because flexible array members have incomplete type[1]. There are some instances of code in which the sizeof operator is being incorrectly/erroneously applied to zero-length arrays and the result is zero. Such instances may be hiding some bugs. So, this work (flexible-array member conversions) will also help to get completely rid of those sorts of issues. This issue was found with the help of Coccinelle. [1] https://gcc.gnu.org/onlinedocs/gcc/Zero-Length.html [2] https://github.com/KSPP/linux/issues/21 [3] commit 76497732932f ("cxgb3/l2t: Fix undefined behaviour") Signed-off-by: Gustavo A. R. Silva Signed-off-by: David S. 
Miller --- include/linux/if_team.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/if_team.h b/include/linux/if_team.h index 537dc2b8c879..add607943c95 100644 --- a/include/linux/if_team.h +++ b/include/linux/if_team.h @@ -67,7 +67,7 @@ struct team_port { u16 queue_id; struct list_head qom_list; /* node in queue override mapping list */ struct rcu_head rcu; - long mode_priv[0]; + long mode_priv[]; }; static inline struct team_port *team_port_get_rcu(const struct net_device *dev) -- cgit v1.2.3 From 0462b6bdb6445b887b8896f28be92e0d94c92e7b Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 11 May 2020 13:59:11 +0200 Subject: net: add a CMSG_USER_DATA macro Add a variant of CMSG_DATA that operates on a user pointer to avoid sparse warnings about casting to/from user pointers. Also fix up CMSG_DATA to rely on the gcc extension that allows void pointer arithmetic to cut down on the number of casts. Signed-off-by: Christoph Hellwig Signed-off-by: David S. 
Miller --- include/linux/socket.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/socket.h b/include/linux/socket.h index 54338fac45cb..4cc64d611cf4 100644 --- a/include/linux/socket.h +++ b/include/linux/socket.h @@ -94,7 +94,10 @@ struct cmsghdr { #define CMSG_ALIGN(len) ( ((len)+sizeof(long)-1) & ~(sizeof(long)-1) ) -#define CMSG_DATA(cmsg) ((void *)((char *)(cmsg) + sizeof(struct cmsghdr))) +#define CMSG_DATA(cmsg) \ + ((void *)(cmsg) + sizeof(struct cmsghdr)) +#define CMSG_USER_DATA(cmsg) \ + ((void __user *)(cmsg) + sizeof(struct cmsghdr)) #define CMSG_SPACE(len) (sizeof(struct cmsghdr) + CMSG_ALIGN(len)) #define CMSG_LEN(len) (sizeof(struct cmsghdr) + (len)) -- cgit v1.2.3 From 1f466e1f15cf1dac7c86798d694649fc42cd868a Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 11 May 2020 13:59:13 +0200 Subject: net: cleanly handle kernel vs user buffers for ->msg_control The msg_control field in struct msghdr can either contain a user pointer when used with the recvmsg system call, or a kernel pointer when used with sendmsg. To complicate things further kernel_recvmsg can stuff a kernel pointer in and then use set_fs to make the uaccess helpers accept it. Replace it with a union of a kernel pointer msg_control field, and a user pointer msg_control_user one, and allow kernel_recvmsg to operate on a proper kernel pointer using a bitfield to override the normal choice of a user pointer for recvmsg. Signed-off-by: Christoph Hellwig Signed-off-by: David S. 
Miller --- include/linux/socket.h | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/socket.h b/include/linux/socket.h index 4cc64d611cf4..04d2bc97f497 100644 --- a/include/linux/socket.h +++ b/include/linux/socket.h @@ -50,7 +50,17 @@ struct msghdr { void *msg_name; /* ptr to socket address structure */ int msg_namelen; /* size of socket address structure */ struct iov_iter msg_iter; /* data */ - void *msg_control; /* ancillary data */ + + /* + * Ancillary data. msg_control_user is the user buffer used for the + * recv* side when msg_control_is_user is set, msg_control is the kernel + * buffer used for all other cases. + */ + union { + void *msg_control; + void __user *msg_control_user; + }; + bool msg_control_is_user : 1; __kernel_size_t msg_controllen; /* ancillary data buffer length */ unsigned int msg_flags; /* flags on received message */ struct kiocb *msg_iocb; /* ptr to iocb for async requests */ -- cgit v1.2.3 From 1f66b0f0aec671f8fbc86d75b2efdf7c7e0f7880 Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Tue, 12 May 2020 20:20:26 +0300 Subject: net: dsa: tag_8021q: introduce a vid_is_dsa_8021q helper This function returns a boolean denoting whether the VLAN passed as argument is part of the 1024-3071 range that the dsa_8021q tagging scheme uses. Signed-off-by: Vladimir Oltean Reviewed-by: Florian Fainelli Signed-off-by: David S. 
Miller --- include/linux/dsa/8021q.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include/linux') diff --git a/include/linux/dsa/8021q.h b/include/linux/dsa/8021q.h index b8daaec0896e..ebc245ff838a 100644 --- a/include/linux/dsa/8021q.h +++ b/include/linux/dsa/8021q.h @@ -50,6 +50,8 @@ int dsa_8021q_rx_switch_id(u16 vid); int dsa_8021q_rx_source_port(u16 vid); +bool vid_is_dsa_8021q(u16 vid); + #else int dsa_port_setup_8021q_tagging(struct dsa_switch *ds, int index, @@ -107,6 +109,11 @@ int dsa_8021q_rx_source_port(u16 vid) return 0; } +bool vid_is_dsa_8021q(u16 vid) +{ + return false; +} + #endif /* IS_ENABLED(CONFIG_NET_DSA_TAG_8021Q) */ #endif /* _NET_DSA_8021Q_H */ -- cgit v1.2.3 From ec5ae61076d07be986df19773662506220757c9f Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Tue, 12 May 2020 20:20:29 +0300 Subject: net: dsa: sja1105: save/restore VLANs using a delta commit method Managing the VLAN table that is present in hardware will become very difficult once we add a third operating state (best_effort_vlan_filtering). That is because correct cleanup (not too little, not too much) becomes virtually impossible, when VLANs can be added from the bridge layer, from dsa_8021q for basic tagging, for cross-chip bridging, as well as retagging rules for sub-VLANs and cross-chip sub-VLANs. So we need to rethink VLAN interaction with the switch in a more scalable way. In preparation for that, use the priv->expect_dsa_8021q boolean to classify any VLAN request received through .port_vlan_add or .port_vlan_del towards either one of 2 internal lists: bridge VLANs and dsa_8021q VLANs. Then, implement a central sja1105_build_vlan_table method that creates a VLAN configuration from scratch based on the 2 lists of VLANs kept by the driver, and based on the VLAN awareness state. Currently, if we are VLAN-unaware, install the dsa_8021q VLANs, otherwise the bridge VLANs. 
Then, implement a delta commit procedure that identifies which VLANs from this new configuration are actually different from the config previously committed to hardware. We apply the delta through the dynamic configuration interface (we don't reset the switch). The result is that the hardware should see the exact sequence of operations as before this patch. This also helps remove the "br" argument passed to dsa_8021q_crosschip_bridge_join, which it was only using to figure out whether it should commit the configuration back to us or not, based on the VLAN awareness state of the bridge. We can simplify that, by always allowing those VLANs inside of our dsa_8021q_vlans list, and committing those to hardware when necessary. Signed-off-by: Vladimir Oltean Reviewed-by: Florian Fainelli Signed-off-by: David S. Miller --- include/linux/dsa/8021q.h | 19 ++++--------------- 1 file changed, 4 insertions(+), 15 deletions(-) (limited to 'include/linux') diff --git a/include/linux/dsa/8021q.h b/include/linux/dsa/8021q.h index ebc245ff838a..404bd2cce642 100644 --- a/include/linux/dsa/8021q.h +++ b/include/linux/dsa/8021q.h @@ -25,18 +25,14 @@ struct dsa_8021q_crosschip_link { int dsa_port_setup_8021q_tagging(struct dsa_switch *ds, int index, bool enabled); -int dsa_8021q_crosschip_link_apply(struct dsa_switch *ds, int port, - struct dsa_switch *other_ds, - int other_port, bool enabled); - int dsa_8021q_crosschip_bridge_join(struct dsa_switch *ds, int port, struct dsa_switch *other_ds, - int other_port, struct net_device *br, + int other_port, struct list_head *crosschip_links); int dsa_8021q_crosschip_bridge_leave(struct dsa_switch *ds, int port, struct dsa_switch *other_ds, - int other_port, struct net_device *br, + int other_port, struct list_head *crosschip_links); struct sk_buff *dsa_8021q_xmit(struct sk_buff *skb, struct net_device *netdev, @@ -60,16 +56,9 @@ int dsa_port_setup_8021q_tagging(struct dsa_switch *ds, int index, return 0; } -int 
dsa_8021q_crosschip_link_apply(struct dsa_switch *ds, int port, - struct dsa_switch *other_ds, - int other_port, bool enabled) -{ - return 0; -} - int dsa_8021q_crosschip_bridge_join(struct dsa_switch *ds, int port, struct dsa_switch *other_ds, - int other_port, struct net_device *br, + int other_port, struct list_head *crosschip_links) { return 0; @@ -77,7 +66,7 @@ int dsa_8021q_crosschip_bridge_join(struct dsa_switch *ds, int port, int dsa_8021q_crosschip_bridge_leave(struct dsa_switch *ds, int port, struct dsa_switch *other_ds, - int other_port, struct net_device *br, + int other_port, struct list_head *crosschip_links) { return 0; -- cgit v1.2.3 From 38b5beeae7a4cde87edabb0196fac1f55ae668ee Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Tue, 12 May 2020 20:20:32 +0300 Subject: net: dsa: sja1105: prepare tagger for handling DSA tags and VLAN simultaneously In VLAN-unaware mode, sja1105 uses VLAN tags with a custom TPID of 0xdadb. While in the yet-to-be introduced best_effort_vlan_filtering mode, it needs to work with normal VLAN TPID values. A complication arises when we must transmit a VLAN-tagged packet to the switch when it's in VLAN-aware mode. We need to construct a packet with 2 VLAN tags, and the switch will use the outer header for routing and pop it on egress. But sadly, here the 2 hardware generations don't behave the same: - E/T switches won't pop an ETH_P_8021AD tag on egress, it seems (packets will remain double-tagged). - P/Q/R/S switches will drop a packet with 2 ETH_P_8021Q tags (it looks like it tries to prevent VLAN hopping). But looks like the reverse is also true: - E/T switches have no problem popping the outer tag from packets with 2 ETH_P_8021Q tags. - P/Q/R/S will have no problem popping a single tag even if that is ETH_P_8021AD. So it is clear that if we want the hardware to work with dsa_8021q tagging in VLAN-aware mode, we need to send different TPIDs depending on revision. Keep that information in priv->info->qinq_tpid. 
The per-port tagger structure will hold an xmit_tpid value that depends not only upon the qinq_tpid, but also upon the VLAN awareness state itself (in case we must transmit using 0xdadb). Signed-off-by: Vladimir Oltean Reviewed-by: Florian Fainelli Signed-off-by: David S. Miller --- include/linux/dsa/sja1105.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/dsa/sja1105.h b/include/linux/dsa/sja1105.h index fa5735c353cd..f821d08b1b5f 100644 --- a/include/linux/dsa/sja1105.h +++ b/include/linux/dsa/sja1105.h @@ -59,6 +59,7 @@ struct sja1105_port { struct sja1105_tagger_data *data; struct dsa_port *dp; bool hwts_tx_en; + u16 xmit_tpid; }; #endif /* _NET_DSA_SJA1105_H */ -- cgit v1.2.3 From 3eaae1d05f2b5be1be834bfad64f8fc2ad39a56d Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Tue, 12 May 2020 20:20:33 +0300 Subject: net: dsa: tag_8021q: support up to 8 VLANs per port using sub-VLANs For switches that support VLAN retagging, such as sja1105, we extend dsa_8021q by encoding a "sub-VLAN" into the remaining 3 free bits in the dsa_8021q tag. A sub-VLAN is nothing more than a number in the range 0-7, which serves as an index into a per-port driver lookup table. The sub-VLAN value of zero means that traffic is untagged (this is also backwards-compatible with dsa_8021q without retagging). The switch should be configured to retag VLAN-tagged traffic that gets transmitted towards the CPU port (and towards the CPU only). Example: bridge vlan add dev sw1p0 vid 100 The switch retags frames received on port 0, going to the CPU, and having VID 100, to the VID of 1104 (0x0450). 
In dsa_8021q language: | 11 | 10 | 9 | 8 | 7 | 6 | 5 | 4 | 3 | 2 | 1 | 0 | +-----------+-----+-----------------+-----------+-----------------------+ | DIR | SVL | SWITCH_ID | SUBVLAN | PORT | +-----------+-----+-----------------+-----------+-----------------------+ 0x0450 means: - DIR = 0b01: this is an RX VLAN - SUBVLAN = 0b001: this is subvlan #1 - SWITCH_ID = 0b001: this is switch 1 (see the name "sw1p0") - PORT = 0b0000: this is port 0 (see the name "sw1p0") The driver also remembers the "1 -> 100" mapping. In the hotpath, if the sub-VLAN from the tag encodes a non-untagged frame, this mapping is used to create a VLAN hwaccel tag, with the value of 100. Signed-off-by: Vladimir Oltean Reviewed-by: Florian Fainelli Signed-off-by: David S. Miller --- include/linux/dsa/8021q.h | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) (limited to 'include/linux') diff --git a/include/linux/dsa/8021q.h b/include/linux/dsa/8021q.h index 404bd2cce642..311aa04e7520 100644 --- a/include/linux/dsa/8021q.h +++ b/include/linux/dsa/8021q.h @@ -20,6 +20,8 @@ struct dsa_8021q_crosschip_link { refcount_t refcount; }; +#define DSA_8021Q_N_SUBVLAN 8 + #if IS_ENABLED(CONFIG_NET_DSA_TAG_8021Q) int dsa_port_setup_8021q_tagging(struct dsa_switch *ds, int index, @@ -42,10 +44,14 @@ u16 dsa_8021q_tx_vid(struct dsa_switch *ds, int port); u16 dsa_8021q_rx_vid(struct dsa_switch *ds, int port); +u16 dsa_8021q_rx_vid_subvlan(struct dsa_switch *ds, int port, u16 subvlan); + int dsa_8021q_rx_switch_id(u16 vid); int dsa_8021q_rx_source_port(u16 vid); +u16 dsa_8021q_rx_subvlan(u16 vid); + bool vid_is_dsa_8021q(u16 vid); #else @@ -88,6 +94,11 @@ u16 dsa_8021q_rx_vid(struct dsa_switch *ds, int port) return 0; } +u16 dsa_8021q_rx_vid_subvlan(struct dsa_switch *ds, int port, u16 subvlan) +{ + return 0; +} + int dsa_8021q_rx_switch_id(u16 vid) { return 0; @@ -98,6 +109,11 @@ int dsa_8021q_rx_source_port(u16 vid) return 0; } +u16 dsa_8021q_rx_subvlan(u16 vid) +{ + return 0; +} + bool 
vid_is_dsa_8021q(u16 vid) { return false; -- cgit v1.2.3 From 84eeb5d460e399795e9a92a0cd44999254886150 Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Tue, 12 May 2020 20:20:34 +0300 Subject: net: dsa: tag_sja1105: implement sub-VLAN decoding Create a subvlan_map as part of each port's tagger private structure. This keeps reverse mappings of bridge-to-dsa_8021q VLAN retagging rules. Note that as of this patch, this piece of code is never engaged, due to the fact that the driver hasn't installed any retagging rule, so we'll always see packets with a subvlan code of 0 (untagged). Signed-off-by: Vladimir Oltean Reviewed-by: Florian Fainelli Signed-off-by: David S. Miller --- include/linux/dsa/sja1105.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/dsa/sja1105.h b/include/linux/dsa/sja1105.h index f821d08b1b5f..dd93735ae228 100644 --- a/include/linux/dsa/sja1105.h +++ b/include/linux/dsa/sja1105.h @@ -9,6 +9,7 @@ #include #include +#include #include #define ETH_P_SJA1105 ETH_P_DSA_8021Q @@ -53,6 +54,7 @@ struct sja1105_skb_cb { ((struct sja1105_skb_cb *)DSA_SKB_CB_PRIV(skb)) struct sja1105_port { + u16 subvlan_map[DSA_8021Q_N_SUBVLAN]; struct kthread_worker *xmit_worker; struct kthread_work xmit_work; struct sk_buff_head xmit_queue; -- cgit v1.2.3 From 9254f8ed15b6dcc9b04b9ad32863a7518cc5a5b1 Mon Sep 17 00:00:00 2001 From: Maor Gottlieb Date: Mon, 4 May 2020 08:30:10 +0300 Subject: net/mlx5: Add support in forward to namespace Currently, fs_core supports a rule that forwards the traffic to continue matching in the next priority; now we add support for forwarding the traffic to continue matching in the next namespace. 
Signed-off-by: Maor Gottlieb Reviewed-by: Mark Bloch Reviewed-by: Mark Zhang Signed-off-by: Leon Romanovsky --- include/linux/mlx5/fs.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/mlx5/fs.h b/include/linux/mlx5/fs.h index e2d13e074067..6c5aa0a21425 100644 --- a/include/linux/mlx5/fs.h +++ b/include/linux/mlx5/fs.h @@ -42,6 +42,7 @@ enum { MLX5_FLOW_CONTEXT_ACTION_FWD_NEXT_PRIO = 1 << 16, MLX5_FLOW_CONTEXT_ACTION_ENCRYPT = 1 << 17, MLX5_FLOW_CONTEXT_ACTION_DECRYPT = 1 << 18, + MLX5_FLOW_CONTEXT_ACTION_FWD_NEXT_NS = 1 << 19, }; enum { -- cgit v1.2.3 From 21aef70eade22a656297c28d5da93301915d2ac2 Mon Sep 17 00:00:00 2001 From: Yonghong Song Date: Wed, 13 May 2020 11:02:16 -0700 Subject: bpf: Change btf_iter func proto prefix to "bpf_iter_" This is to be consistent with tracing and lsm programs which have prefix "bpf_trace_" and "bpf_lsm_" respectively. Suggested-by: Alexei Starovoitov Signed-off-by: Yonghong Song Signed-off-by: Alexei Starovoitov Acked-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20200513180216.2949387-1-yhs@fb.com --- include/linux/bpf.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index cf4b6e44f2bc..ab94dfd8826f 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -1131,10 +1131,10 @@ struct bpf_link *bpf_link_get_from_fd(u32 ufd); int bpf_obj_pin_user(u32 ufd, const char __user *pathname); int bpf_obj_get_user(const char __user *pathname, int flags); -#define BPF_ITER_FUNC_PREFIX "__bpf_iter__" +#define BPF_ITER_FUNC_PREFIX "bpf_iter_" #define DEFINE_BPF_ITER_FUNC(target, args...) 
\ - extern int __bpf_iter__ ## target(args); \ - int __init __bpf_iter__ ## target(args) { return 0; } + extern int bpf_iter_ ## target(args); \ + int __init bpf_iter_ ## target(args) { return 0; } typedef int (*bpf_iter_init_seq_priv_t)(void *private_data); typedef void (*bpf_iter_fini_seq_priv_t)(void *private_data); -- cgit v1.2.3 From 15172a46fa2796c1a1358a36babd31274716ed41 Mon Sep 17 00:00:00 2001 From: Yonghong Song Date: Wed, 13 May 2020 11:02:19 -0700 Subject: bpf: net: Refactor bpf_iter target registration Currently bpf_iter_reg_target takes parameters from target and allocates memory to save them. This is really not necessary, esp. in the future we may grow information passed from targets to bpf_iter manager. The patch refactors the code so target reg_info becomes static and bpf_iter manager can just take a reference to it. Signed-off-by: Yonghong Song Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20200513180219.2949605-1-yhs@fb.com --- include/linux/bpf.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index ab94dfd8826f..6fa773e2d1bf 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -1153,7 +1153,7 @@ struct bpf_iter_meta { u64 seq_num; }; -int bpf_iter_reg_target(struct bpf_iter_reg *reg_info); +int bpf_iter_reg_target(const struct bpf_iter_reg *reg_info); void bpf_iter_unreg_target(const char *target); bool bpf_iter_prog_supported(struct bpf_prog *prog); int bpf_iter_link_attach(const union bpf_attr *attr, struct bpf_prog *prog); -- cgit v1.2.3 From ab2ee4fcb9d61fd57db70db694adbcf54662bd80 Mon Sep 17 00:00:00 2001 From: Yonghong Song Date: Wed, 13 May 2020 11:02:20 -0700 Subject: bpf: Change func bpf_iter_unreg_target() signature Change func bpf_iter_unreg_target() parameter from target name to target reg_info, similar to bpf_iter_reg_target(). 
Signed-off-by: Yonghong Song Signed-off-by: Alexei Starovoitov Acked-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20200513180220.2949737-1-yhs@fb.com --- include/linux/bpf.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 6fa773e2d1bf..534174eca86b 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -1154,7 +1154,7 @@ struct bpf_iter_meta { }; int bpf_iter_reg_target(const struct bpf_iter_reg *reg_info); -void bpf_iter_unreg_target(const char *target); +void bpf_iter_unreg_target(const struct bpf_iter_reg *reg_info); bool bpf_iter_prog_supported(struct bpf_prog *prog); int bpf_iter_link_attach(const union bpf_attr *attr, struct bpf_prog *prog); int bpf_iter_new_fd(struct bpf_link *link); -- cgit v1.2.3 From 3c32cc1bceba8a1755dc35cd97516f6c67856844 Mon Sep 17 00:00:00 2001 From: Yonghong Song Date: Wed, 13 May 2020 11:02:21 -0700 Subject: bpf: Enable bpf_iter targets registering ctx argument types Commit b121b341e598 ("bpf: Add PTR_TO_BTF_ID_OR_NULL support") adds a field btf_id_or_null_non0_off to bpf_prog->aux structure to indicate that the first ctx argument is PTR_TO_BTF_ID reg_type and all others are PTR_TO_BTF_ID_OR_NULL. This approach does not really scale if we have other different reg types in the future, e.g., a pointer to a buffer. This patch enables bpf_iter targets registering ctx argument reg types which may be different from the default one. For example, for pointers to structures, the default reg_type is PTR_TO_BTF_ID for tracing program. The target can register a particular pointer type as PTR_TO_BTF_ID_OR_NULL which can be used by the verifier to enforce accesses. 
Signed-off-by: Yonghong Song Signed-off-by: Alexei Starovoitov Acked-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20200513180221.2949882-1-yhs@fb.com --- include/linux/bpf.h | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 534174eca86b..c45d198ac38c 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -643,6 +643,12 @@ struct bpf_jit_poke_descriptor { u16 reason; }; +/* reg_type info for ctx arguments */ +struct bpf_ctx_arg_aux { + u32 offset; + enum bpf_reg_type reg_type; +}; + struct bpf_prog_aux { atomic64_t refcnt; u32 used_map_cnt; @@ -654,12 +660,13 @@ struct bpf_prog_aux { u32 func_cnt; /* used by non-func prog as the number of func progs */ u32 func_idx; /* 0 for non-func prog, the index in func array for func prog */ u32 attach_btf_id; /* in-kernel BTF type id to attach to */ + u32 ctx_arg_info_size; + const struct bpf_ctx_arg_aux *ctx_arg_info; struct bpf_prog *linked_prog; bool verifier_zext; /* Zero extensions has been inserted by verifier. 
*/ bool offload_requested; bool attach_btf_trace; /* true if attaching to BTF-enabled raw tp */ bool func_proto_unreliable; - bool btf_id_or_null_non0_off; enum bpf_tramp_prog_type trampoline_prog_type; struct bpf_trampoline *trampoline; struct hlist_node tramp_hlist; @@ -1139,12 +1146,15 @@ int bpf_obj_get_user(const char __user *pathname, int flags); typedef int (*bpf_iter_init_seq_priv_t)(void *private_data); typedef void (*bpf_iter_fini_seq_priv_t)(void *private_data); +#define BPF_ITER_CTX_ARG_MAX 2 struct bpf_iter_reg { const char *target; const struct seq_operations *seq_ops; bpf_iter_init_seq_priv_t init_seq_private; bpf_iter_fini_seq_priv_t fini_seq_private; u32 seq_priv_size; + u32 ctx_arg_info_size; + struct bpf_ctx_arg_aux ctx_arg_info[BPF_ITER_CTX_ARG_MAX]; }; struct bpf_iter_meta { -- cgit v1.2.3 From 11ecf8c55b91806e4dc6a1b9fe7cbf68cdc9b006 Mon Sep 17 00:00:00 2001 From: Michael Walle Date: Wed, 13 May 2020 18:35:23 +0200 Subject: net: phy: broadcom: add cable test support Most modern broadcom PHYs support ECD (enhanced cable diagnostics). Add support for it in the bcm-phy-lib so they can easily be used in the PHY driver. There are two access methods for ECD: legacy by expansion registers and via the new RDB registers which are exclusive. Provide functions in two variants where the PHY driver can choose from. To keep things simple for now, we just switch the register access to expansion registers in the RDB variant for now. On the flipside, we have to keep a bus lock to prevent any other non-legacy access on the PHY. The results of the intra-pair tests are inconclusive (at least for the BCM54140). Most of the times half the length is reported but sometimes the length is correct. Signed-off-by: Michael Walle Reviewed-by: Florian Fainelli Reviewed-by: Andrew Lunn Signed-off-by: David S. 
Miller --- include/linux/brcmphy.h | 52 +++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 52 insertions(+) (limited to 'include/linux') diff --git a/include/linux/brcmphy.h b/include/linux/brcmphy.h index 58d0150acc3e..d41624db6de2 100644 --- a/include/linux/brcmphy.h +++ b/include/linux/brcmphy.h @@ -119,6 +119,11 @@ #define MII_BCM54XX_RDB_ADDR 0x1e #define MII_BCM54XX_RDB_DATA 0x1f +/* legacy access control via rdb/expansion register */ +#define BCM54XX_RDB_REG0087 0x0087 +#define BCM54XX_EXP_REG7E (MII_BCM54XX_EXP_SEL_ER + 0x7E) +#define BCM54XX_ACCESS_MODE_LEGACY_EN BIT(15) + /* * AUXILIARY CONTROL SHADOW ACCESS REGISTERS. (PHY REG 0x18) */ @@ -294,4 +299,51 @@ #define MII_BRCM_CORE_EXPB0 0xB0 #define MII_BRCM_CORE_EXPB1 0xB1 +/* Enhanced Cable Diagnostics */ +#define BCM54XX_RDB_ECD_CTRL 0x2a0 +#define BCM54XX_EXP_ECD_CTRL (MII_BCM54XX_EXP_SEL_ER + 0xc0) + +#define BCM54XX_ECD_CTRL_CABLE_TYPE_CAT3 1 /* CAT3 or worse */ +#define BCM54XX_ECD_CTRL_CABLE_TYPE_CAT5 0 /* CAT5 or better */ +#define BCM54XX_ECD_CTRL_CABLE_TYPE_MASK BIT(0) /* cable type */ +#define BCM54XX_ECD_CTRL_INVALID BIT(3) /* invalid result */ +#define BCM54XX_ECD_CTRL_UNIT_CM 0 /* centimeters */ +#define BCM54XX_ECD_CTRL_UNIT_M 1 /* meters */ +#define BCM54XX_ECD_CTRL_UNIT_MASK BIT(10) /* cable length unit */ +#define BCM54XX_ECD_CTRL_IN_PROGRESS BIT(11) /* test in progress */ +#define BCM54XX_ECD_CTRL_BREAK_LINK BIT(12) /* unconnect link + * during test + */ +#define BCM54XX_ECD_CTRL_CROSS_SHORT_DIS BIT(13) /* disable inter-pair + * short check + */ +#define BCM54XX_ECD_CTRL_RUN BIT(15) /* run immediate */ + +#define BCM54XX_RDB_ECD_FAULT_TYPE 0x2a1 +#define BCM54XX_EXP_ECD_FAULT_TYPE (MII_BCM54XX_EXP_SEL_ER + 0xc1) +#define BCM54XX_ECD_FAULT_TYPE_INVALID 0x0 +#define BCM54XX_ECD_FAULT_TYPE_OK 0x1 +#define BCM54XX_ECD_FAULT_TYPE_OPEN 0x2 +#define BCM54XX_ECD_FAULT_TYPE_SAME_SHORT 0x3 /* short same pair */ +#define BCM54XX_ECD_FAULT_TYPE_CROSS_SHORT 0x4 /* short different pairs 
*/ +#define BCM54XX_ECD_FAULT_TYPE_BUSY 0x9 +#define BCM54XX_ECD_FAULT_TYPE_PAIR_D_MASK GENMASK(3, 0) +#define BCM54XX_ECD_FAULT_TYPE_PAIR_C_MASK GENMASK(7, 4) +#define BCM54XX_ECD_FAULT_TYPE_PAIR_B_MASK GENMASK(11, 8) +#define BCM54XX_ECD_FAULT_TYPE_PAIR_A_MASK GENMASK(15, 12) +#define BCM54XX_ECD_PAIR_A_LENGTH_RESULTS 0x2a2 +#define BCM54XX_ECD_PAIR_B_LENGTH_RESULTS 0x2a3 +#define BCM54XX_ECD_PAIR_C_LENGTH_RESULTS 0x2a4 +#define BCM54XX_ECD_PAIR_D_LENGTH_RESULTS 0x2a5 + +#define BCM54XX_RDB_ECD_PAIR_A_LENGTH_RESULTS 0x2a2 +#define BCM54XX_EXP_ECD_PAIR_A_LENGTH_RESULTS (MII_BCM54XX_EXP_SEL_ER + 0xc2) +#define BCM54XX_RDB_ECD_PAIR_B_LENGTH_RESULTS 0x2a3 +#define BCM54XX_EXP_ECD_PAIR_B_LENGTH_RESULTS (MII_BCM54XX_EXP_SEL_ER + 0xc3) +#define BCM54XX_RDB_ECD_PAIR_C_LENGTH_RESULTS 0x2a4 +#define BCM54XX_EXP_ECD_PAIR_C_LENGTH_RESULTS (MII_BCM54XX_EXP_SEL_ER + 0xc4) +#define BCM54XX_RDB_ECD_PAIR_D_LENGTH_RESULTS 0x2a5 +#define BCM54XX_EXP_ECD_PAIR_D_LENGTH_RESULTS (MII_BCM54XX_EXP_SEL_ER + 0xc5) +#define BCM54XX_ECD_LENGTH_RESULTS_INVALID 0xffff + #endif /* _LINUX_BRCMPHY_H */ -- cgit v1.2.3 From d639836ab3363f935a9a4336cb4ea3828d0437dd Mon Sep 17 00:00:00 2001 From: Igor Russkikh Date: Thu, 14 May 2020 12:57:17 +0300 Subject: net: qed: adding hw_err states and handling Here we introduce qed device error tracking flags and error types. qed_hw_err_notify is an entry point to report errors. It'll notify higher level drivers (qede/qedr/etc) to handle and recover the error. List of possible errors comes from hardware interfaces, but could be extended in future. Signed-off-by: Ariel Elior Signed-off-by: Michal Kalderon Signed-off-by: Igor Russkikh Signed-off-by: David S. 
Miller --- include/linux/qed/qed_if.h | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'include/linux') diff --git a/include/linux/qed/qed_if.h b/include/linux/qed/qed_if.h index 8f29e0d8a7b3..1b7d9548ee43 100644 --- a/include/linux/qed/qed_if.h +++ b/include/linux/qed/qed_if.h @@ -607,6 +607,16 @@ struct qed_sb_info { struct qed_dev *cdev; }; +enum qed_hw_err_type { + QED_HW_ERR_FAN_FAIL, + QED_HW_ERR_MFW_RESP_FAIL, + QED_HW_ERR_HW_ATTN, + QED_HW_ERR_DMAE_FAIL, + QED_HW_ERR_RAMROD_FAIL, + QED_HW_ERR_FW_ASSERT, + QED_HW_ERR_LAST, +}; + enum qed_dev_type { QED_DEV_TYPE_BB, QED_DEV_TYPE_AH, @@ -814,6 +824,8 @@ struct qed_common_cb_ops { void (*link_update)(void *dev, struct qed_link_output *link); void (*schedule_recovery_handler)(void *dev); + void (*schedule_hw_err_handler)(void *dev, + enum qed_hw_err_type err_type); void (*dcbx_aen)(void *dev, struct qed_dcbx_get *get, u32 mib_type); void (*get_generic_tlv_data)(void *dev, struct qed_generic_tlvs *data); void (*get_protocol_tlv_data)(void *dev, void *data); -- cgit v1.2.3 From 936c7ba4dd5e94a3fc784f2296de5d577a9b5e43 Mon Sep 17 00:00:00 2001 From: Igor Russkikh Date: Thu, 14 May 2020 12:57:22 +0300 Subject: net: qed: attention clearing properties On different hardware events we have to respond differently, on some of hardware indications hw attention (error condition) should be cleared by the driver to continue normal functioning. Here we introduce attention clear flags, and put them on some important events (in aeu_descs). Signed-off-by: Ariel Elior Signed-off-by: Michal Kalderon Signed-off-by: Igor Russkikh Signed-off-by: David S. 
Miller --- include/linux/qed/qed_if.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'include/linux') diff --git a/include/linux/qed/qed_if.h b/include/linux/qed/qed_if.h index 1b7d9548ee43..978e91e9ab65 100644 --- a/include/linux/qed/qed_if.h +++ b/include/linux/qed/qed_if.h @@ -1046,6 +1046,15 @@ struct qed_common_ops { */ int (*set_led)(struct qed_dev *cdev, enum qed_led_mode mode); + +/** + * @brief attn_clr_enable - Prevent attentions from being reasserted + * + * @param cdev + * @param clr_enable + */ + void (*attn_clr_enable)(struct qed_dev *cdev, bool clr_enable); + /** * @brief db_recovery_add - add doorbell information to the doorbell * recovery mechanism. -- cgit v1.2.3 From 8f76812e1cc4d561c3efc3b2586c686b5428d31f Mon Sep 17 00:00:00 2001 From: Igor Russkikh Date: Thu, 14 May 2020 12:57:27 +0300 Subject: net: qed: fix bad formatting On some adjacent code, fix bad code formatting Signed-off-by: Ariel Elior Signed-off-by: Michal Kalderon Signed-off-by: Igor Russkikh Signed-off-by: David S. 
Miller --- include/linux/qed/qed_if.h | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/qed/qed_if.h b/include/linux/qed/qed_if.h index 978e91e9ab65..48325d7790f8 100644 --- a/include/linux/qed/qed_if.h +++ b/include/linux/qed/qed_if.h @@ -821,12 +821,11 @@ enum qed_nvm_flash_cmd { struct qed_common_cb_ops { void (*arfs_filter_op)(void *dev, void *fltr, u8 fw_rc); - void (*link_update)(void *dev, - struct qed_link_output *link); + void (*link_update)(void *dev, struct qed_link_output *link); void (*schedule_recovery_handler)(void *dev); void (*schedule_hw_err_handler)(void *dev, enum qed_hw_err_type err_type); - void (*dcbx_aen)(void *dev, struct qed_dcbx_get *get, u32 mib_type); + void (*dcbx_aen)(void *dev, struct qed_dcbx_get *get, u32 mib_type); void (*get_generic_tlv_data)(void *dev, struct qed_generic_tlvs *data); void (*get_protocol_tlv_data)(void *dev, void *data); }; -- cgit v1.2.3 From a17b53c4a4b55ec322c132b6670743612229ee9c Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Wed, 13 May 2020 16:03:53 -0700 Subject: bpf, capability: Introduce CAP_BPF Split BPF operations that are allowed under CAP_SYS_ADMIN into combination of CAP_BPF, CAP_PERFMON, CAP_NET_ADMIN. For backward compatibility include them in CAP_SYS_ADMIN as well. The end result provides simple safety model for applications that use BPF: - to load tracing program types BPF_PROG_TYPE_{KPROBE, TRACEPOINT, PERF_EVENT, RAW_TRACEPOINT, etc} use CAP_BPF and CAP_PERFMON - to load networking program types BPF_PROG_TYPE_{SCHED_CLS, XDP, SK_SKB, etc} use CAP_BPF and CAP_NET_ADMIN There are few exceptions from this rule: - bpf_trace_printk() is allowed in networking programs, but it's using tracing mechanism, hence this helper needs additional CAP_PERFMON if networking program is using this helper. - BPF_F_ZERO_SEED flag for hash/lru map is allowed under CAP_SYS_ADMIN only to discourage production use. 
- BPF HW offload is allowed under CAP_SYS_ADMIN. - bpf_probe_write_user() is allowed under CAP_SYS_ADMIN only. CAPs are not checked at attach/detach time with two exceptions: - loading BPF_PROG_TYPE_CGROUP_SKB is allowed for unprivileged users, hence CAP_NET_ADMIN is required at attach time. - flow_dissector detach doesn't check prog FD at detach, hence CAP_NET_ADMIN is required at detach time. CAP_SYS_ADMIN is required to iterate BPF objects (progs, maps, links) via get_next_id command and convert them to file descriptor via GET_FD_BY_ID command. This restriction guarantees that multiple tasks with CAP_BPF are not able to affect each other. That leads to clean isolation of tasks. For example: task A with CAP_BPF and CAP_NET_ADMIN loads and attaches a firewall via bpf_link. task B with the same capabilities cannot detach that firewall unless task A explicitly passed link FD to task B via scm_rights or bpffs. CAP_SYS_ADMIN can still detach/unload everything. Two networking user apps with CAP_SYS_ADMIN and CAP_NET_ADMIN can accidentally mess with each other's programs and maps. Two networking user apps with CAP_NET_ADMIN and CAP_BPF cannot affect each other. CAP_NET_ADMIN + CAP_BPF allows networking programs access only packet data. Such networking progs cannot access arbitrary kernel memory or leak pointers. bpftool, bpftrace, bcc tools binaries should NOT be installed with CAP_BPF and CAP_PERFMON, since unpriv users will be able to read kernel secrets. But users with these two permissions will be able to use these tracing tools. CAP_PERFMON is least secure, since it allows kprobes and kernel memory access. CAP_NET_ADMIN can stop network traffic via iproute2. CAP_BPF is the safest from security point of view and harmless on its own. Having CAP_BPF and/or CAP_NET_ADMIN is not enough to write into arbitrary map and if that map is used by firewall-like bpf prog. CAP_BPF allows many bpf prog_load commands in parallel. 
The verifier may consume large amount of memory and significantly slow down the system. Existing unprivileged BPF operations are not affected. In particular unprivileged users are allowed to load socket_filter and cg_skb program types and to create array, hash, prog_array, map-in-map map types. Signed-off-by: Alexei Starovoitov Signed-off-by: Daniel Borkmann Link: https://lore.kernel.org/bpf/20200513230355.7858-2-alexei.starovoitov@gmail.com --- include/linux/capability.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/capability.h b/include/linux/capability.h index 027d7e4a853b..b4345b38a6be 100644 --- a/include/linux/capability.h +++ b/include/linux/capability.h @@ -256,6 +256,11 @@ static inline bool perfmon_capable(void) return capable(CAP_PERFMON) || capable(CAP_SYS_ADMIN); } +static inline bool bpf_capable(void) +{ + return capable(CAP_BPF) || capable(CAP_SYS_ADMIN); +} + /* audit system wants to get cap info from files as well */ extern int get_vfs_caps_from_disk(const struct dentry *dentry, struct cpu_vfs_cap_data *cpu_caps); -- cgit v1.2.3 From 2c78ee898d8f10ae6fb2fa23a3fbaec96b1b7366 Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Wed, 13 May 2020 16:03:54 -0700 Subject: bpf: Implement CAP_BPF Implement permissions as stated in uapi/linux/capability.h In order to do that the verifier allow_ptr_leaks flag is split into four flags and they are set as: env->allow_ptr_leaks = bpf_allow_ptr_leaks(); env->bypass_spec_v1 = bpf_bypass_spec_v1(); env->bypass_spec_v4 = bpf_bypass_spec_v4(); env->bpf_capable = bpf_capable(); The first three currently equivalent to perfmon_capable(), since leaking kernel pointers and reading kernel memory via side channel attacks is roughly equivalent to reading kernel memory with cap_perfmon. 'bpf_capable' enables bounded loops, precision tracking, bpf to bpf calls and other verifier features. 'allow_ptr_leaks' enable ptr leaks, ptr conversions, subtraction of pointers. 
'bypass_spec_v1' disables speculative analysis in the verifier, run time mitigations in bpf array, and enables indirect variable access in bpf programs. 'bypass_spec_v4' disables emission of sanitation code by the verifier. That means that the networking BPF program loaded with CAP_BPF + CAP_NET_ADMIN will have speculative checks done by the verifier and other spectre mitigation applied. Such networking BPF program will not be able to leak kernel pointers and will not be able to access arbitrary kernel memory. Signed-off-by: Alexei Starovoitov Signed-off-by: Daniel Borkmann Link: https://lore.kernel.org/bpf/20200513230355.7858-3-alexei.starovoitov@gmail.com --- include/linux/bpf.h | 18 +++++++++++++++++- include/linux/bpf_verifier.h | 3 +++ 2 files changed, 20 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index c45d198ac38c..efe8836b5c48 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -19,6 +19,7 @@ #include #include #include +#include struct bpf_verifier_env; struct bpf_verifier_log; @@ -119,7 +120,7 @@ struct bpf_map { struct bpf_map_memory memory; char name[BPF_OBJ_NAME_LEN]; u32 btf_vmlinux_value_type_id; - bool unpriv_array; + bool bypass_spec_v1; bool frozen; /* write-once; write-protected by freeze_mutex */ /* 22 bytes hole */ @@ -1095,6 +1096,21 @@ struct bpf_map *bpf_map_get_curr_or_next(u32 *id); extern int sysctl_unprivileged_bpf_disabled; +static inline bool bpf_allow_ptr_leaks(void) +{ + return perfmon_capable(); +} + +static inline bool bpf_bypass_spec_v1(void) +{ + return perfmon_capable(); +} + +static inline bool bpf_bypass_spec_v4(void) +{ + return perfmon_capable(); +} + int bpf_map_new_fd(struct bpf_map *map, int flags); int bpf_prog_new_fd(struct bpf_prog *prog); diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h index 6abd5a778fcd..ea833087e853 100644 --- a/include/linux/bpf_verifier.h +++ b/include/linux/bpf_verifier.h @@ -375,6 +375,9 @@ 
struct bpf_verifier_env { u32 used_map_cnt; /* number of used maps */ u32 id_gen; /* used to generate unique reg IDs */ bool allow_ptr_leaks; + bool bpf_capable; + bool bypass_spec_v1; + bool bypass_spec_v4; bool seen_direct_write; struct bpf_insn_aux_data *insn_aux_data; /* array of per-insn state */ const struct bpf_line_info *prev_linfo; -- cgit v1.2.3 From b0ed0bbfb3046ed127f6004b5893ccb6cdd9ba90 Mon Sep 17 00:00:00 2001 From: Kevin Lo Date: Sat, 16 May 2020 01:24:47 +0800 Subject: net: phy: broadcom: add support for BCM54811 PHY The BCM54811 PHY shares many similarities with the already supported BCM54810 PHY but additionally requires some semi-unique configuration. Signed-off-by: Kevin Lo Reviewed-by: Florian Fainelli Signed-off-by: David S. Miller --- include/linux/brcmphy.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/brcmphy.h b/include/linux/brcmphy.h index d41624db6de2..6ad4c000661a 100644 --- a/include/linux/brcmphy.h +++ b/include/linux/brcmphy.h @@ -17,6 +17,7 @@ #define PHY_ID_BCM5395 0x0143bcf0 #define PHY_ID_BCM53125 0x03625f20 #define PHY_ID_BCM54810 0x03625d00 +#define PHY_ID_BCM54811 0x03625cc0 #define PHY_ID_BCM5482 0x0143bcb0 #define PHY_ID_BCM5411 0x00206070 #define PHY_ID_BCM5421 0x002060e0 @@ -255,6 +256,7 @@ #define BCM54810_EXP_BROADREACH_LRE_MISC_CTL_EN (1 << 0) #define BCM54810_SHD_CLK_CTL 0x3 #define BCM54810_SHD_CLK_CTL_GTXCLK_EN (1 << 9) +#define BCM54810_SHD_SCR3_TRDDAPD 0x0100 /* BCM54612E Registers */ #define BCM54612E_EXP_SPARE0 (MII_BCM54XX_EXP_SEL_ETC + 0x34) -- cgit v1.2.3 From 90bf45134d55d626ae2713cac50cda10c6c8b0c2 Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Fri, 15 May 2020 19:22:15 +0200 Subject: mptcp: add new sock flag to deal with join subflows MP_JOIN subflows must not land into the accept queue. Currently tcp_check_req() calls an mptcp specific helper to detect such scenario. Such helper leverages the subflow context to check for MP_JOIN subflows. 
We need to deal also with MP JOIN failures, even when the subflow context is not available due to allocation failure. A possible solution would be changing the syn_recv_sock() signature to allow returning a more descriptive action/error code and deal with that in tcp_check_req(). Since the above need is MPTCP specific, this patch instead uses a TCP request socket hole to add a MPTCP specific flag. Such flag is used by the MPTCP syn_recv_sock() to tell tcp_check_req() how to deal with the request socket. This change is a no-op for !MPTCP build, and makes the MPTCP code simpler. It allows also the next patch to deal correctly with MP JOIN failure. v1 -> v2: - be more conservative on drop_req initialization (Mat) RFC -> v1: - move the drop_req bit inside tcp_request_sock (Eric) Signed-off-by: Paolo Abeni Reviewed-by: Mat Martineau Reviewed-by: Christoph Paasch Signed-off-by: David S. Miller --- include/linux/tcp.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/tcp.h b/include/linux/tcp.h index e60db06ec28d..bf44e85d709d 100644 --- a/include/linux/tcp.h +++ b/include/linux/tcp.h @@ -120,6 +120,9 @@ struct tcp_request_sock { u64 snt_synack; /* first SYNACK sent time */ bool tfo_listener; bool is_mptcp; +#if IS_ENABLED(CONFIG_MPTCP) + bool drop_req; +#endif u32 txhash; u32 rcv_isn; u32 snt_isn; -- cgit v1.2.3 From 4930f4831b1547b52c5968e9307fe3d840d7fba0 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Sat, 16 May 2020 10:46:23 +0200 Subject: net: allow __skb_ext_alloc to sleep mptcp calls this from the transmit side, from process context. Allow a sleeping allocation instead of unconditional GFP_ATOMIC. Acked-by: Paolo Abeni Signed-off-by: Florian Westphal Signed-off-by: David S. 
Miller --- include/linux/skbuff.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 3000c526f552..531843952809 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -4165,7 +4165,7 @@ struct skb_ext { char data[] __aligned(8); }; -struct skb_ext *__skb_ext_alloc(void); +struct skb_ext *__skb_ext_alloc(gfp_t flags); void *__skb_ext_set(struct sk_buff *skb, enum skb_ext_id id, struct skb_ext *ext); void *skb_ext_add(struct sk_buff *skb, enum skb_ext_id id); -- cgit v1.2.3 From 356d411c26735bcc62718c4c9181014255dc302d Mon Sep 17 00:00:00 2001 From: Raed Salem Date: Fri, 15 May 2020 15:16:52 -0700 Subject: net/mlx5: Cleanup mlx5_ifc_fte_match_set_misc2_bits Remove the "metadata_reg_b" field and all uses of this field in code to match the device specification. As this field is not in use in SW steering it is safe to remove it. Signed-off-by: Raed Salem Reviewed-by: Alex Vesker Signed-off-by: Saeed Mahameed --- include/linux/mlx5/mlx5_ifc.h | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index c9dd6e99ad56..fd8da4875ea0 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -584,9 +584,7 @@ struct mlx5_ifc_fte_match_set_misc2_bits { u8 metadata_reg_a[0x20]; - u8 metadata_reg_b[0x20]; - - u8 reserved_at_1c0[0x40]; + u8 reserved_at_1a0[0x60]; }; struct mlx5_ifc_fte_match_set_misc3_bits { -- cgit v1.2.3 From 555af0c3fa0b632be73c241cc932129af4b70d27 Mon Sep 17 00:00:00 2001 From: Parav Pandit Date: Fri, 15 May 2020 15:16:53 -0700 Subject: net/mlx5: Move iseg access helper routines close to mlx5_core driver Only mlx5_core driver handles fw initialization check and command interface revision check. Hence move them inside the mlx5_core driver where they are used. This avoids exposing these helpers to all mlx5 drivers. 
Signed-off-by: Parav Pandit Signed-off-by: Saeed Mahameed --- include/linux/mlx5/driver.h | 10 ---------- 1 file changed, 10 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index 24e04901f92e..a988eb405aa6 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -823,11 +823,6 @@ static inline u16 fw_rev_sub(struct mlx5_core_dev *dev) return ioread32be(&dev->iseg->cmdif_rev_fw_sub) & 0xffff; } -static inline u16 cmdif_rev(struct mlx5_core_dev *dev) -{ - return ioread32be(&dev->iseg->cmdif_rev_fw_sub) >> 16; -} - static inline u32 mlx5_base_mkey(const u32 key) { return key & 0xffffff00u; @@ -1012,11 +1007,6 @@ int mlx5_core_roce_gid_set(struct mlx5_core_dev *dev, unsigned int index, u8 roce_version, u8 roce_l3_type, const u8 *gid, const u8 *mac, bool vlan, u16 vlan_id, u8 port_num); -static inline int fw_initializing(struct mlx5_core_dev *dev) -{ - return ioread32be(&dev->iseg->initializing) >> 31; -} - static inline u32 mlx5_mkey_to_idx(u32 mkey) { return mkey >> 8; -- cgit v1.2.3 From a307593a644443db12888f45eed0dafb5869e2cc Mon Sep 17 00:00:00 2001 From: Doug Berger Date: Mon, 18 May 2020 15:23:59 -0700 Subject: net: phy: simplify phy_link_change arguments This function was introduced to allow for different handling of link up and link down events particularly with regard to the netif_carrier. The third argument do_carrier allowed the flag to be left unchanged. Since then the phylink has introduced an implementation that completely ignores the third parameter since it never wants to change the flag and the phylib always sets the third parameter to true so the flag is always changed. Therefore the third argument (i.e. do_carrier) is no longer necessary and can be removed. This also means that the phylib phy_link_down() function no longer needs its second argument. Signed-off-by: Doug Berger Signed-off-by: David S. 
Miller --- include/linux/phy.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/phy.h b/include/linux/phy.h index 5d8ff5428010..467aa8bf9f64 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -543,7 +543,7 @@ struct phy_device { u8 mdix; u8 mdix_ctrl; - void (*phy_link_change)(struct phy_device *, bool up, bool do_carrier); + void (*phy_link_change)(struct phy_device *phydev, bool up); void (*adjust_link)(struct net_device *dev); #if IS_ENABLED(CONFIG_MACSEC) -- cgit v1.2.3 From 1b66d253610c7f8f257103808a9460223a087469 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Tue, 19 May 2020 00:45:45 +0200 Subject: bpf: Add get{peer, sock}name attach types for sock_addr As stated in 983695fa6765 ("bpf: fix unconnected udp hooks"), the objective for the existing cgroup connect/sendmsg/recvmsg/bind BPF hooks is to be transparent to applications. In Cilium we make use of these hooks [0] in order to enable E-W load balancing for existing Kubernetes service types for all Cilium managed nodes in the cluster. Those backends can be local or remote. The main advantage of this approach is that it operates as close as possible to the socket, and therefore allows to avoid packet-based NAT given in connect/sendmsg/recvmsg hooks we only need to xlate sock addresses. This also allows to expose NodePort services on loopback addresses in the host namespace, for example. As another advantage, this also efficiently blocks bind requests for applications in the host namespace for exposed ports. However, one missing item is that we also need to perform reverse xlation for inet{,6}_getname() hooks such that we can return the service IP/port tuple back to the application instead of the remote peer address. 
The vast majority of applications does not bother about getpeername(), but in a few occasions we've seen breakage when validating the peer's address since it returns unexpectedly the backend tuple instead of the service one. Therefore, this trivial patch allows to customise and adds a getpeername() as well as getsockname() BPF cgroup hook for both IPv4 and IPv6 in order to address this situation. Simple example: # ./cilium/cilium service list ID Frontend Service Type Backend 1 1.2.3.4:80 ClusterIP 1 => 10.0.0.10:80 Before; curl's verbose output example, no getpeername() reverse xlation: # curl --verbose 1.2.3.4 * Rebuilt URL to: 1.2.3.4/ * Trying 1.2.3.4... * TCP_NODELAY set * Connected to 1.2.3.4 (10.0.0.10) port 80 (#0) > GET / HTTP/1.1 > Host: 1.2.3.4 > User-Agent: curl/7.58.0 > Accept: */* [...] After; with getpeername() reverse xlation: # curl --verbose 1.2.3.4 * Rebuilt URL to: 1.2.3.4/ * Trying 1.2.3.4... * TCP_NODELAY set * Connected to 1.2.3.4 (1.2.3.4) port 80 (#0) > GET / HTTP/1.1 > Host: 1.2.3.4 > User-Agent: curl/7.58.0 > Accept: */* [...] Originally, I had both under a BPF_CGROUP_INET{4,6}_GETNAME type and exposed peer to the context similar as in inet{,6}_getname() fashion, but API-wise this is suboptimal as it always enforces programs having to test for ctx->peer which can easily be missed, hence BPF_CGROUP_INET{4,6}_GET{PEER,SOCK}NAME split. Similarly, the checked return code is on tnum_range(1, 1), but if a use case comes up in future, it can easily be changed to return an error code instead. Helper and ctx member access is the same as with connect/sendmsg/etc hooks. 
[0] https://github.com/cilium/cilium/blob/master/bpf/bpf_sock.c Signed-off-by: Daniel Borkmann Signed-off-by: Alexei Starovoitov Acked-by: Andrii Nakryiko Acked-by: Andrey Ignatov Link: https://lore.kernel.org/bpf/61a479d759b2482ae3efb45546490bacd796a220.1589841594.git.daniel@iogearbox.net --- include/linux/bpf-cgroup.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/bpf-cgroup.h b/include/linux/bpf-cgroup.h index 272626cc3fc9..c66c545e161a 100644 --- a/include/linux/bpf-cgroup.h +++ b/include/linux/bpf-cgroup.h @@ -396,6 +396,7 @@ static inline int bpf_percpu_cgroup_storage_update(struct bpf_map *map, } #define cgroup_bpf_enabled (0) +#define BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, type, t_ctx) ({ 0; }) #define BPF_CGROUP_PRE_CONNECT_ENABLED(sk) (0) #define BPF_CGROUP_RUN_PROG_INET_INGRESS(sk,skb) ({ 0; }) #define BPF_CGROUP_RUN_PROG_INET_EGRESS(sk,skb) ({ 0; }) -- cgit v1.2.3 From 607259a695312cdfac2b52fb9d5b5890c834d573 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 19 May 2020 15:03:13 +0200 Subject: net: add a new ndo_tunnel_ioctl method This method is used to properly allow kernel callers of the IPv4 route management ioctls. The existing ip_tunnel_ioctl helper is renamed to ip_tunnel_ctl to better reflect that it doesn't directly implement ioctls touching user memory, and is used for the guts of ndo_tunnel_ctl implementations. A new ip_tunnel_ioctl helper is added that can be wired up directly to the ndo_do_ioctl method and takes care of the copy to and from userspace. Signed-off-by: Christoph Hellwig Signed-off-by: David S. 
Miller --- include/linux/netdevice.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 6a8f8daef09d..a18f8fdf4260 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -53,6 +53,7 @@ struct netpoll_info; struct device; struct phy_device; struct dsa_port; +struct ip_tunnel_parm; struct macsec_context; struct macsec_ops; @@ -1274,6 +1275,9 @@ struct netdev_net_notifier { * Get devlink port instance associated with a given netdev. * Called with a reference on the netdevice and devlink locks only, * rtnl_lock is not held. + * int (*ndo_tunnel_ctl)(struct net_device *dev, struct ip_tunnel_parm *p, + * int cmd); + * Add, change, delete or get information on an IPv4 tunnel. */ struct net_device_ops { int (*ndo_init)(struct net_device *dev); @@ -1479,6 +1483,8 @@ struct net_device_ops { int (*ndo_xsk_wakeup)(struct net_device *dev, u32 queue_id, u32 flags); struct devlink_port * (*ndo_get_devlink_port)(struct net_device *dev); + int (*ndo_tunnel_ctl)(struct net_device *dev, + struct ip_tunnel_parm *p, int cmd); }; /** -- cgit v1.2.3 From 931ca7ab7fe804d77bc6952f1512950c0d870f26 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 29 Mar 2020 17:18:30 -0400 Subject: ip*_mc_gsfget(): lift copyout of struct group_filter into callers pass the userland pointer to the array in its tail, so that part gets copied out by our functions; copyout of everything else is done in the callers. Rationale: reuse for compat; the array is the same in native and compat, the layout of parts before it is different for compat. 
Signed-off-by: Al Viro --- include/linux/igmp.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/igmp.h b/include/linux/igmp.h index faa6586a5783..64ce8cd1cfaf 100644 --- a/include/linux/igmp.h +++ b/include/linux/igmp.h @@ -123,7 +123,7 @@ extern int ip_mc_msfilter(struct sock *sk, struct ip_msfilter *msf,int ifindex); extern int ip_mc_msfget(struct sock *sk, struct ip_msfilter *msf, struct ip_msfilter __user *optval, int __user *optlen); extern int ip_mc_gsfget(struct sock *sk, struct group_filter *gsf, - struct group_filter __user *optval, int __user *optlen); + struct sockaddr_storage __user *p); extern int ip_mc_sf_allow(struct sock *sk, __be32 local, __be32 rmt, int dif, int sdif); extern void ip_mc_init_dev(struct in_device *); -- cgit v1.2.3 From 7bfb399eca460500f048098bf427c45b40e17cae Mon Sep 17 00:00:00 2001 From: Yuval Basson Date: Tue, 19 May 2020 23:51:26 +0300 Subject: qed: Add XRC to RoCE Add support for XRC-SRQ's and XRC-QP's for upper layer driver. We maintain separate bitmaps for resource management for srq and xrc-srq, However, the range in FW is one, The xrc-srq's are first and then the srq's follow. Therefore we maintain a srq-id offset. v2: perform cleanups if XRC bitmaps allocation fails. Signed-off-by: Michal Kalderon Signed-off-by: Yuval Bason Signed-off-by: David S.
Miller --- include/linux/qed/qed_rdma_if.h | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) (limited to 'include/linux') diff --git a/include/linux/qed/qed_rdma_if.h b/include/linux/qed/qed_rdma_if.h index 74efca15fde7..f93edd5750a5 100644 --- a/include/linux/qed/qed_rdma_if.h +++ b/include/linux/qed/qed_rdma_if.h @@ -53,6 +53,13 @@ enum qed_roce_qp_state { QED_ROCE_QP_STATE_SQE }; +enum qed_rdma_qp_type { + QED_RDMA_QP_TYPE_RC, + QED_RDMA_QP_TYPE_XRC_INI, + QED_RDMA_QP_TYPE_XRC_TGT, + QED_RDMA_QP_TYPE_INVAL = 0xffff, +}; + enum qed_rdma_tid_type { QED_RDMA_TID_REGISTERED_MR, QED_RDMA_TID_FMR, @@ -291,6 +298,12 @@ struct qed_rdma_create_srq_in_params { u16 num_pages; u16 pd_id; u16 page_size; + + /* XRC related only */ + bool reserved_key_en; + bool is_xrc; + u32 cq_cid; + u16 xrcd_id; }; struct qed_rdma_destroy_cq_in_params { @@ -319,7 +332,9 @@ struct qed_rdma_create_qp_in_params { u16 rq_num_pages; u64 rq_pbl_ptr; u16 srq_id; + u16 xrcd_id; u8 stats_queue; + enum qed_rdma_qp_type qp_type; }; struct qed_rdma_create_qp_out_params { @@ -429,11 +444,13 @@ struct qed_rdma_create_srq_out_params { struct qed_rdma_destroy_srq_in_params { u16 srq_id; + bool is_xrc; }; struct qed_rdma_modify_srq_in_params { u32 wqe_limit; u16 srq_id; + bool is_xrc; }; struct qed_rdma_stats_out_params { @@ -611,6 +628,8 @@ struct qed_rdma_ops { int (*rdma_set_rdma_int)(struct qed_dev *cdev, u16 cnt); int (*rdma_alloc_pd)(void *rdma_cxt, u16 *pd); void (*rdma_dealloc_pd)(void *rdma_cxt, u16 pd); + int (*rdma_alloc_xrcd)(void *rdma_cxt, u16 *xrcd); + void (*rdma_dealloc_xrcd)(void *rdma_cxt, u16 xrcd); int (*rdma_create_cq)(void *rdma_cxt, struct qed_rdma_create_cq_in_params *params, u16 *icid); -- cgit v1.2.3 From 8066021915924f58ed338bf38208215f5a7355f6 Mon Sep 17 00:00:00 2001 From: Oleksij Rempel Date: Wed, 20 May 2020 08:29:14 +0200 Subject: ethtool: provide UAPI for PHY Signal Quality Index (SQI) Signal Quality Index is a mandatory value required by "OPEN Alliance SIG" for the 
100Base-T1 PHYs [1]. This indicator can be used for cable integrity diagnostic and investigating other noise sources and implement by at least two vendors: NXP[2] and TI[3]. [1] http://www.opensig.org/download/document/218/Advanced_PHY_features_for_automotive_Ethernet_V1.0.pdf [2] https://www.nxp.com/docs/en/data-sheet/TJA1100.pdf [3] https://www.ti.com/product/DP83TC811R-Q1 Signed-off-by: Oleksij Rempel Reviewed-by: Andrew Lunn Reviewed-by: Michal Kubecek Signed-off-by: David S. Miller --- include/linux/phy.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/phy.h b/include/linux/phy.h index 467aa8bf9f64..2bcdf19ed3b4 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -723,6 +723,8 @@ struct phy_driver { struct ethtool_tunable *tuna, const void *data); int (*set_loopback)(struct phy_device *dev, bool enable); + int (*get_sqi)(struct phy_device *dev); + int (*get_sqi_max)(struct phy_device *dev); }; #define to_phy_driver(d) container_of(to_mdio_common_driver(d), \ struct phy_driver, mdiodrv) -- cgit v1.2.3 From 65ece6de0114fc84fbc0487bf68cae91d535dd78 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Thu, 16 Apr 2020 11:50:49 +0200 Subject: virtchnl: Add missing explicit padding to structures MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit On e.g. m68k, the alignment of 32-bit values is only 2 bytes, leading to the following: ./include/linux/avf/virtchnl.h:147:36: warning: division by zero [-Wdiv-by-zero] { virtchnl_static_assert_##X = (n)/((sizeof(struct X) == (n)) ? 
1 : 0) } ^ ./include/linux/avf/virtchnl.h:577:1: note: in expansion of macro ‘VIRTCHNL_CHECK_STRUCT_LEN’ VIRTCHNL_CHECK_STRUCT_LEN(272, virtchnl_filter); ^~~~~~~~~~~~~~~~~~~~~~~~~ ./include/linux/avf/virtchnl.h:577:32: error: enumerator value for ‘virtchnl_static_assert_virtchnl_filter’ is not an integer constant VIRTCHNL_CHECK_STRUCT_LEN(272, virtchnl_filter); ^~~~~~~~~~~~~~~ ./include/linux/avf/virtchnl.h:147:53: note: in definition of macro ‘VIRTCHNL_CHECK_STRUCT_LEN’ { virtchnl_static_assert_##X = (n)/((sizeof(struct X) == (n)) ? 1 : 0) } ^ ./include/linux/avf/virtchnl.h:147:36: warning: division by zero [-Wdiv-by-zero] { virtchnl_static_assert_##X = (n)/((sizeof(struct X) == (n)) ? 1 : 0) } ^ ./include/linux/avf/virtchnl.h:619:1: note: in expansion of macro ‘VIRTCHNL_CHECK_STRUCT_LEN’ VIRTCHNL_CHECK_STRUCT_LEN(16, virtchnl_pf_event); ^~~~~~~~~~~~~~~~~~~~~~~~~ ./include/linux/avf/virtchnl.h:619:31: error: enumerator value for ‘virtchnl_static_assert_virtchnl_pf_event’ is not an integer constant VIRTCHNL_CHECK_STRUCT_LEN(16, virtchnl_pf_event); ^~~~~~~~~~~~~~~~~ ./include/linux/avf/virtchnl.h:147:53: note: in definition of macro ‘VIRTCHNL_CHECK_STRUCT_LEN’ { virtchnl_static_assert_##X = (n)/((sizeof(struct X) == (n)) ? 1 : 0) } ^ ./include/linux/avf/virtchnl.h:147:36: warning: division by zero [-Wdiv-by-zero] { virtchnl_static_assert_##X = (n)/((sizeof(struct X) == (n)) ? 1 : 0) } ^ ./include/linux/avf/virtchnl.h:640:1: note: in expansion of macro ‘VIRTCHNL_CHECK_STRUCT_LEN’ VIRTCHNL_CHECK_STRUCT_LEN(12, virtchnl_iwarp_qv_info); ^~~~~~~~~~~~~~~~~~~~~~~~~ ./include/linux/avf/virtchnl.h:640:31: error: enumerator value for ‘virtchnl_static_assert_virtchnl_iwarp_qv_info’ is not an integer constant VIRTCHNL_CHECK_STRUCT_LEN(12, virtchnl_iwarp_qv_info); ^~~~~~~~~~~~~~~~~~~~~~ ./include/linux/avf/virtchnl.h:147:53: note: in definition of macro ‘VIRTCHNL_CHECK_STRUCT_LEN’ { virtchnl_static_assert_##X = (n)/((sizeof(struct X) == (n)) ? 
1 : 0) } ^ ./include/linux/avf/virtchnl.h:147:36: warning: division by zero [-Wdiv-by-zero] { virtchnl_static_assert_##X = (n)/((sizeof(struct X) == (n)) ? 1 : 0) } ^ ./include/linux/avf/virtchnl.h:647:1: note: in expansion of macro ‘VIRTCHNL_CHECK_STRUCT_LEN’ VIRTCHNL_CHECK_STRUCT_LEN(16, virtchnl_iwarp_qvlist_info); ^~~~~~~~~~~~~~~~~~~~~~~~~ ./include/linux/avf/virtchnl.h:647:31: error: enumerator value for ‘virtchnl_static_assert_virtchnl_iwarp_qvlist_info’ is not an integer constant VIRTCHNL_CHECK_STRUCT_LEN(16, virtchnl_iwarp_qvlist_info); ^~~~~~~~~~~~~~~~~~~~~~~~~~ ./include/linux/avf/virtchnl.h:147:53: note: in definition of macro ‘VIRTCHNL_CHECK_STRUCT_LEN’ { virtchnl_static_assert_##X = (n)/((sizeof(struct X) == (n)) ? 1 : 0) } ^ Fix this by adding explicit padding to structures with holes. Reported-by: Signed-off-by: Geert Uytterhoeven Tested-by: Andrew Bowers Signed-off-by: Jeff Kirsher --- include/linux/avf/virtchnl.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/avf/virtchnl.h b/include/linux/avf/virtchnl.h index ca956b672ac0..40bad71865ea 100644 --- a/include/linux/avf/virtchnl.h +++ b/include/linux/avf/virtchnl.h @@ -476,6 +476,7 @@ struct virtchnl_rss_key { u16 vsi_id; u16 key_len; u8 key[1]; /* RSS hash key, packed bytes */ + u8 pad[1]; }; VIRTCHNL_CHECK_STRUCT_LEN(6, virtchnl_rss_key); @@ -484,6 +485,7 @@ struct virtchnl_rss_lut { u16 vsi_id; u16 lut_entries; u8 lut[1]; /* RSS lookup table */ + u8 pad[1]; }; VIRTCHNL_CHECK_STRUCT_LEN(6, virtchnl_rss_lut); @@ -572,6 +574,7 @@ struct virtchnl_filter { enum virtchnl_action action; u32 action_meta; u8 field_flags; + u8 pad[3]; }; VIRTCHNL_CHECK_STRUCT_LEN(272, virtchnl_filter); @@ -610,6 +613,7 @@ struct virtchnl_pf_event { /* link_speed provided in Mbps */ u32 link_speed; u8 link_status; + u8 pad[3]; } link_event_adv; } event_data; @@ -635,6 +639,7 @@ struct virtchnl_iwarp_qv_info { u16 ceq_idx; u16 aeq_idx; u8 itr_idx; + u8 pad[3]; }; 
VIRTCHNL_CHECK_STRUCT_LEN(12, virtchnl_iwarp_qv_info); -- cgit v1.2.3 From cd16627fc0468564fdd60f20ad52420b87195127 Mon Sep 17 00:00:00 2001 From: Bartosz Golaszewski Date: Sat, 23 May 2020 15:27:10 +0200 Subject: net: devres: provide devm_register_netdev() Provide devm_register_netdev() - a device resource managed variant of register_netdev(). This new helper will only work for net_device structs that are also already managed by devres. Signed-off-by: Bartosz Golaszewski Signed-off-by: David S. Miller --- include/linux/netdevice.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index a18f8fdf4260..1a96e9c4ec36 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -4280,6 +4280,8 @@ struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name, int register_netdev(struct net_device *dev); void unregister_netdev(struct net_device *dev); +int devm_register_netdev(struct device *dev, struct net_device *ndev); + /* General hardware address lists handling functions */ int __hw_addr_sync(struct netdev_hw_addr_list *to_list, struct netdev_hw_addr_list *from_list, int addr_len); -- cgit v1.2.3 From ff937b916eb6316fe4644564a572ed3b5867bc1f Mon Sep 17 00:00:00 2001 From: Yuval Basson Date: Tue, 26 May 2020 09:41:20 +0300 Subject: qed: Add EDPM mode type for user-fw compatibility In older FW versions the completion flag was treated as the ack flag in edpm messages. Expose the FW option of setting which mode the QP is in by adding a flag to the qedr <-> qed API. Flag is added for backward compatibility with libqedr. This flag will be set by qedr after determining whether the libqedr is using the updated version. Fixes: f10939403352 ("qed: Add support for QP verbs") Signed-off-by: Yuval Basson Signed-off-by: Michal Kalderon Signed-off-by: David S. 
Miller --- include/linux/qed/qed_rdma_if.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/qed/qed_rdma_if.h b/include/linux/qed/qed_rdma_if.h index f93edd5750a5..584077565f12 100644 --- a/include/linux/qed/qed_rdma_if.h +++ b/include/linux/qed/qed_rdma_if.h @@ -335,6 +335,9 @@ struct qed_rdma_create_qp_in_params { u16 xrcd_id; u8 stats_queue; enum qed_rdma_qp_type qp_type; + u8 flags; +#define QED_ROCE_EDPM_MODE_MASK 0x1 +#define QED_ROCE_EDPM_MODE_SHIFT 0 }; struct qed_rdma_create_qp_out_params { -- cgit v1.2.3 From 90ce665c6a40dc1be771bf5f86e624c0acf3a76f Mon Sep 17 00:00:00 2001 From: Russell King Date: Tue, 26 May 2020 16:29:36 +0100 Subject: net: mdiobus: add clause 45 mdiobus accessors There is a recurring pattern throughout some of the PHY code converting a devad and regnum to our packed clause 45 representation. Rather than having this scattered around the code, let's put a common translation function in mdio.h, and provide some register accessors. Convert the phylib core, phylink, bcm87xx and cortina to use these. Signed-off-by: Russell King Reviewed-by: Andrew Lunn Reviewed-by: Florian Fainelli Signed-off-by: David S. Miller --- include/linux/mdio.h | 31 +++++++++++++++++++++++++++++++ include/linux/phy.h | 6 ------ 2 files changed, 31 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mdio.h b/include/linux/mdio.h index 917e4bb2ed71..36d2e0673d03 100644 --- a/include/linux/mdio.h +++ b/include/linux/mdio.h @@ -9,6 +9,13 @@ #include #include +/* Or MII_ADDR_C45 into regnum for read/write on mii_bus to enable the 21 bit + * IEEE 802.3ae clause 45 addressing mode used by 10GIGE phy chips. 
+ */ +#define MII_ADDR_C45 (1<<30) +#define MII_DEVADDR_C45_SHIFT 16 +#define MII_REGADDR_C45_MASK GENMASK(15, 0) + struct gpio_desc; struct mii_bus; @@ -326,6 +333,30 @@ int mdiobus_write_nested(struct mii_bus *bus, int addr, u32 regnum, u16 val); int mdiobus_modify(struct mii_bus *bus, int addr, u32 regnum, u16 mask, u16 set); +static inline u32 mdiobus_c45_addr(int devad, u16 regnum) +{ + return MII_ADDR_C45 | devad << MII_DEVADDR_C45_SHIFT | regnum; +} + +static inline int __mdiobus_c45_read(struct mii_bus *bus, int prtad, int devad, + u16 regnum) +{ + return __mdiobus_read(bus, prtad, mdiobus_c45_addr(devad, regnum)); +} + +static inline int __mdiobus_c45_write(struct mii_bus *bus, int prtad, int devad, + u16 regnum, u16 val) +{ + return __mdiobus_write(bus, prtad, mdiobus_c45_addr(devad, regnum), + val); +} + +static inline int mdiobus_c45_read(struct mii_bus *bus, int prtad, int devad, + u16 regnum) +{ + return mdiobus_read(bus, prtad, mdiobus_c45_addr(devad, regnum)); +} + int mdiobus_register_device(struct mdio_device *mdiodev); int mdiobus_unregister_device(struct mdio_device *mdiodev); bool mdiobus_is_registered_device(struct mii_bus *bus, int addr); diff --git a/include/linux/phy.h b/include/linux/phy.h index 2bcdf19ed3b4..6d256e720a66 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -209,12 +209,6 @@ static inline const char *phy_modes(phy_interface_t interface) #define MII_BUS_ID_SIZE 61 -/* Or MII_ADDR_C45 into regnum for read/write on mii_bus to enable the 21 bit - IEEE 802.3ae clause 45 addressing mode used by 10GIGE phy chips. 
*/ -#define MII_ADDR_C45 (1<<30) -#define MII_DEVADDR_C45_SHIFT 16 -#define MII_REGADDR_C45_MASK GENMASK(15, 0) - struct device; struct phylink; struct sfp_bus; -- cgit v1.2.3 From 1a644de29f712771c2ec00e52caa391544eb6141 Mon Sep 17 00:00:00 2001 From: Andrew Lunn Date: Wed, 27 May 2020 00:21:38 +0200 Subject: net: ethtool: Add generic parts of cable test TDR Add the generic parts of the code used to trigger a cable test and return raw TDR data. Any PHY driver which support this must implement the new driver op. Signed-off-by: Andrew Lunn v2 Update nxp-tja11xx for API change. Signed-off-by: David S. Miller --- include/linux/ethtool_netlink.h | 4 ++-- include/linux/phy.h | 13 +++++++++++++ 2 files changed, 15 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ethtool_netlink.h b/include/linux/ethtool_netlink.h index e317fc99565e..24817ba252a0 100644 --- a/include/linux/ethtool_netlink.h +++ b/include/linux/ethtool_netlink.h @@ -17,13 +17,13 @@ enum ethtool_multicast_groups { struct phy_device; #if IS_ENABLED(CONFIG_ETHTOOL_NETLINK) -int ethnl_cable_test_alloc(struct phy_device *phydev); +int ethnl_cable_test_alloc(struct phy_device *phydev, u8 cmd); void ethnl_cable_test_free(struct phy_device *phydev); void ethnl_cable_test_finished(struct phy_device *phydev); int ethnl_cable_test_result(struct phy_device *phydev, u8 pair, u8 result); int ethnl_cable_test_fault_length(struct phy_device *phydev, u8 pair, u32 cm); #else -static inline int ethnl_cable_test_alloc(struct phy_device *phydev) +static inline int ethnl_cable_test_alloc(struct phy_device *phydev, u8 cmd) { return -EOPNOTSUPP; } diff --git a/include/linux/phy.h b/include/linux/phy.h index 6d256e720a66..d3c384f353ca 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -699,6 +699,10 @@ struct phy_driver { /* Start a cable test */ int (*cable_test_start)(struct phy_device *dev); + + /* Start a raw TDR cable test */ + int (*cable_test_tdr_start)(struct phy_device 
*dev); + /* Once per second, or on interrupt, request the status of the * test. */ @@ -1251,6 +1255,8 @@ int phy_reset_after_clk_enable(struct phy_device *phydev); #if IS_ENABLED(CONFIG_PHYLIB) int phy_start_cable_test(struct phy_device *phydev, struct netlink_ext_ack *extack); +int phy_start_cable_test_tdr(struct phy_device *phydev, + struct netlink_ext_ack *extack); #else static inline int phy_start_cable_test(struct phy_device *phydev, @@ -1259,6 +1265,13 @@ int phy_start_cable_test(struct phy_device *phydev, NL_SET_ERR_MSG(extack, "Kernel not compiled with PHYLIB support"); return -EOPNOTSUPP; } +static inline +int phy_start_cable_test_tdr(struct phy_device *phydev, + struct netlink_ext_ack *extack) +{ + NL_SET_ERR_MSG(extack, "Kernel not compiled with PHYLIB support"); + return -EOPNOTSUPP; +} #endif int phy_cable_test_result(struct phy_device *phydev, u8 pair, u16 result); -- cgit v1.2.3 From 6b4a0fc106521e480c00b55a7ef38c89f02dc4e8 Mon Sep 17 00:00:00 2001 From: Andrew Lunn Date: Wed, 27 May 2020 00:21:39 +0200 Subject: net: ethtool: Add helpers for cable test TDR data Add helpers for returning raw TDR data in netlink messages. Signed-off-by: Andrew Lunn Signed-off-by: David S.
Miller --- include/linux/ethtool_netlink.h | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) (limited to 'include/linux') diff --git a/include/linux/ethtool_netlink.h b/include/linux/ethtool_netlink.h index 24817ba252a0..8fbe4f97ffad 100644 --- a/include/linux/ethtool_netlink.h +++ b/include/linux/ethtool_netlink.h @@ -22,6 +22,10 @@ void ethnl_cable_test_free(struct phy_device *phydev); void ethnl_cable_test_finished(struct phy_device *phydev); int ethnl_cable_test_result(struct phy_device *phydev, u8 pair, u8 result); int ethnl_cable_test_fault_length(struct phy_device *phydev, u8 pair, u32 cm); +int ethnl_cable_test_amplitude(struct phy_device *phydev, u8 pair, s16 mV); +int ethnl_cable_test_pulse(struct phy_device *phydev, u16 mV); +int ethnl_cable_test_step(struct phy_device *phydev, u32 first, u32 last, + u32 step); #else static inline int ethnl_cable_test_alloc(struct phy_device *phydev, u8 cmd) { @@ -46,5 +50,22 @@ static inline int ethnl_cable_test_fault_length(struct phy_device *phydev, { return -EOPNOTSUPP; } + +static inline int ethnl_cable_test_amplitude(struct phy_device *phydev, + u8 pair, s16 mV) +{ + return -EOPNOTSUPP; +} + +static inline int ethnl_cable_test_pulse(struct phy_device *phydev, u16 mV) +{ + return -EOPNOTSUPP; +} + +static inline int ethnl_cable_test_step(struct phy_device *phydev, u32 first, + u32 last, u32 step) +{ + return -EOPNOTSUPP; +} #endif /* IS_ENABLED(ETHTOOL_NETLINK) */ #endif /* _LINUX_ETHTOOL_NETLINK_H_ */ -- cgit v1.2.3 From f2bc8ad31a7f814237bc6301d59296d76505a688 Mon Sep 17 00:00:00 2001 From: Andrew Lunn Date: Wed, 27 May 2020 00:21:41 +0200 Subject: net: ethtool: Allow PHY cable test TDR data to configured Allow the user to configure where on the cable the TDR data should be retrieved, in terms of first and last sample, and the step between samples. Also add the ability to ask for TDR data for just one pair. If this configuration is not provided, it defaults to 1-150m at 1m intervals for all pairs. 
Signed-off-by: Andrew Lunn v3: Move the TDR configuration into a structure Add a range check on step Use NL_SET_ERR_MSG_ATTR() when appropriate Move TDR configuration into a nest Document attributes in the request Signed-off-by: David S. Miller --- include/linux/phy.h | 21 ++++++++++++++++++--- 1 file changed, 18 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/phy.h b/include/linux/phy.h index d3c384f353ca..8c05d0fb5c00 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -548,6 +548,18 @@ struct phy_device { #define to_phy_device(d) container_of(to_mdio_device(d), \ struct phy_device, mdio) +/* A structure containing possible configuration parameters + * for a TDR cable test. The driver does not need to implement + * all the parameters, but should report what is actually used. + */ +struct phy_tdr_config { + u32 first; + u32 last; + u32 step; + s8 pair; +}; +#define PHY_PAIR_ALL -1 + /* struct phy_driver: Driver structure for a particular PHY type * * driver_data: static driver data @@ -701,7 +713,8 @@ struct phy_driver { int (*cable_test_start)(struct phy_device *dev); /* Start a raw TDR cable test */ - int (*cable_test_tdr_start)(struct phy_device *dev); + int (*cable_test_tdr_start)(struct phy_device *dev, + const struct phy_tdr_config *config); /* Once per second, or on interrupt, request the status of the * test. 
@@ -1256,7 +1269,8 @@ int phy_reset_after_clk_enable(struct phy_device *phydev); int phy_start_cable_test(struct phy_device *phydev, struct netlink_ext_ack *extack); int phy_start_cable_test_tdr(struct phy_device *phydev, - struct netlink_ext_ack *extack); + struct netlink_ext_ack *extack, + const struct phy_tdr_config *config); #else static inline int phy_start_cable_test(struct phy_device *phydev, @@ -1267,7 +1281,8 @@ int phy_start_cable_test(struct phy_device *phydev, } static inline int phy_start_cable_test_tdr(struct phy_device *phydev, - struct netlink_ext_ack *extack) + struct netlink_ext_ack *extack, + const struct phy_tdr_config *config) { NL_SET_ERR_MSG(extack, "Kernel not compiled with PHYLIB support"); return -EOPNOTSUPP; -- cgit v1.2.3 From e76fede8bf7c90d92c799d9ceb092dec48346e2c Mon Sep 17 00:00:00 2001 From: Thomas Pedersen Date: Thu, 30 Apr 2020 10:25:50 -0700 Subject: cfg80211: add KHz variants of frame RX API Drivers may wish to report the RX frequency in units of KHz. Provide cfg80211_rx_mgmt_khz() and wrap it with cfg80211_rx_mgmt() so existing drivers which can't report KHz anyway don't need to change. Add a similar wrapper for cfg80211_report_obss_beacon() so the frequency units stay somewhat consistent. This doesn't actually change the nl80211 API yet.
Signed-off-by: Thomas Pedersen Link: https://lore.kernel.org/r/20200430172554.18383-2-thomas@adapt-ip.com [fix mac80211 calling the non-khz version of obss beacon report, drop trace point name changes] Signed-off-by: Johannes Berg --- include/linux/ieee80211.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h index a561db435a4b..41d5f000c0d9 100644 --- a/include/linux/ieee80211.h +++ b/include/linux/ieee80211.h @@ -3333,6 +3333,8 @@ static inline int ieee80211_get_tdls_action(struct sk_buff *skb, u32 hdr_size) /* convert frequencies */ #define MHZ_TO_KHZ(freq) ((freq) * 1000) #define KHZ_TO_MHZ(freq) ((freq) / 1000) +#define PR_KHZ(f) KHZ_TO_MHZ(f), f % 1000 +#define KHZ_F "%d.%03d" /* convert powers */ #define DBI_TO_MBI(gain) ((gain) * 100) -- cgit v1.2.3 From d6fb67ff86bb991d5ac18471e5f739bc32e5090e Mon Sep 17 00:00:00 2001 From: Thomas Pedersen Date: Thu, 30 Apr 2020 10:25:53 -0700 Subject: ieee80211: S1G defines These are found in IEEE-802.11ah-2016. 
Signed-off-by: Thomas Pedersen Link: https://lore.kernel.org/r/20200430172554.18383-5-thomas@adapt-ip.com Signed-off-by: Johannes Berg --- include/linux/ieee80211.h | 221 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 221 insertions(+) (limited to 'include/linux') diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h index 41d5f000c0d9..f630b8978a43 100644 --- a/include/linux/ieee80211.h +++ b/include/linux/ieee80211.h @@ -105,6 +105,51 @@ /* extension, added by 802.11ad */ #define IEEE80211_STYPE_DMG_BEACON 0x0000 +#define IEEE80211_STYPE_S1G_BEACON 0x0010 + +/* bits unique to S1G beacon */ +#define IEEE80211_S1G_BCN_NEXT_TBTT 0x100 + +/* see 802.11ah-2016 9.9 NDP CMAC frames */ +#define IEEE80211_S1G_1MHZ_NDP_BITS 25 +#define IEEE80211_S1G_1MHZ_NDP_BYTES 4 +#define IEEE80211_S1G_2MHZ_NDP_BITS 37 +#define IEEE80211_S1G_2MHZ_NDP_BYTES 5 + +#define IEEE80211_NDP_FTYPE_CTS 0 +#define IEEE80211_NDP_FTYPE_CF_END 0 +#define IEEE80211_NDP_FTYPE_PS_POLL 1 +#define IEEE80211_NDP_FTYPE_ACK 2 +#define IEEE80211_NDP_FTYPE_PS_POLL_ACK 3 +#define IEEE80211_NDP_FTYPE_BA 4 +#define IEEE80211_NDP_FTYPE_BF_REPORT_POLL 5 +#define IEEE80211_NDP_FTYPE_PAGING 6 +#define IEEE80211_NDP_FTYPE_PREQ 7 + +#define SM64(f, v) ((((u64)v) << f##_S) & f) + +/* NDP CMAC frame fields */ +#define IEEE80211_NDP_FTYPE 0x0000000000000007 +#define IEEE80211_NDP_FTYPE_S 0x0000000000000000 + +/* 1M Probe Request 11ah 9.9.3.1.1 */ +#define IEEE80211_NDP_1M_PREQ_ANO 0x0000000000000008 +#define IEEE80211_NDP_1M_PREQ_ANO_S 3 +#define IEEE80211_NDP_1M_PREQ_CSSID 0x00000000000FFFF0 +#define IEEE80211_NDP_1M_PREQ_CSSID_S 4 +#define IEEE80211_NDP_1M_PREQ_RTYPE 0x0000000000100000 +#define IEEE80211_NDP_1M_PREQ_RTYPE_S 20 +#define IEEE80211_NDP_1M_PREQ_RSV 0x0000000001E00000 +#define IEEE80211_NDP_1M_PREQ_RSV 0x0000000001E00000 +/* 2M Probe Request 11ah 9.9.3.1.2 */ +#define IEEE80211_NDP_2M_PREQ_ANO 0x0000000000000008 +#define IEEE80211_NDP_2M_PREQ_ANO_S 3 +#define 
IEEE80211_NDP_2M_PREQ_CSSID 0x0000000FFFFFFFF0 +#define IEEE80211_NDP_2M_PREQ_CSSID_S 4 +#define IEEE80211_NDP_2M_PREQ_RTYPE 0x0000001000000000 +#define IEEE80211_NDP_2M_PREQ_RTYPE_S 36 + +#define IEEE80211_ANO_NETTYPE_WILD 15 /* control extension - for IEEE80211_FTYPE_CTL | IEEE80211_STYPE_CTL_EXT */ #define IEEE80211_CTL_EXT_POLL 0x2000 @@ -121,6 +166,21 @@ #define IEEE80211_MAX_SN IEEE80211_SN_MASK #define IEEE80211_SN_MODULO (IEEE80211_MAX_SN + 1) + +/* PV1 Layout 11ah 9.8.3.1 */ +#define IEEE80211_PV1_FCTL_VERS 0x0003 +#define IEEE80211_PV1_FCTL_FTYPE 0x001c +#define IEEE80211_PV1_FCTL_STYPE 0x00e0 +#define IEEE80211_PV1_FCTL_TODS 0x0100 +#define IEEE80211_PV1_FCTL_MOREFRAGS 0x0200 +#define IEEE80211_PV1_FCTL_PM 0x0400 +#define IEEE80211_PV1_FCTL_MOREDATA 0x0800 +#define IEEE80211_PV1_FCTL_PROTECTED 0x1000 +#define IEEE80211_PV1_FCTL_END_SP 0x2000 +#define IEEE80211_PV1_FCTL_RELAYED 0x4000 +#define IEEE80211_PV1_FCTL_ACK_POLICY 0x8000 +#define IEEE80211_PV1_FCTL_CTL_EXT 0x0f00 + static inline bool ieee80211_sn_less(u16 sn1, u16 sn2) { return ((sn1 - sn2) & IEEE80211_SN_MASK) > (IEEE80211_SN_MODULO >> 1); @@ -148,6 +208,7 @@ static inline u16 ieee80211_sn_sub(u16 sn1, u16 sn2) #define IEEE80211_MAX_FRAG_THRESHOLD 2352 #define IEEE80211_MAX_RTS_THRESHOLD 2353 #define IEEE80211_MAX_AID 2007 +#define IEEE80211_MAX_AID_S1G 8191 #define IEEE80211_MAX_TIM_LEN 251 #define IEEE80211_MAX_MESH_PEERINGS 63 /* Maximum size for the MA-UNITDATA primitive, 802.11 standard section @@ -371,6 +432,17 @@ static inline bool ieee80211_is_data(__le16 fc) cpu_to_le16(IEEE80211_FTYPE_DATA); } +/** + * ieee80211_is_ext - check if type is IEEE80211_FTYPE_EXT + * @fc: frame control bytes in little-endian byteorder + */ +static inline bool ieee80211_is_ext(__le16 fc) +{ + return (fc & cpu_to_le16(IEEE80211_FCTL_FTYPE)) == + cpu_to_le16(IEEE80211_FTYPE_EXT); +} + + /** * ieee80211_is_data_qos - check if type is IEEE80211_FTYPE_DATA and IEEE80211_STYPE_QOS_DATA is set * @fc: frame control 
bytes in little-endian byteorder @@ -469,6 +541,18 @@ static inline bool ieee80211_is_beacon(__le16 fc) cpu_to_le16(IEEE80211_FTYPE_MGMT | IEEE80211_STYPE_BEACON); } +/** + * ieee80211_is_s1g_beacon - check if IEEE80211_FTYPE_EXT && + * IEEE80211_STYPE_S1G_BEACON + * @fc: frame control bytes in little-endian byteorder + */ +static inline bool ieee80211_is_s1g_beacon(__le16 fc) +{ + return (fc & cpu_to_le16(IEEE80211_FCTL_FTYPE | + IEEE80211_FCTL_STYPE)) == + cpu_to_le16(IEEE80211_FTYPE_EXT | IEEE80211_STYPE_S1G_BEACON); +} + /** * ieee80211_is_atim - check if IEEE80211_FTYPE_MGMT && IEEE80211_STYPE_ATIM * @fc: frame control bytes in little-endian byteorder @@ -900,6 +984,59 @@ struct ieee80211_addba_ext_ie { u8 data; } __packed; +/** + * struct ieee80211_s1g_bcn_compat_ie + * + * S1G Beacon Compatibility element + */ +struct ieee80211_s1g_bcn_compat_ie { + __le16 compat_info; + __le16 beacon_int; + __le32 tsf_completion; +} __packed; + +/** + * struct ieee80211_s1g_oper_ie + * + * S1G Operation element + */ +struct ieee80211_s1g_oper_ie { + u8 ch_width; + u8 oper_class; + u8 primary_ch; + u8 oper_ch; + __le16 basic_mcs_nss; +} __packed; + +/** + * struct ieee80211_aid_response_ie + * + * AID Response element + */ +struct ieee80211_aid_response_ie { + __le16 aid; + u8 switch_count; + __le16 response_int; +} __packed; + +struct ieee80211_s1g_cap { + u8 capab_info[10]; + u8 supp_mcs_nss[5]; +} __packed; + +struct ieee80211_ext { + __le16 frame_control; + __le16 duration; + union { + struct { + u8 sa[ETH_ALEN]; + __le32 timestamp; + u8 change_seq; + u8 variable[0]; + } __packed s1g_beacon; + } u; +} __packed __aligned(2); + struct ieee80211_mgmt { __le16 frame_control; __le16 duration; @@ -2137,6 +2274,86 @@ ieee80211_he_spr_size(const u8 *he_spr_ie) return spr_len; } +/* S1G Capabilities Information field */ +#define S1G_CAPAB_B0_S1G_LONG BIT(0) +#define S1G_CAPAB_B0_SGI_1MHZ BIT(1) +#define S1G_CAPAB_B0_SGI_2MHZ BIT(2) +#define S1G_CAPAB_B0_SGI_4MHZ BIT(3) +#define 
S1G_CAPAB_B0_SGI_8MHZ BIT(4) +#define S1G_CAPAB_B0_SGI_16MHZ BIT(5) +#define S1G_CAPAB_B0_SUPP_CH_WIDTH_MASK (BIT(6) | BIT(7)) +#define S1G_CAPAB_B0_SUPP_CH_WIDTH_SHIFT 6 + +#define S1G_CAPAB_B1_RX_LDPC BIT(0) +#define S1G_CAPAB_B1_TX_STBC BIT(1) +#define S1G_CAPAB_B1_RX_STBC BIT(2) +#define S1G_CAPAB_B1_SU_BFER BIT(3) +#define S1G_CAPAB_B1_SU_BFEE BIT(4) +#define S1G_CAPAB_B1_BFEE_STS_MASK (BIT(5) | BIT(6) | BIT(7)) +#define S1G_CAPAB_B1_BFEE_STS_SHIFT 5 + +#define S1G_CAPAB_B2_SOUNDING_DIMENSIONS_MASK (BIT(0) | BIT(1) | BIT(2)) +#define S1G_CAPAB_B2_SOUNDING_DIMENSIONS_SHIFT 0 +#define S1G_CAPAB_B2_MU_BFER BIT(3) +#define S1G_CAPAB_B2_MU_BFEE BIT(4) +#define S1G_CAPAB_B2_PLUS_HTC_VHT BIT(5) +#define S1G_CAPAB_B2_TRAVELING_PILOT_MASK (BIT(6) | BIT(7)) +#define S1G_CAPAB_B2_TRAVELING_PILOT_SHIFT 6 + +#define S1G_CAPAB_B3_RD_RESPONDER BIT(0) +#define S1G_CAPAB_B3_HT_DELAYED_BA BIT(1) +#define S1G_CAPAB_B3_MAX_MPDU_LEN BIT(2) +#define S1G_CAPAB_B3_MAX_AMPDU_LEN_EXP_MASK (BIT(3) | BIT(4)) +#define S1G_CAPAB_B3_MAX_AMPDU_LEN_EXP_SHIFT 3 +#define S1G_CAPAB_B3_MIN_MPDU_START_MASK (BIT(5) | BIT(6) | BIT(7)) +#define S1G_CAPAB_B3_MIN_MPDU_START_SHIFT 5 + +#define S1G_CAPAB_B4_UPLINK_SYNC BIT(0) +#define S1G_CAPAB_B4_DYNAMIC_AID BIT(1) +#define S1G_CAPAB_B4_BAT BIT(2) +#define S1G_CAPAB_B4_TIME_ADE BIT(3) +#define S1G_CAPAB_B4_NON_TIM BIT(4) +#define S1G_CAPAB_B4_GROUP_AID BIT(5) +#define S1G_CAPAB_B4_STA_TYPE_MASK (BIT(6) | BIT(7)) +#define S1G_CAPAB_B4_STA_TYPE_SHIFT 6 + +#define S1G_CAPAB_B5_CENT_AUTH_CONTROL BIT(0) +#define S1G_CAPAB_B5_DIST_AUTH_CONTROL BIT(1) +#define S1G_CAPAB_B5_AMSDU BIT(2) +#define S1G_CAPAB_B5_AMPDU BIT(3) +#define S1G_CAPAB_B5_ASYMMETRIC_BA BIT(4) +#define S1G_CAPAB_B5_FLOW_CONTROL BIT(5) +#define S1G_CAPAB_B5_SECTORIZED_BEAM_MASK (BIT(6) | BIT(7)) +#define S1G_CAPAB_B5_SECTORIZED_BEAM_SHIFT 6 + +#define S1G_CAPAB_B6_OBSS_MITIGATION BIT(0) +#define S1G_CAPAB_B6_FRAGMENT_BA BIT(1) +#define S1G_CAPAB_B6_NDP_PS_POLL BIT(2) +#define 
S1G_CAPAB_B6_RAW_OPERATION BIT(3) +#define S1G_CAPAB_B6_PAGE_SLICING BIT(4) +#define S1G_CAPAB_B6_TXOP_SHARING_IMP_ACK BIT(5) +#define S1G_CAPAB_B6_VHT_LINK_ADAPT_MASK (BIT(6) | BIT(7)) +#define S1G_CAPAB_B6_VHT_LINK_ADAPT_SHIFT 6 + +#define S1G_CAPAB_B7_TACK_AS_PS_POLL BIT(0) +#define S1G_CAPAB_B7_DUP_1MHZ BIT(1) +#define S1G_CAPAB_B7_MCS_NEGOTIATION BIT(2) +#define S1G_CAPAB_B7_1MHZ_CTL_RESPONSE_PREAMBLE BIT(3) +#define S1G_CAPAB_B7_NDP_BFING_REPORT_POLL BIT(4) +#define S1G_CAPAB_B7_UNSOLICITED_DYN_AID BIT(5) +#define S1G_CAPAB_B7_SECTOR_TRAINING_OPERATION BIT(6) +#define S1G_CAPAB_B7_TEMP_PS_MODE_SWITCH BIT(7) + +#define S1G_CAPAB_B8_TWT_GROUPING BIT(0) +#define S1G_CAPAB_B8_BDT BIT(1) +#define S1G_CAPAB_B8_COLOR_MASK (BIT(2) | BIT(3) | BIT(4)) +#define S1G_CAPAB_B8_COLOR_SHIFT 2 +#define S1G_CAPAB_B8_TWT_REQUEST BIT(5) +#define S1G_CAPAB_B8_TWT_RESPOND BIT(6) +#define S1G_CAPAB_B8_PV1_FRAME BIT(7) + +#define S1G_CAPAB_B9_LINK_ADAPT_PER_CONTROL_RESPONSE BIT(0) + /* Authentication algorithms */ #define WLAN_AUTH_OPEN 0 #define WLAN_AUTH_SHARED_KEY 1 @@ -2532,8 +2749,12 @@ enum ieee80211_eid { WLAN_EID_QUIET_CHANNEL = 198, WLAN_EID_OPMODE_NOTIF = 199, + WLAN_EID_S1G_BCN_COMPAT = 213, + WLAN_EID_S1G_SHORT_BCN_INTERVAL = 214, + WLAN_EID_S1G_CAPABILITIES = 217, WLAN_EID_VENDOR_SPECIFIC = 221, WLAN_EID_QOS_PARAMETER = 222, + WLAN_EID_S1G_OPERATION = 232, WLAN_EID_CAG_NUMBER = 237, WLAN_EID_AP_CSN = 239, WLAN_EID_FILS_INDICATION = 240, -- cgit v1.2.3 From fedd0fe4e89b009f31eb53ec36dbdf1e457616c0 Mon Sep 17 00:00:00 2001 From: Tamizh Chelvam Date: Mon, 4 May 2020 22:34:59 +0530 Subject: mac80211: Add new AMPDU factor macro for HE peer caps Add IEEE80211_HE_VHT_MAX_AMPDU_FACTOR and IEEE80211_HE_HT_MAX_AMPDU_FACTOR as per spec to use for peer max ampdu factor. 
Signed-off-by: Tamizh Chelvam Link: https://lore.kernel.org/r/1588611900-21185-1-git-send-email-tamizhr@codeaurora.org Signed-off-by: Johannes Berg --- include/linux/ieee80211.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h index f630b8978a43..2153d465d752 100644 --- a/include/linux/ieee80211.h +++ b/include/linux/ieee80211.h @@ -1958,6 +1958,8 @@ int ieee80211_get_vht_max_nss(struct ieee80211_vht_cap *cap, #define IEEE80211_HE_MAC_CAP3_FLEX_TWT_SCHED 0x40 #define IEEE80211_HE_MAC_CAP3_RX_CTRL_FRAME_TO_MULTIBSS 0x80 +#define IEEE80211_HE_MAC_CAP3_MAX_AMPDU_LEN_EXP_SHIFT 3 + #define IEEE80211_HE_MAC_CAP4_BSRP_BQRP_A_MPDU_AGG 0x01 #define IEEE80211_HE_MAC_CAP4_QTP 0x02 #define IEEE80211_HE_MAC_CAP4_BQR 0x04 @@ -1979,6 +1981,9 @@ int ieee80211_get_vht_max_nss(struct ieee80211_vht_cap *cap, #define IEEE80211_HE_MAC_CAP5_PUNCTURED_SOUNDING 0x40 #define IEEE80211_HE_MAC_CAP5_HT_VHT_TRIG_FRAME_RX 0x80 +#define IEEE80211_HE_VHT_MAX_AMPDU_FACTOR 20 +#define IEEE80211_HE_HT_MAX_AMPDU_FACTOR 16 + /* 802.11ax HE PHY capabilities */ #define IEEE80211_HE_PHY_CAP0_CHANNEL_WIDTH_SET_40MHZ_IN_2G 0x02 #define IEEE80211_HE_PHY_CAP0_CHANNEL_WIDTH_SET_40MHZ_80MHZ_IN_5G 0x04 -- cgit v1.2.3 From 396fba0a59f3c94d6fd6443fbeabd8bd9e3956eb Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. 
Silva" Date: Thu, 7 May 2020 13:39:09 -0500 Subject: cfg80211: Replace zero-length array with flexible-array The current codebase makes use of the zero-length array language extension to the C90 standard, but the preferred mechanism to declare variable-length types such as these ones is a flexible array member[1][2], introduced in C99: struct foo { int stuff; struct boo array[]; }; By making use of the mechanism above, we will get a compiler warning in case the flexible array does not occur last in the structure, which will help us prevent some kind of undefined behavior bugs from being inadvertently introduced[3] to the codebase from now on. Also, notice that, dynamic memory allocations won't be affected by this change: "Flexible array members have incomplete type, and so the sizeof operator may not be applied. As a quirk of the original implementation of zero-length arrays, sizeof evaluates to zero."[1] sizeof(flexible-array-member) triggers a warning because flexible array members have incomplete type[1]. There are some instances of code in which the sizeof operator is being incorrectly/erroneously applied to zero-length arrays and the result is zero. Such instances may be hiding some bugs. So, this work (flexible-array member conversions) will also help to get completely rid of those sorts of issues. This issue was found with the help of Coccinelle. [1] https://gcc.gnu.org/onlinedocs/gcc/Zero-Length.html [2] https://github.com/KSPP/linux/issues/21 [3] commit 76497732932f ("cxgb3/l2t: Fix undefined behaviour") Signed-off-by: Gustavo A. R. 
Silva Link: https://lore.kernel.org/r/20200507183909.GA12993@embeddedor Signed-off-by: Johannes Berg --- include/linux/ieee80211.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h index 2153d465d752..0320ca4c7d28 100644 --- a/include/linux/ieee80211.h +++ b/include/linux/ieee80211.h @@ -800,7 +800,7 @@ struct ieee80211_msrment_ie { u8 token; u8 mode; u8 type; - u8 request[0]; + u8 request[]; } __packed; /** @@ -1781,7 +1781,7 @@ struct ieee80211_he_operation { __le32 he_oper_params; __le16 he_mcs_nss_set; /* Optional 0,1,3,4,5,7 or 8 bytes: depends on @he_oper_params */ - u8 optional[0]; + u8 optional[]; } __packed; /** @@ -1793,7 +1793,7 @@ struct ieee80211_he_operation { struct ieee80211_he_spr { u8 he_sr_control; /* Optional 0 to 19 bytes: depends on @he_sr_control */ - u8 optional[0]; + u8 optional[]; } __packed; /** -- cgit v1.2.3 From 6b646a7e4af69814dd1a3340fca0f02d4977420d Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Mon, 9 Mar 2020 16:44:25 +0200 Subject: net/mlx5: Add ability to read and write ECE options The end result of RDMA-CM ECE handshake is ECE options, which is needed to be used while configuring data QPs. Such options can come in any QP state, so add in/out fields to set and query ECE options. OUT fields: * create_qp() - default ECE options for that type of QP. * modify_qp() - enabled ECE options after QP state transition. IN fields: * create_qp() - create QP with this ECE option. * modify_qp() - requested options. For unconnected QPs, the FW will return an error if ECE is already configured with any options that not equal to previously set. 
Reviewed-by: Mark Zhang Reviewed-by: Maor Gottlieb Signed-off-by: Leon Romanovsky --- include/linux/mlx5/mlx5_ifc.h | 26 ++++++++++++++++---------- 1 file changed, 16 insertions(+), 10 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index fd8da4875ea0..1a56dc079c32 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -1208,7 +1208,9 @@ struct mlx5_ifc_cmd_hca_cap_bits { u8 reserved_at_99[0x2]; u8 log_max_qp[0x5]; - u8 reserved_at_a0[0xb]; + u8 reserved_at_a0[0x3]; + u8 ece_support[0x1]; + u8 reserved_at_a4[0x7]; u8 log_max_srq[0x5]; u8 reserved_at_b0[0x10]; @@ -4216,7 +4218,8 @@ struct mlx5_ifc_rts2rts_qp_out_bits { u8 syndrome[0x20]; - u8 reserved_at_40[0x40]; + u8 reserved_at_40[0x20]; + u8 ece[0x20]; }; struct mlx5_ifc_rts2rts_qp_in_bits { @@ -4233,7 +4236,7 @@ struct mlx5_ifc_rts2rts_qp_in_bits { u8 opt_param_mask[0x20]; - u8 reserved_at_a0[0x20]; + u8 ece[0x20]; struct mlx5_ifc_qpc_bits qpc; @@ -4246,7 +4249,8 @@ struct mlx5_ifc_rtr2rts_qp_out_bits { u8 syndrome[0x20]; - u8 reserved_at_40[0x40]; + u8 reserved_at_40[0x20]; + u8 ece[0x20]; }; struct mlx5_ifc_rtr2rts_qp_in_bits { @@ -4263,7 +4267,7 @@ struct mlx5_ifc_rtr2rts_qp_in_bits { u8 opt_param_mask[0x20]; - u8 reserved_at_a0[0x20]; + u8 ece[0x20]; struct mlx5_ifc_qpc_bits qpc; @@ -4815,7 +4819,8 @@ struct mlx5_ifc_query_qp_out_bits { u8 syndrome[0x20]; - u8 reserved_at_40[0x40]; + u8 reserved_at_40[0x20]; + u8 ece[0x20]; u8 opt_param_mask[0x20]; @@ -6580,7 +6585,8 @@ struct mlx5_ifc_init2rtr_qp_out_bits { u8 syndrome[0x20]; - u8 reserved_at_40[0x40]; + u8 reserved_at_40[0x20]; + u8 ece[0x20]; }; struct mlx5_ifc_init2rtr_qp_in_bits { @@ -6597,7 +6603,7 @@ struct mlx5_ifc_init2rtr_qp_in_bits { u8 opt_param_mask[0x20]; - u8 reserved_at_a0[0x20]; + u8 ece[0x20]; struct mlx5_ifc_qpc_bits qpc; @@ -7693,7 +7699,7 @@ struct mlx5_ifc_create_qp_out_bits { u8 reserved_at_40[0x8]; u8 qpn[0x18]; - u8 reserved_at_60[0x20]; 
+ u8 ece[0x20]; }; struct mlx5_ifc_create_qp_in_bits { @@ -7707,7 +7713,7 @@ struct mlx5_ifc_create_qp_in_bits { u8 opt_param_mask[0x20]; - u8 reserved_at_a0[0x20]; + u8 ece[0x20]; struct mlx5_ifc_qpc_bits qpc; -- cgit v1.2.3 From 7a15b2e013f535a125ad7351ffc808c79bc6de35 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 27 May 2020 20:22:29 +0200 Subject: net: remove kernel_getsockopt No users left. Signed-off-by: Christoph Hellwig Signed-off-by: David S. Miller --- include/linux/net.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/net.h b/include/linux/net.h index 6451425e828f..74ef5d7315f7 100644 --- a/include/linux/net.h +++ b/include/linux/net.h @@ -303,8 +303,6 @@ int kernel_connect(struct socket *sock, struct sockaddr *addr, int addrlen, int flags); int kernel_getsockname(struct socket *sock, struct sockaddr *addr); int kernel_getpeername(struct socket *sock, struct sockaddr *addr); -int kernel_getsockopt(struct socket *sock, int level, int optname, char *optval, - int *optlen); int kernel_setsockopt(struct socket *sock, int level, int optname, char *optval, unsigned int optlen); int kernel_sendpage(struct socket *sock, struct page *page, int offset, -- cgit v1.2.3 From db10538a4b997a77a1fd561adaaa58afc7dcfa2f Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 28 May 2020 07:12:18 +0200 Subject: tcp: add tcp_sock_set_cork Add a helper to directly set the TCP_CORK sockopt from kernel space without going through a fake uaccess. Cleanup the callers to avoid pointless wrappers now that this is a simple function call. Signed-off-by: Christoph Hellwig Signed-off-by: David S. 
Miller --- include/linux/tcp.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/tcp.h b/include/linux/tcp.h index bf44e85d709d..889eeb2256c2 100644 --- a/include/linux/tcp.h +++ b/include/linux/tcp.h @@ -497,4 +497,6 @@ static inline u16 tcp_mss_clamp(const struct tcp_sock *tp, u16 mss) int tcp_skb_shift(struct sk_buff *to, struct sk_buff *from, int pcount, int shiftlen); +void tcp_sock_set_cork(struct sock *sk, bool on); + #endif /* _LINUX_TCP_H */ -- cgit v1.2.3 From 12abc5ee7873a085cc280240822b8ac53c86fecd Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 28 May 2020 07:12:19 +0200 Subject: tcp: add tcp_sock_set_nodelay Add a helper to directly set the TCP_NODELAY sockopt from kernel space without going through a fake uaccess. Cleanup the callers to avoid pointless wrappers now that this is a simple function call. Signed-off-by: Christoph Hellwig Acked-by: Sagi Grimberg Acked-by: Jason Gunthorpe Signed-off-by: David S. Miller --- include/linux/tcp.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/tcp.h b/include/linux/tcp.h index 889eeb2256c2..9e42c7fe50a8 100644 --- a/include/linux/tcp.h +++ b/include/linux/tcp.h @@ -498,5 +498,6 @@ int tcp_skb_shift(struct sk_buff *to, struct sk_buff *from, int pcount, int shiftlen); void tcp_sock_set_cork(struct sock *sk, bool on); +void tcp_sock_set_nodelay(struct sock *sk); #endif /* _LINUX_TCP_H */ -- cgit v1.2.3 From ddd061b8daed3ce0c01109a69c9a2a9f9669f01a Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 28 May 2020 07:12:20 +0200 Subject: tcp: add tcp_sock_set_quickack Add a helper to directly set the TCP_QUICKACK sockopt from kernel space without going through a fake uaccess. Cleanup the callers to avoid pointless wrappers now that this is a simple function call. Signed-off-by: Christoph Hellwig Signed-off-by: David S. 
Miller --- include/linux/tcp.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/tcp.h b/include/linux/tcp.h index 9e42c7fe50a8..2eaf8320b9db 100644 --- a/include/linux/tcp.h +++ b/include/linux/tcp.h @@ -499,5 +499,6 @@ int tcp_skb_shift(struct sk_buff *to, struct sk_buff *from, int pcount, void tcp_sock_set_cork(struct sock *sk, bool on); void tcp_sock_set_nodelay(struct sock *sk); +void tcp_sock_set_quickack(struct sock *sk, int val); #endif /* _LINUX_TCP_H */ -- cgit v1.2.3 From 557eadfcc5ee8f8fa98a795e05ed21db58a65db5 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 28 May 2020 07:12:21 +0200 Subject: tcp: add tcp_sock_set_syncnt Add a helper to directly set the TCP_SYNCNT sockopt from kernel space without going through a fake uaccess. Signed-off-by: Christoph Hellwig Acked-by: Sagi Grimberg Signed-off-by: David S. Miller --- include/linux/tcp.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/tcp.h b/include/linux/tcp.h index 2eaf8320b9db..6aa4ae5ebf3d 100644 --- a/include/linux/tcp.h +++ b/include/linux/tcp.h @@ -500,5 +500,6 @@ int tcp_skb_shift(struct sk_buff *to, struct sk_buff *from, int pcount, void tcp_sock_set_cork(struct sock *sk, bool on); void tcp_sock_set_nodelay(struct sock *sk); void tcp_sock_set_quickack(struct sock *sk, int val); +int tcp_sock_set_syncnt(struct sock *sk, int val); #endif /* _LINUX_TCP_H */ -- cgit v1.2.3 From c488aeadcbd002a992593e6090d54e8ac27c4310 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 28 May 2020 07:12:22 +0200 Subject: tcp: add tcp_sock_set_user_timeout Add a helper to directly set the TCP_USER_TIMEOUT sockopt from kernel space without going through a fake uaccess. Signed-off-by: Christoph Hellwig Signed-off-by: David S. 
Miller --- include/linux/tcp.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/tcp.h b/include/linux/tcp.h index 6aa4ae5ebf3d..de682143efe4 100644 --- a/include/linux/tcp.h +++ b/include/linux/tcp.h @@ -501,5 +501,6 @@ void tcp_sock_set_cork(struct sock *sk, bool on); void tcp_sock_set_nodelay(struct sock *sk); void tcp_sock_set_quickack(struct sock *sk, int val); int tcp_sock_set_syncnt(struct sock *sk, int val); +void tcp_sock_set_user_timeout(struct sock *sk, u32 val); #endif /* _LINUX_TCP_H */ -- cgit v1.2.3 From 71c48eb81c9ecb6fed49dc33e7c9b621fdcb7bf8 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 28 May 2020 07:12:23 +0200 Subject: tcp: add tcp_sock_set_keepidle Add a helper to directly set the TCP_KEEP_IDLE sockopt from kernel space without going through a fake uaccess. Signed-off-by: Christoph Hellwig Signed-off-by: David S. Miller --- include/linux/tcp.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/tcp.h b/include/linux/tcp.h index de682143efe4..5724dd84a85e 100644 --- a/include/linux/tcp.h +++ b/include/linux/tcp.h @@ -498,6 +498,7 @@ int tcp_skb_shift(struct sk_buff *to, struct sk_buff *from, int pcount, int shiftlen); void tcp_sock_set_cork(struct sock *sk, bool on); +int tcp_sock_set_keepidle(struct sock *sk, int val); void tcp_sock_set_nodelay(struct sock *sk); void tcp_sock_set_quickack(struct sock *sk, int val); int tcp_sock_set_syncnt(struct sock *sk, int val); -- cgit v1.2.3 From d41ecaac903c9f4658a71d4e7a708673cfb5abba Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 28 May 2020 07:12:24 +0200 Subject: tcp: add tcp_sock_set_keepintvl Add a helper to directly set the TCP_KEEPINTVL sockopt from kernel space without going through a fake uaccess. Signed-off-by: Christoph Hellwig Signed-off-by: David S. 
Miller --- include/linux/tcp.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/tcp.h b/include/linux/tcp.h index 5724dd84a85e..1f9bada00faa 100644 --- a/include/linux/tcp.h +++ b/include/linux/tcp.h @@ -499,6 +499,7 @@ int tcp_skb_shift(struct sk_buff *to, struct sk_buff *from, int pcount, void tcp_sock_set_cork(struct sock *sk, bool on); int tcp_sock_set_keepidle(struct sock *sk, int val); +int tcp_sock_set_keepintvl(struct sock *sk, int val); void tcp_sock_set_nodelay(struct sock *sk); void tcp_sock_set_quickack(struct sock *sk, int val); int tcp_sock_set_syncnt(struct sock *sk, int val); -- cgit v1.2.3 From 480aeb9639d6a077c611b303a22f9b1e5937d081 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 28 May 2020 07:12:25 +0200 Subject: tcp: add tcp_sock_set_keepcnt Add a helper to directly set the TCP_KEEPCNT sockopt from kernel space without going through a fake uaccess. Signed-off-by: Christoph Hellwig Signed-off-by: David S. Miller --- include/linux/tcp.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/tcp.h b/include/linux/tcp.h index 1f9bada00faa..9aac824c523c 100644 --- a/include/linux/tcp.h +++ b/include/linux/tcp.h @@ -498,6 +498,7 @@ int tcp_skb_shift(struct sk_buff *to, struct sk_buff *from, int pcount, int shiftlen); void tcp_sock_set_cork(struct sock *sk, bool on); +int tcp_sock_set_keepcnt(struct sock *sk, int val); int tcp_sock_set_keepidle(struct sock *sk, int val); int tcp_sock_set_keepintvl(struct sock *sk, int val); void tcp_sock_set_nodelay(struct sock *sk); -- cgit v1.2.3 From 5a892ff2facb4548c17c05931ed899038a0da63e Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 29 May 2020 14:09:43 +0200 Subject: net: remove kernel_setsockopt No users left. Signed-off-by: Christoph Hellwig Reviewed-by: Marcelo Ricardo Leitner Signed-off-by: David S. 
Miller --- include/linux/net.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/net.h b/include/linux/net.h index 74ef5d7315f7..e10f378194a5 100644 --- a/include/linux/net.h +++ b/include/linux/net.h @@ -303,8 +303,6 @@ int kernel_connect(struct socket *sock, struct sockaddr *addr, int addrlen, int flags); int kernel_getsockname(struct socket *sock, struct sockaddr *addr); int kernel_getpeername(struct socket *sock, struct sockaddr *addr); -int kernel_setsockopt(struct socket *sock, int level, int optname, char *optval, - unsigned int optlen); int kernel_sendpage(struct socket *sock, struct page *page, int offset, size_t size, int flags); int kernel_sendpage_locked(struct sock *sk, struct page *page, int offset, -- cgit v1.2.3 From 2553f421f44f4db7579f202b79b69046b579c7b5 Mon Sep 17 00:00:00 2001 From: Saeed Mahameed Date: Wed, 27 May 2020 23:16:02 -0700 Subject: net/mlx5: cmd: Fix memset with byte count warning Fix sparse warning: drivers/net/ethernet/mellanox/mlx5/core/cmd.c:1949:15: warning: memset with byte count of 271720 mlx5_cmd_stats array is too big to be held inline in mlx5_cmd. Allocate it separately. Signed-off-by: Saeed Mahameed --- include/linux/mlx5/driver.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index 6aa6bbd60559..13c0e4556eda 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -298,7 +298,7 @@ struct mlx5_cmd { struct mlx5_cmd_debug dbg; struct cmd_msg_cache cache[MLX5_NUM_COMMAND_CACHES]; int checksum_disabled; - struct mlx5_cmd_stats stats[MLX5_CMD_OP_MAX]; + struct mlx5_cmd_stats *stats; }; struct mlx5_port_caps { -- cgit v1.2.3 From 44345c4c130ee3df9b9fbc366d59ab3ac707d7f8 Mon Sep 17 00:00:00 2001 From: Saeed Mahameed Date: Fri, 29 May 2020 00:47:12 -0700 Subject: net/mlx5: IPSec: Fix incorrect type for spi spi is __be32, fix that. 
Fixes sparse warning: drivers/net/ethernet/mellanox/mlx5/core/accel/ipsec.c:74:64 warning: incorrect type Signed-off-by: Saeed Mahameed --- include/linux/mlx5/accel.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mlx5/accel.h b/include/linux/mlx5/accel.h index b919d143a9a6..96ebaa94a92e 100644 --- a/include/linux/mlx5/accel.h +++ b/include/linux/mlx5/accel.h @@ -76,7 +76,7 @@ struct aes_gcm_keymat { struct mlx5_accel_esp_xfrm_attrs { enum mlx5_accel_esp_action action; u32 esn; - u32 spi; + __be32 spi; u32 seq; u32 tfc_pad; u32 flags; -- cgit v1.2.3 From 372b38ea5911fc2500f0291b00140e80a26c0e36 Mon Sep 17 00:00:00 2001 From: Tova Mussai Date: Thu, 28 May 2020 21:34:26 +0200 Subject: ieee80211: definitions for reduced neighbor reports Add the necessary definitions to parse reduced neighbor report elements. Signed-off-by: Tova Mussai [change struct name, remove IEEE80211_MIN_AP_NEIGHBOR_INFO_SIZE] Link: https://lore.kernel.org/r/20200528213443.4f9154461c06.I518d9898ad982f838112ea9ca14a20d6bbb16394@changeid Signed-off-by: Johannes Berg --- include/linux/ieee80211.h | 28 ++++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) (limited to 'include/linux') diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h index 0320ca4c7d28..c29184bf9416 100644 --- a/include/linux/ieee80211.h +++ b/include/linux/ieee80211.h @@ -2754,6 +2754,8 @@ enum ieee80211_eid { WLAN_EID_QUIET_CHANNEL = 198, WLAN_EID_OPMODE_NOTIF = 199, + WLAN_EID_REDUCED_NEIGHBOR_REPORT = 201, + WLAN_EID_S1G_BCN_COMPAT = 213, WLAN_EID_S1G_SHORT_BCN_INTERVAL = 214, WLAN_EID_S1G_CAPABILITIES = 217, @@ -3675,4 +3677,30 @@ static inline bool for_each_element_completed(const struct element *element, #define WLAN_RSNX_CAPA_PROTECTED_TWT BIT(4) #define WLAN_RSNX_CAPA_SAE_H2E BIT(5) +/* + * reduced neighbor report, based on Draft P802.11ax_D5.0, + * section 9.4.2.170 + */ +#define IEEE80211_AP_INFO_TBTT_HDR_TYPE 0x03 +#define 
IEEE80211_AP_INFO_TBTT_HDR_FILTERED 0x04 +#define IEEE80211_AP_INFO_TBTT_HDR_COLOC 0x08 +#define IEEE80211_AP_INFO_TBTT_HDR_COUNT 0xF0 +#define IEEE80211_TBTT_INFO_OFFSET_BSSID_BSS_PARAM 8 +#define IEEE80211_TBTT_INFO_OFFSET_BSSID_SSSID_BSS_PARAM 12 + +#define IEEE80211_RNR_TBTT_PARAMS_OCT_RECOMMENDED 0x01 +#define IEEE80211_RNR_TBTT_PARAMS_SAME_SSID 0x02 +#define IEEE80211_RNR_TBTT_PARAMS_MULTI_BSSID 0x04 +#define IEEE80211_RNR_TBTT_PARAMS_TRANSMITTED_BSSID 0x08 +#define IEEE80211_RNR_TBTT_PARAMS_COLOC_ESS 0x10 +#define IEEE80211_RNR_TBTT_PARAMS_PROBE_ACTIVE 0x20 +#define IEEE80211_RNR_TBTT_PARAMS_COLOC_AP 0x40 + +struct ieee80211_neighbor_ap_info { + u8 tbtt_info_hdr; + u8 tbtt_info_len; + u8 op_class; + u8 channel; +} __packed; + #endif /* LINUX_IEEE80211_H */ -- cgit v1.2.3 From 821273a5a502eebaae005557907d122d1e9b7b98 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Thu, 28 May 2020 21:34:27 +0200 Subject: ieee80211: add code to obtain and parse 6 GHz operation field Add some code to obtain and parse the 6 GHz operation field inside the HE operation element. While at it, fix the required length using sizeof() the new struct, which is 5 instead of 4 now. 
Link: https://lore.kernel.org/r/20200528213443.42ca72c45ca9.Id74bc1b03da9ea6574f9bc70deeb60dfc1634359@changeid Signed-off-by: Johannes Berg --- include/linux/ieee80211.h | 52 ++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 51 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h index c29184bf9416..2bd9e757167d 100644 --- a/include/linux/ieee80211.h +++ b/include/linux/ieee80211.h @@ -2209,6 +2209,28 @@ ieee80211_he_ppe_size(u8 ppe_thres_hdr, const u8 *phy_cap_info) #define IEEE80211_HE_OPERATION_PARTIAL_BSS_COLOR 0x40000000 #define IEEE80211_HE_OPERATION_BSS_COLOR_DISABLED 0x80000000 +/** + * ieee80211_he_6ghz_oper - HE 6 GHz operation Information field + * @primary: primary channel + * @control: control flags + * @ccfs0: channel center frequency segment 0 + * @ccfs1: channel center frequency segment 1 + * @minrate: minimum rate (in 1 Mbps units) + */ +struct ieee80211_he_6ghz_oper { + u8 primary; +#define IEEE80211_HE_6GHZ_OPER_CTRL_CHANWIDTH 0x3 +#define IEEE80211_HE_6GHZ_OPER_CTRL_CHANWIDTH_20MHZ 0 +#define IEEE80211_HE_6GHZ_OPER_CTRL_CHANWIDTH_40MHZ 1 +#define IEEE80211_HE_6GHZ_OPER_CTRL_CHANWIDTH_80MHZ 2 +#define IEEE80211_HE_6GHZ_OPER_CTRL_CHANWIDTH_160MHZ 3 +#define IEEE80211_HE_6GHZ_OPER_CTRL_DUP_BEACON 0x4 + u8 control; + u8 ccfs0; + u8 ccfs1; + u8 minrate; +} __packed; + /* * ieee80211_he_oper_size - calculate 802.11ax HE Operations IE size * @he_oper_ie: byte data of the He Operations IE, stating from the byte @@ -2235,7 +2257,7 @@ ieee80211_he_oper_size(const u8 *he_oper_ie) if (he_oper_params & IEEE80211_HE_OPERATION_CO_HOSTED_BSS) oper_len++; if (he_oper_params & IEEE80211_HE_OPERATION_6GHZ_OP_INFO) - oper_len += 4; + oper_len += sizeof(struct ieee80211_he_6ghz_oper); /* Add the first byte (extension ID) to the total length */ oper_len++; @@ -2243,6 +2265,34 @@ ieee80211_he_oper_size(const u8 *he_oper_ie) return oper_len; } +/** + * ieee80211_he_6ghz_oper - obtain 
6 GHz operation field + * @he_oper: HE operation element (must be pre-validated for size) + * but may be %NULL + * + * Return: a pointer to the 6 GHz operation field, or %NULL + */ +static inline const struct ieee80211_he_6ghz_oper * +ieee80211_he_6ghz_oper(const struct ieee80211_he_operation *he_oper) +{ + const u8 *ret = (void *)&he_oper->optional; + u32 he_oper_params; + + if (!he_oper) + return NULL; + + he_oper_params = le32_to_cpu(he_oper->he_oper_params); + + if (!(he_oper_params & IEEE80211_HE_OPERATION_6GHZ_OP_INFO)) + return NULL; + if (he_oper_params & IEEE80211_HE_OPERATION_VHT_OPER_INFO) + ret += 3; + if (he_oper_params & IEEE80211_HE_OPERATION_CO_HOSTED_BSS) + ret++; + + return (void *)ret; +} + /* HE Spatial Reuse defines */ #define IEEE80211_HE_SPR_NON_SRG_OFFSET_PRESENT 0x4 #define IEEE80211_HE_SPR_SRG_INFORMATION_PRESENT 0x8 -- cgit v1.2.3 From 8b30808d9be4183fab17f0b0e68eea88c94ff15a Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Thu, 28 May 2020 21:34:28 +0200 Subject: ieee80211: add HE ext EIDs and 6 GHz capability defines Add the HE extended element IDs and the definitions for the HE 6 GHz band capabilities element, from Draft 5.0. 
Link: https://lore.kernel.org/r/20200528213443.1a6689fe093f.Ifdc5400fb01779351354daf38663ebeea03c9ad9@changeid Signed-off-by: Johannes Berg --- include/linux/ieee80211.h | 28 ++++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) (limited to 'include/linux') diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h index 2bd9e757167d..9580dfd9e2d1 100644 --- a/include/linux/ieee80211.h +++ b/include/linux/ieee80211.h @@ -2839,9 +2839,19 @@ enum ieee80211_eid_ext { WLAN_EID_EXT_UORA = 37, WLAN_EID_EXT_HE_MU_EDCA = 38, WLAN_EID_EXT_HE_SPR = 39, + WLAN_EID_EXT_NDP_FEEDBACK_REPORT_PARAMSET = 41, + WLAN_EID_EXT_BSS_COLOR_CHG_ANN = 42, + WLAN_EID_EXT_QUIET_TIME_PERIOD_SETUP = 43, + WLAN_EID_EXT_ESS_REPORT = 45, + WLAN_EID_EXT_OPS = 46, + WLAN_EID_EXT_HE_BSS_LOAD = 47, WLAN_EID_EXT_MAX_CHANNEL_SWITCH_TIME = 52, WLAN_EID_EXT_MULTIPLE_BSSID_CONFIGURATION = 55, WLAN_EID_EXT_NON_INHERITANCE = 56, + WLAN_EID_EXT_KNOWN_BSSID = 57, + WLAN_EID_EXT_SHORT_SSID_LIST = 58, + WLAN_EID_EXT_HE_6GHZ_CAPA = 59, + WLAN_EID_EXT_UL_MU_POWER_CAPA = 60, }; /* Action category code */ @@ -3384,6 +3394,24 @@ struct ieee80211_tspec_ie { __le16 medium_time; } __packed; +struct ieee80211_he_6ghz_capa { + /* uses IEEE80211_HE_6GHZ_CAP_* below */ + __le16 capa; +} __packed; + +/* HE 6 GHz band capabilities */ +/* uses enum ieee80211_min_mpdu_spacing values */ +#define IEEE80211_HE_6GHZ_CAP_MIN_MPDU_START 0x0007 +/* uses enum ieee80211_vht_max_ampdu_length_exp values */ +#define IEEE80211_HE_6GHZ_CAP_MAX_AMPDU_LEN_EXP 0x0038 +/* uses IEEE80211_VHT_CAP_MAX_MPDU_LENGTH_* values */ +#define IEEE80211_HE_6GHZ_CAP_MAX_MPDU_LEN 0x00c0 +/* WLAN_HT_CAP_SM_PS_* values */ +#define IEEE80211_HE_6GHZ_CAP_SM_PS 0x0600 +#define IEEE80211_HE_6GHZ_CAP_RD_RESPONDER 0x0800 +#define IEEE80211_HE_6GHZ_CAP_RX_ANTPAT_CONS 0x1000 +#define IEEE80211_HE_6GHZ_CAP_TX_ANTPAT_CONS 0x2000 + /** * ieee80211_get_qos_ctl - get pointer to qos control bytes * @hdr: the frame -- cgit v1.2.3 From 
3b3ec3d52e8f72ec8c40477b96f23440a89000be Mon Sep 17 00:00:00 2001 From: Shaul Triebitz Date: Thu, 28 May 2020 21:34:37 +0200 Subject: mac80211: check the correct bit for EMA AP An AP supporting EMA (Enhanced Multi-BSSID advertisement) should set bit 83 in the extended capabilities IE (9.4.2.26 in the 802.11ax D5 spec). So the *3rd* bit of the 10th byte should be checked. Also, in one place, the wrong byte was checked. (cfg80211_find_ie returns a pointer to the beginning of the IE, so the data really starts at ie[2], so the 10th byte should be ie[12]. To avoid this confusion, use cfg80211_find_elem instead). Signed-off-by: Shaul Triebitz Link: https://lore.kernel.org/r/20200528213443.4316121fa2a3.I9745582f8d41ad8e689dac0fefcd70b276d7c1ea@changeid Signed-off-by: Johannes Berg --- include/linux/ieee80211.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h index 9580dfd9e2d1..1ecfd19f836d 100644 --- a/include/linux/ieee80211.h +++ b/include/linux/ieee80211.h @@ -3082,7 +3082,7 @@ enum ieee80211_tdls_actioncode { #define WLAN_EXT_CAPA10_OBSS_NARROW_BW_RU_TOLERANCE_SUPPORT BIT(7) /* Defines support for enhanced multi-bssid advertisement*/ -#define WLAN_EXT_CAPA11_EMA_SUPPORT BIT(1) +#define WLAN_EXT_CAPA11_EMA_SUPPORT BIT(3) /* TDLS specific payload type in the LLC/SNAP header */ #define WLAN_TDLS_SNAP_RFTYPE 0x2 -- cgit v1.2.3 From bfad978116c2aa3b693701059923de4561196f9b Mon Sep 17 00:00:00 2001 From: Bartosz Golaszewski Date: Thu, 28 May 2020 17:45:02 +0200 Subject: regmap: provide helpers for simple bit operations In many instances regmap_update_bits() is used for simple bit setting and clearing. In these cases the last argument is redundant and we can hide it with a static inline function. This adds three new helpers for simple bit operations: set_bits, clear_bits and test_bits (the last one defined as a regular function). 
Signed-off-by: Bartosz Golaszewski Signed-off-by: David S. Miller --- include/linux/regmap.h | 36 ++++++++++++++++++++++++++++++++++++ 1 file changed, 36 insertions(+) (limited to 'include/linux') diff --git a/include/linux/regmap.h b/include/linux/regmap.h index 40b07168fd8e..ddf0baff195d 100644 --- a/include/linux/regmap.h +++ b/include/linux/regmap.h @@ -1111,6 +1111,21 @@ bool regmap_reg_in_ranges(unsigned int reg, const struct regmap_range *ranges, unsigned int nranges); +static inline int regmap_set_bits(struct regmap *map, + unsigned int reg, unsigned int bits) +{ + return regmap_update_bits_base(map, reg, bits, bits, + NULL, false, false); +} + +static inline int regmap_clear_bits(struct regmap *map, + unsigned int reg, unsigned int bits) +{ + return regmap_update_bits_base(map, reg, bits, 0, NULL, false, false); +} + +int regmap_test_bits(struct regmap *map, unsigned int reg, unsigned int bits); + /** * struct reg_field - Description of an register field * @@ -1410,6 +1425,27 @@ static inline int regmap_update_bits_base(struct regmap *map, unsigned int reg, return -EINVAL; } +static inline int regmap_set_bits(struct regmap *map, + unsigned int reg, unsigned int bits) +{ + WARN_ONCE(1, "regmap API is disabled"); + return -EINVAL; +} + +static inline int regmap_clear_bits(struct regmap *map, + unsigned int reg, unsigned int bits) +{ + WARN_ONCE(1, "regmap API is disabled"); + return -EINVAL; +} + +static inline int regmap_test_bits(struct regmap *map, + unsigned int reg, unsigned int bits) +{ + WARN_ONCE(1, "regmap API is disabled"); + return -EINVAL; +} + static inline int regmap_field_update_bits_base(struct regmap_field *field, unsigned int mask, unsigned int val, bool *change, bool async, bool force) -- cgit v1.2.3 From 457f44363a8894135c85b7a9afd2bd8196db24ab Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Fri, 29 May 2020 00:54:20 -0700 Subject: bpf: Implement BPF ring buffer and verifier support for it This commit adds a new MPSC ring buffer 
implementation into BPF ecosystem, which allows multiple CPUs to submit data to a single shared ring buffer. On the consumption side, only a single consumer is assumed. Motivation ---------- There are two distinctive motivators for this work, which are not satisfied by existing perf buffer, which prompted creation of a new ring buffer implementation. - more efficient memory utilization by sharing ring buffer across CPUs; - preserving ordering of events that happen sequentially in time, even across multiple CPUs (e.g., fork/exec/exit events for a task). These two problems are independent, but perf buffer fails to satisfy both. Both are a result of a choice to have per-CPU perf ring buffer. Both can be also solved by having an MPSC implementation of ring buffer. The ordering problem could technically be solved for perf buffer with some in-kernel counting, but given the first one requires an MPSC buffer, the same solution would solve the second problem automatically. Semantics and APIs ------------------ Single ring buffer is presented to BPF programs as an instance of BPF map of type BPF_MAP_TYPE_RINGBUF. Two other alternatives were considered, but ultimately rejected. One way would be to, similar to BPF_MAP_TYPE_PERF_EVENT_ARRAY, make BPF_MAP_TYPE_RINGBUF represent an array of ring buffers, but not enforce "same CPU only" rule. This would be more familiar interface compatible with existing perf buffer use in BPF, but would fail if application needed more advanced logic to lookup ring buffer by arbitrary key. HASH_OF_MAPS addresses this with current approach. Additionally, given the performance of BPF ringbuf, many use cases would just opt into a simple single ring buffer shared among all CPUs, for which current approach would be an overkill. Another approach could introduce a new concept, alongside BPF map, to represent generic "container" object, which doesn't necessarily have key/value interface with lookup/update/delete operations.
This approach would add a lot of extra infrastructure that has to be built for observability and verifier support. It would also add another concept that BPF developers would have to familiarize themselves with, new syntax in libbpf, etc. But then would really provide no additional benefits over the approach of using a map. BPF_MAP_TYPE_RINGBUF doesn't support lookup/update/delete operations, but neither do a few other map types (e.g., queue and stack; array doesn't support delete, etc). The approach chosen has an advantage of re-using existing BPF map infrastructure (introspection APIs in kernel, libbpf support, etc), being familiar concept (no need to teach users a new type of object in BPF program), and utilizing existing tooling (bpftool). For common scenario of using a single ring buffer for all CPUs, it's as simple and straightforward, as would be with a dedicated "container" object. On the other hand, by being a map, it can be combined with ARRAY_OF_MAPS and HASH_OF_MAPS map-in-maps to implement a wide variety of topologies, from one ring buffer for each CPU (e.g., as a replacement for perf buffer use cases), to a complicated application hashing/sharding of ring buffers (e.g., having a small pool of ring buffers with hashed task's tgid being a look up key to preserve order, but reduce contention). Key and value sizes are enforced to be zero. max_entries is used to specify the size of ring buffer and has to be a power of 2 value. There are a bunch of similarities between perf buffer (BPF_MAP_TYPE_PERF_EVENT_ARRAY) and new BPF ring buffer semantics: - variable-length records; - if there is no more space left in ring buffer, reservation fails, no blocking; - memory-mappable data area for user-space applications for ease of consumption and high performance; - epoll notifications for new incoming data; - but still the ability to do busy polling for new data to achieve the lowest latency, if necessary.
BPF ringbuf provides two sets of APIs to BPF programs: - bpf_ringbuf_output() allows to *copy* data from one place to a ring buffer, similarly to bpf_perf_event_output(); - bpf_ringbuf_reserve()/bpf_ringbuf_commit()/bpf_ringbuf_discard() APIs split the whole process into two steps. First, a fixed amount of space is reserved. If successful, a pointer to a data inside ring buffer data area is returned, which BPF programs can use similarly to a data inside array/hash maps. Once ready, this piece of memory is either committed or discarded. Discard is similar to commit, but makes consumer ignore the record. bpf_ringbuf_output() has disadvantage of incurring extra memory copy, because record has to be prepared in some other place first. But it allows to submit records of the length that's not known to verifier beforehand. It also closely matches bpf_perf_event_output(), so will simplify migration significantly. bpf_ringbuf_reserve() avoids the extra copy of memory by providing a memory pointer directly to ring buffer memory. In a lot of cases records are larger than BPF stack space allows, so many programs have to use an extra per-CPU array as a temporary heap for preparing sample. bpf_ringbuf_reserve() avoids this need completely. But in exchange, it only allows a known constant size of memory to be reserved, such that verifier can verify that BPF program can't access memory outside its reserved record space. bpf_ringbuf_output(), while slightly slower due to extra memory copy, covers some use cases that are not suitable for bpf_ringbuf_reserve(). The difference between commit and discard is very small. Discard just marks a record as discarded, and such records are supposed to be ignored by consumer code. Discard is useful for some advanced use-cases, such as ensuring all-or-nothing multi-record submission, or emulating temporary malloc()/free() within single BPF program invocation.
Each reserved record is tracked by verifier through existing reference-tracking logic, similar to socket ref-tracking. It is thus impossible to reserve a record, but forget to submit (or discard) it. bpf_ringbuf_query() helper allows to query various properties of ring buffer. Currently 4 are supported: - BPF_RB_AVAIL_DATA returns amount of unconsumed data in ring buffer; - BPF_RB_RING_SIZE returns the size of ring buffer; - BPF_RB_CONS_POS/BPF_RB_PROD_POS returns current logical position of consumer/producer, respectively. Returned values are momentary snapshots of ring buffer state and could be off by the time helper returns, so this should be used only for debugging/reporting reasons or for implementing various heuristics, that take into account highly-changeable nature of some of those characteristics. One such heuristic might involve more fine-grained control over poll/epoll notifications about new data availability in ring buffer. Together with BPF_RB_NO_WAKEUP/BPF_RB_FORCE_WAKEUP flags for output/commit/discard helpers, it allows BPF program a high degree of control and, e.g., more efficient batched notifications. Default self-balancing strategy, though, should be adequate for most applications and will work reliably and efficiently already. Design and implementation ------------------------- This reserve/commit schema allows a natural way for multiple producers, either on different CPUs or even on the same CPU/in the same BPF program, to reserve independent records and work with them without blocking other producers. This means that if BPF program was interrupted by another BPF program sharing the same ring buffer, they will both get a record reserved (provided there is enough space left) and can work with it and submit it independently.
This applies to NMI context as well, except that due to using a spinlock during reservation, in NMI context, bpf_ringbuf_reserve() might fail to get a lock, in which case reservation will fail even if ring buffer is not full. The ring buffer itself internally is implemented as a power-of-2 sized circular buffer, with two logical and ever-increasing counters (which might wrap around on 32-bit architectures, that's not a problem): - consumer counter shows up to which logical position consumer consumed the data; - producer counter denotes amount of data reserved by all producers. Each time a record is reserved, producer that "owns" the record will successfully advance producer counter. At that point, data is still not yet ready to be consumed, though. Each record has 8 byte header, which contains the length of reserved record, as well as two extra bits: busy bit to denote that record is still being worked on, and discard bit, which might be set at commit time if record is discarded. In the latter case, consumer is supposed to skip the record and move on to the next one. Record header also encodes record's relative offset from the beginning of ring buffer data area (in pages). This allows bpf_ringbuf_commit()/bpf_ringbuf_discard() to accept only the pointer to the record itself, without requiring also the pointer to ring buffer itself. Ring buffer memory location will be restored from record metadata header. This significantly simplifies verifier, as well as improving API usability. Producer counter increments are serialized under spinlock, so there is a strict ordering between reservations. Commits, on the other hand, are completely lockless and independent. All records become available to consumer in the order of reservations, but only after all previous records were already committed. It is thus possible for slow producers to temporarily hold off submitted records, that were reserved later.
Reservation/commit/consumer protocol is verified by litmus tests in Documentation/litmus-test/bpf-rb. One interesting implementation bit, that significantly simplifies (and thus speeds up as well) implementation of both producers and consumers is how data area is mapped twice contiguously back-to-back in the virtual memory. This allows to not take any special measures for samples that have to wrap around at the end of the circular buffer data area, because the next page after the last data page would be first data page again, and thus the sample will still appear completely contiguous in virtual memory. See comment and a simple ASCII diagram showing this visually in bpf_ringbuf_area_alloc(). Another feature that distinguishes BPF ringbuf from perf ring buffer is self-pacing notifications of new data availability. bpf_ringbuf_commit() implementation will send a notification of new record being available after commit only if consumer has already caught up right up to the record being committed. If not, consumer still has to catch up and thus will see new data anyways without needing an extra poll notification. Benchmarks (see tools/testing/selftests/bpf/benchs/bench_ringbuf.c) show that this allows to achieve a very high throughput without having to resort to tricks like "notify only every Nth sample", which are necessary with perf buffer. For extreme cases, when BPF program wants more manual control of notifications, commit/discard/output helpers accept BPF_RB_NO_WAKEUP and BPF_RB_FORCE_WAKEUP flags, which give full control over notifications of data availability, but require extra caution and diligence in using this API. Comparison to alternatives -------------------------- Before considering implementing BPF ring buffer from scratch existing alternatives in kernel were evaluated, but didn't seem to meet the needs.
They largely fell into few categories: - per-CPU buffers (perf, ftrace, etc), which don't satisfy two motivations outlined above (ordering and memory consumption); - linked list-based implementations; while some were multi-producer designs, consuming these from user-space would be very complicated and most probably not performant; memory-mapping contiguous piece of memory is simpler and more performant for user-space consumers; - io_uring is SPSC, but also requires fixed-sized elements. Naively turning SPSC queue into MPSC w/ lock would have subpar performance compared to locked reserve + lockless commit, as with BPF ring buffer. Fixed sized elements would be too limiting for BPF programs, given existing BPF programs heavily rely on variable-sized perf buffer already; - specialized implementations (like a new printk ring buffer, [0]) with lots of printk-specific limitations and implications, that didn't seem to fit well for intended use with BPF programs. [0] https://lwn.net/Articles/779550/ Signed-off-by: Andrii Nakryiko Signed-off-by: Daniel Borkmann Link: https://lore.kernel.org/bpf/20200529075424.3139988-2-andriin@fb.com Signed-off-by: Alexei Starovoitov --- include/linux/bpf.h | 13 +++++++++++++ include/linux/bpf_types.h | 1 + include/linux/bpf_verifier.h | 4 ++++ 3 files changed, 18 insertions(+) (limited to 'include/linux') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index efe8836b5c48..e5884f7f801c 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -90,6 +90,8 @@ struct bpf_map_ops { int (*map_direct_value_meta)(const struct bpf_map *map, u64 imm, u32 *off); int (*map_mmap)(struct bpf_map *map, struct vm_area_struct *vma); + __poll_t (*map_poll)(struct bpf_map *map, struct file *filp, + struct poll_table_struct *pts); }; struct bpf_map_memory { @@ -244,6 +246,9 @@ enum bpf_arg_type { ARG_PTR_TO_LONG, /* pointer to long */ ARG_PTR_TO_SOCKET, /* pointer to bpf_sock (fullsock) */ ARG_PTR_TO_BTF_ID, /* pointer to in-kernel struct */ +
ARG_PTR_TO_ALLOC_MEM, /* pointer to dynamically allocated memory */ + ARG_PTR_TO_ALLOC_MEM_OR_NULL, /* pointer to dynamically allocated memory or NULL */ + ARG_CONST_ALLOC_SIZE_OR_ZERO, /* number of allocated bytes requested */ }; /* type of values returned from helper functions */ @@ -255,6 +260,7 @@ enum bpf_return_type { RET_PTR_TO_SOCKET_OR_NULL, /* returns a pointer to a socket or NULL */ RET_PTR_TO_TCP_SOCK_OR_NULL, /* returns a pointer to a tcp_sock or NULL */ RET_PTR_TO_SOCK_COMMON_OR_NULL, /* returns a pointer to a sock_common or NULL */ + RET_PTR_TO_ALLOC_MEM_OR_NULL, /* returns a pointer to dynamically allocated memory or NULL */ }; /* eBPF function prototype used by verifier to allow BPF_CALLs from eBPF programs @@ -322,6 +328,8 @@ enum bpf_reg_type { PTR_TO_XDP_SOCK, /* reg points to struct xdp_sock */ PTR_TO_BTF_ID, /* reg points to kernel struct */ PTR_TO_BTF_ID_OR_NULL, /* reg points to kernel struct or NULL */ + PTR_TO_MEM, /* reg points to valid memory region */ + PTR_TO_MEM_OR_NULL, /* reg points to valid memory region or NULL */ }; /* The information passed from prog-specific *_is_valid_access @@ -1611,6 +1619,11 @@ extern const struct bpf_func_proto bpf_tcp_sock_proto; extern const struct bpf_func_proto bpf_jiffies64_proto; extern const struct bpf_func_proto bpf_get_ns_current_pid_tgid_proto; extern const struct bpf_func_proto bpf_event_output_data_proto; +extern const struct bpf_func_proto bpf_ringbuf_output_proto; +extern const struct bpf_func_proto bpf_ringbuf_reserve_proto; +extern const struct bpf_func_proto bpf_ringbuf_submit_proto; +extern const struct bpf_func_proto bpf_ringbuf_discard_proto; +extern const struct bpf_func_proto bpf_ringbuf_query_proto; const struct bpf_func_proto *bpf_tracing_func_proto( enum bpf_func_id func_id, const struct bpf_prog *prog); diff --git a/include/linux/bpf_types.h b/include/linux/bpf_types.h index 29d22752fc87..fa8e1b552acd 100644 --- a/include/linux/bpf_types.h +++ b/include/linux/bpf_types.h @@ -118,6 
+118,7 @@ BPF_MAP_TYPE(BPF_MAP_TYPE_STACK, stack_map_ops) #if defined(CONFIG_BPF_JIT) BPF_MAP_TYPE(BPF_MAP_TYPE_STRUCT_OPS, bpf_struct_ops_map_ops) #endif +BPF_MAP_TYPE(BPF_MAP_TYPE_RINGBUF, ringbuf_map_ops) BPF_LINK_TYPE(BPF_LINK_TYPE_RAW_TRACEPOINT, raw_tracepoint) BPF_LINK_TYPE(BPF_LINK_TYPE_TRACING, tracing) diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h index ea833087e853..ca08db4ffb5f 100644 --- a/include/linux/bpf_verifier.h +++ b/include/linux/bpf_verifier.h @@ -54,6 +54,8 @@ struct bpf_reg_state { u32 btf_id; /* for PTR_TO_BTF_ID */ + u32 mem_size; /* for PTR_TO_MEM | PTR_TO_MEM_OR_NULL */ + /* Max size from any of the above. */ unsigned long raw; }; @@ -63,6 +65,8 @@ struct bpf_reg_state { * offset, so they can share range knowledge. * For PTR_TO_MAP_VALUE_OR_NULL this is used to share which map value we * came from, when one is tested for != NULL. + * For PTR_TO_MEM_OR_NULL this is used to identify memory allocation + * for the purpose of tracking that it's freed. * For PTR_TO_SOCKET this is used to share which pointers retain the * same reference to the socket, to determine proper reference freeing. */ -- cgit v1.2.3 From fbee97feed9b3e4acdf9590e1f6b4a2eefecfffe Mon Sep 17 00:00:00 2001 From: David Ahern Date: Fri, 29 May 2020 16:07:13 -0600 Subject: bpf: Add support to attach bpf program to a devmap entry MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add BPF_XDP_DEVMAP attach type for use with programs associated with a DEVMAP entry. Allow DEVMAPs to associate a program with a device entry by adding a bpf_prog.fd to 'struct bpf_devmap_val'. Values read show the program id, so the fd and id are a union. bpf programs can get access to the struct via vmlinux.h. The program associated with the fd must have type XDP with expected attach type BPF_XDP_DEVMAP. 
When a program is associated with a device index, the program is run on an XDP_REDIRECT and before the buffer is added to the per-cpu queue. At this point rxq data is still valid; the next patch adds tx device information allowing the program to see both ingress and egress device indices. XDP generic is skb based and XDP programs do not work with skb's. Block the use case by walking maps used by a program that is to be attached via xdpgeneric and fail if any of them are DEVMAP / DEVMAP_HASH with Block attach of BPF_XDP_DEVMAP programs to devices. Signed-off-by: David Ahern Signed-off-by: Alexei Starovoitov Acked-by: Toke Høiland-Jørgensen Link: https://lore.kernel.org/bpf/20200529220716.75383-3-dsahern@kernel.org Signed-off-by: Alexei Starovoitov --- include/linux/bpf.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index e5884f7f801c..e042311f991f 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -1250,6 +1250,7 @@ int dev_map_enqueue(struct bpf_dtab_netdev *dst, struct xdp_buff *xdp, struct net_device *dev_rx); int dev_map_generic_redirect(struct bpf_dtab_netdev *dst, struct sk_buff *skb, struct bpf_prog *xdp_prog); +bool dev_map_can_have_prog(struct bpf_map *map); struct bpf_cpu_map_entry *__cpu_map_lookup_elem(struct bpf_map *map, u32 key); void __cpu_map_flush(void); @@ -1363,6 +1364,10 @@ static inline struct net_device *__dev_map_hash_lookup_elem(struct bpf_map *map { return NULL; } +static inline bool dev_map_can_have_prog(struct bpf_map *map) +{ + return false; +} static inline void __dev_flush(void) { -- cgit v1.2.3 From e91de6afa81c10e9f855c5695eb9a53168d96b73 Mon Sep 17 00:00:00 2001 From: John Fastabend Date: Fri, 29 May 2020 16:06:59 -0700 Subject: bpf: Fix running sk_skb program types with ktls KTLS uses a stream parser to collect TLS messages and send them to the upper layer tls receive handler.
This ensures the tls receiver has a full TLS header to parse when it is run. However, when a socket has BPF_SK_SKB_STREAM_VERDICT program attached before KTLS is enabled we end up with two stream parsers running on the same socket. The result is both try to run on the same socket. First the KTLS stream parser runs and calls read_sock() which will call tcp_read_sock which in turn calls tcp_rcv_skb(). This dequeues the skb from the sk_receive_queue. When this is done KTLS code then calls the data_ready() callback which because we stacked KTLS on top of the bpf stream verdict program has been replaced with sk_psock_start_strp(). This will in turn kick the stream parser again and eventually do the same thing KTLS did above calling into tcp_rcv_skb() and dequeuing a skb from the sk_receive_queue. At this point the data stream is broken. Part of the stream was handled by the KTLS side some other bytes may have been handled by the BPF side. Generally this results in either missing data or more likely a "Bad Message" complaint from the kTLS receive handler as the BPF program steals some bytes meant to be in a TLS header and/or the TLS header length is no longer correct. We've already broken the idealized model where we can stack ULPs in any order with generic callbacks on the TX side to handle this. So in this patch we do the same thing but for RX side. We add a sk_psock_strp_enabled() helper so TLS can learn a BPF verdict program is running and add a tls_sw_has_ctx_rx() helper so BPF side can learn there is a TLS ULP on the socket. Then on BPF side we omit calling our stream parser to avoid breaking the data stream for the KTLS receiver. Then on the KTLS side we call BPF_SK_SKB_STREAM_VERDICT once the KTLS receiver is done with the packet but before it posts the msg to userspace. This gives us symmetry between the TX and RX halves and IMO makes it usable again. On the TX side we process packets in this order BPF -> TLS -> TCP and on the receive side in the reverse order TCP -> TLS -> BPF.
Discovered while testing OpenSSL 3.0 Alpha2.0 release. Fixes: d829e9c4112b5 ("tls: convert to generic sk_msg interface") Signed-off-by: John Fastabend Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/159079361946.5745.605854335665044485.stgit@john-Precision-5820-Tower Signed-off-by: Alexei Starovoitov --- include/linux/skmsg.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include/linux') diff --git a/include/linux/skmsg.h b/include/linux/skmsg.h index ad31c9fb7158..08674cd14d5a 100644 --- a/include/linux/skmsg.h +++ b/include/linux/skmsg.h @@ -437,4 +437,12 @@ static inline void psock_progs_drop(struct sk_psock_progs *progs) psock_set_prog(&progs->skb_verdict, NULL); } +int sk_psock_tls_strp_read(struct sk_psock *psock, struct sk_buff *skb); + +static inline bool sk_psock_strp_enabled(struct sk_psock *psock) +{ + if (!psock) + return false; + return psock->parser.enabled; +} #endif /* _LINUX_SKMSG_H */ -- cgit v1.2.3 From 958a3f2d2aff896ae2a622878e456114f4a4cd15 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Sun, 31 May 2020 17:42:55 +0200 Subject: bpf: Use tracing helpers for lsm programs Currenty lsm uses bpf_tracing_func_proto helpers which do not include stack trace or perf event output. It's useful to have those for bpftrace lsm support [1]. Using tracing_prog_func_proto helpers for lsm programs. 
[1] https://github.com/iovisor/bpftrace/pull/1347 Signed-off-by: Jiri Olsa Signed-off-by: Alexei Starovoitov Cc: KP Singh Link: https://lore.kernel.org/bpf/20200531154255.896551-1-jolsa@kernel.org --- include/linux/bpf.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index e042311f991f..07052d44bca1 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -1633,6 +1633,9 @@ extern const struct bpf_func_proto bpf_ringbuf_query_proto; const struct bpf_func_proto *bpf_tracing_func_proto( enum bpf_func_id func_id, const struct bpf_prog *prog); +const struct bpf_func_proto *tracing_prog_func_proto( + enum bpf_func_id func_id, const struct bpf_prog *prog); + /* Shared helpers among cBPF and eBPF. */ void bpf_user_rnd_init_once(void); u64 bpf_user_rnd_u32(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5); -- cgit v1.2.3 From a3fd7ceee05431d2c51ed86c6cae015d236a51f0 Mon Sep 17 00:00:00 2001 From: Jakub Sitnicki Date: Sun, 31 May 2020 10:28:36 +0200 Subject: net: Introduce netns_bpf for BPF programs attached to netns In order to: (1) attach more than one BPF program type to netns, or (2) support attaching BPF programs to netns with bpf_link, or (3) support multi-prog attach points for netns we will need to keep more state per netns than a single pointer like we have now for BPF flow dissector program. Prepare for the above by extracting netns_bpf that is part of struct net, for storing all state related to BPF programs attached to netns. Turn flow dissector callbacks for querying/attaching/detaching a program into generic ones that operate on netns_bpf. Next patch will move the generic callbacks into their own module. This is similar to how it is organized for cgroup with cgroup_bpf.
Signed-off-by: Jakub Sitnicki Signed-off-by: Alexei Starovoitov Cc: Stanislav Fomichev Link: https://lore.kernel.org/bpf/20200531082846.2117903-3-jakub@cloudflare.com --- include/linux/bpf-netns.h | 56 +++++++++++++++++++++++++++++++++++++++++++++++ include/linux/skbuff.h | 26 ---------------------- 2 files changed, 56 insertions(+), 26 deletions(-) create mode 100644 include/linux/bpf-netns.h (limited to 'include/linux') diff --git a/include/linux/bpf-netns.h b/include/linux/bpf-netns.h new file mode 100644 index 000000000000..f3aec3d79824 --- /dev/null +++ b/include/linux/bpf-netns.h @@ -0,0 +1,56 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _BPF_NETNS_H +#define _BPF_NETNS_H + +#include +#include + +enum netns_bpf_attach_type { + NETNS_BPF_INVALID = -1, + NETNS_BPF_FLOW_DISSECTOR = 0, + MAX_NETNS_BPF_ATTACH_TYPE +}; + +static inline enum netns_bpf_attach_type +to_netns_bpf_attach_type(enum bpf_attach_type attach_type) +{ + switch (attach_type) { + case BPF_FLOW_DISSECTOR: + return NETNS_BPF_FLOW_DISSECTOR; + default: + return NETNS_BPF_INVALID; + } +} + +/* Protects updates to netns_bpf */ +extern struct mutex netns_bpf_mutex; + +union bpf_attr; +struct bpf_prog; + +#ifdef CONFIG_NET +int netns_bpf_prog_query(const union bpf_attr *attr, + union bpf_attr __user *uattr); +int netns_bpf_prog_attach(const union bpf_attr *attr, + struct bpf_prog *prog); +int netns_bpf_prog_detach(const union bpf_attr *attr); +#else +static inline int netns_bpf_prog_query(const union bpf_attr *attr, + union bpf_attr __user *uattr) +{ + return -EOPNOTSUPP; +} + +static inline int netns_bpf_prog_attach(const union bpf_attr *attr, + struct bpf_prog *prog) +{ + return -EOPNOTSUPP; +} + +static inline int netns_bpf_prog_detach(const union bpf_attr *attr) +{ + return -EOPNOTSUPP; +} +#endif + +#endif /* _BPF_NETNS_H */ diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 531843952809..a0d5c2760103 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ 
-1283,32 +1283,6 @@ void skb_flow_dissector_init(struct flow_dissector *flow_dissector, const struct flow_dissector_key *key, unsigned int key_count); -#ifdef CONFIG_NET -int skb_flow_dissector_prog_query(const union bpf_attr *attr, - union bpf_attr __user *uattr); -int skb_flow_dissector_bpf_prog_attach(const union bpf_attr *attr, - struct bpf_prog *prog); - -int skb_flow_dissector_bpf_prog_detach(const union bpf_attr *attr); -#else -static inline int skb_flow_dissector_prog_query(const union bpf_attr *attr, - union bpf_attr __user *uattr) -{ - return -EOPNOTSUPP; -} - -static inline int skb_flow_dissector_bpf_prog_attach(const union bpf_attr *attr, - struct bpf_prog *prog) -{ - return -EOPNOTSUPP; -} - -static inline int skb_flow_dissector_bpf_prog_detach(const union bpf_attr *attr) -{ - return -EOPNOTSUPP; -} -#endif - struct bpf_flow_dissector; bool bpf_flow_dissect(struct bpf_prog *prog, struct bpf_flow_dissector *ctx, __be16 proto, int nhoff, int hlen, unsigned int flags); -- cgit v1.2.3 From 7f045a49fee04b5662cbdeaf0838f9322ae8c63a Mon Sep 17 00:00:00 2001 From: Jakub Sitnicki Date: Sun, 31 May 2020 10:28:38 +0200 Subject: bpf: Add link-based BPF program attachment to network namespace Extend bpf() syscall subcommands that operate on bpf_link, that is LINK_CREATE, LINK_UPDATE, OBJ_GET_INFO, to accept attach types tied to network namespaces (only flow dissector at the moment). Link-based and prog-based attachment can be used interchangeably, but only one can exist at a time. Attempts to attach a link when a prog is already attached directly, and the other way around, will be met with -EEXIST. Attempts to detach a program when link exists result in -EINVAL. Attachment of multiple links of same attach type to one netns is not supported with the intention to lift the restriction when a use-case presents itself. Because of that link create returns -E2BIG when trying to create another netns link, when one already exists. 
Link-based attachments to netns don't keep a netns alive by holding a ref to it. Instead links get auto-detached from netns when the latter is being destroyed, using a pernet pre_exit callback. When auto-detached, link lives in defunct state as long there are open FDs for it. -ENOLINK is returned if a user tries to update a defunct link. Because bpf_link to netns doesn't hold a ref to struct net, special care is taken when releasing, updating, or filling link info. The netns might be getting torn down when any of these link operations are in progress. That is why auto-detach and update/release/fill_info are synchronized by the same mutex. Also, link ops have to always check if auto-detach has not happened yet and if netns is still alive (refcnt > 0). Signed-off-by: Jakub Sitnicki Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20200531082846.2117903-5-jakub@cloudflare.com --- include/linux/bpf-netns.h | 8 ++++++++ include/linux/bpf_types.h | 3 +++ 2 files changed, 11 insertions(+) (limited to 'include/linux') diff --git a/include/linux/bpf-netns.h b/include/linux/bpf-netns.h index f3aec3d79824..4052d649f36d 100644 --- a/include/linux/bpf-netns.h +++ b/include/linux/bpf-netns.h @@ -34,6 +34,8 @@ int netns_bpf_prog_query(const union bpf_attr *attr, int netns_bpf_prog_attach(const union bpf_attr *attr, struct bpf_prog *prog); int netns_bpf_prog_detach(const union bpf_attr *attr); +int netns_bpf_link_create(const union bpf_attr *attr, + struct bpf_prog *prog); #else static inline int netns_bpf_prog_query(const union bpf_attr *attr, union bpf_attr __user *uattr) @@ -51,6 +53,12 @@ static inline int netns_bpf_prog_detach(const union bpf_attr *attr) { return -EOPNOTSUPP; } + +static inline int netns_bpf_link_create(const union bpf_attr *attr, + struct bpf_prog *prog) +{ + return -EOPNOTSUPP; +} #endif #endif /* _BPF_NETNS_H */ diff --git a/include/linux/bpf_types.h b/include/linux/bpf_types.h index fa8e1b552acd..a18ae82a298a 100644 --- 
a/include/linux/bpf_types.h +++ b/include/linux/bpf_types.h @@ -126,3 +126,6 @@ BPF_LINK_TYPE(BPF_LINK_TYPE_TRACING, tracing) BPF_LINK_TYPE(BPF_LINK_TYPE_CGROUP, cgroup) #endif BPF_LINK_TYPE(BPF_LINK_TYPE_ITER, iter) +#ifdef CONFIG_NET +BPF_LINK_TYPE(BPF_LINK_TYPE_NETNS, netns) +#endif -- cgit v1.2.3 From 836e66c218f355ec01ba57671c85abf32961dcea Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Tue, 2 Jun 2020 16:58:32 +0200 Subject: bpf: Fix up bpf_skb_adjust_room helper's skb csum setting Lorenz recently reported: In our TC classifier cls_redirect [0], we use the following sequence of helper calls to decapsulate a GUE (basically IP + UDP + custom header) encapsulated packet: bpf_skb_adjust_room(skb, -encap_len, BPF_ADJ_ROOM_MAC, BPF_F_ADJ_ROOM_FIXED_GSO) bpf_redirect(skb->ifindex, BPF_F_INGRESS) It seems like some checksums of the inner headers are not validated in this case. For example, a TCP SYN packet with invalid TCP checksum is still accepted by the network stack and elicits a SYN ACK. [...] That is, we receive the following packet from the driver: | ETH | IP | UDP | GUE | IP | TCP | skb->ip_summed == CHECKSUM_UNNECESSARY ip_summed is CHECKSUM_UNNECESSARY because our NICs do rx checksum offloading. On this packet we run skb_adjust_room_mac(-encap_len), and get the following: | ETH | IP | TCP | skb->ip_summed == CHECKSUM_UNNECESSARY Note that ip_summed is still CHECKSUM_UNNECESSARY. After bpf_redirect()'ing into the ingress, we end up in tcp_v4_rcv(). There, skb_checksum_init() is turned into a no-op due to CHECKSUM_UNNECESSARY. The bpf_skb_adjust_room() helper is not aware of protocol specifics. Internally, it handles the CHECKSUM_COMPLETE case via skb_postpull_rcsum(), but that does not cover CHECKSUM_UNNECESSARY. In this case skb->csum_level of the original skb prior to bpf_skb_adjust_room() call was 0, that is, covering UDP. Right now there is no way to adjust the skb->csum_level. 
NICs that have checksum offload disabled (CHECKSUM_NONE) or that support CHECKSUM_COMPLETE are not affected. Use a safe default for CHECKSUM_UNNECESSARY by resetting to CHECKSUM_NONE and add a flag to the helper called BPF_F_ADJ_ROOM_NO_CSUM_RESET that allows users to opt out. Opting out is useful for the case where we don't remove/add full protocol headers, or for the case where a user wants to adjust the csum level manually e.g. through bpf_csum_level() helper that is added in subsequent patch. The bpf_skb_proto_{4_to_6,6_to_4}() for NAT64/46 translation from the BPF bpf_skb_change_proto() helper uses bpf_skb_net_hdr_{push,pop}() pair internally as well but doesn't change layers, only transitions between v4 to v6 and vice versa, therefore no adoption is required there. [0] https://lore.kernel.org/bpf/20200424185556.7358-1-lmb@cloudflare.com/ Fixes: 2be7e212d541 ("bpf: add bpf_skb_adjust_room helper") Reported-by: Lorenz Bauer Reported-by: Alan Maguire Signed-off-by: Daniel Borkmann Signed-off-by: Lorenz Bauer Signed-off-by: Alexei Starovoitov Reviewed-by: Alan Maguire Link: https://lore.kernel.org/bpf/CACAyw9-uU_52esMd1JjuA80fRPHJv5vsSg8GnfW3t_qDU4aVKQ@mail.gmail.com/ Link: https://lore.kernel.org/bpf/11a90472e7cce83e76ddbfce81fdfce7bfc68808.1591108731.git.daniel@iogearbox.net --- include/linux/skbuff.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include/linux') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index a0d5c2760103..0c0377fc00c2 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -3919,6 +3919,14 @@ static inline void __skb_incr_checksum_unnecessary(struct sk_buff *skb) } } +static inline void __skb_reset_checksum_unnecessary(struct sk_buff *skb) +{ + if (skb->ip_summed == CHECKSUM_UNNECESSARY) { + skb->ip_summed = CHECKSUM_NONE; + skb->csum_level = 0; + } +} + /* Check if we need to perform checksum complete validation. * * Returns true if checksum complete is needed, false otherwise -- cgit v1.2.3