From 730fffce4fd2eb7a0be2d0b6cd7e55e9194d76d5 Mon Sep 17 00:00:00 2001 From: Jian Wen Date: Wed, 27 Mar 2024 16:21:28 +0800 Subject: devlink: use kvzalloc() to allocate devlink instance resources During live migration of a virtual machine, the SR-IOV VF need to be re-registered. It may fail when the memory is badly fragmented. The related log is as follows. kernel: hv_netvsc 6045bdaa-c0d1-6045-bdaa-c0d16045bdaa eth0: VF slot 1 added ... kernel: kworker/0:0: page allocation failure: order:7, mode:0x40dc0(GFP_KERNEL|__GFP_COMP|__GFP_ZERO), nodemask=(null),cpuset=/,mems_allowed=0 kernel: CPU: 0 PID: 24006 Comm: kworker/0:0 Tainted: G E 5.4...x86_64 #1 kernel: Hardware name: Microsoft Corporation Virtual Machine/Virtual Machine, BIOS 090008 12/07/2018 kernel: Workqueue: events work_for_cpu_fn kernel: Call Trace: kernel: dump_stack+0x8b/0xc8 kernel: warn_alloc+0xff/0x170 kernel: __alloc_pages_slowpath+0x92c/0xb2b kernel: ? get_page_from_freelist+0x1d4/0x1140 kernel: __alloc_pages_nodemask+0x2f9/0x320 kernel: alloc_pages_current+0x6a/0xb0 kernel: kmalloc_order+0x1e/0x70 kernel: kmalloc_order_trace+0x26/0xb0 kernel: ? __switch_to_asm+0x34/0x70 kernel: __kmalloc+0x276/0x280 kernel: ? _raw_spin_unlock_irqrestore+0x1e/0x40 kernel: devlink_alloc+0x29/0x110 kernel: mlx5_devlink_alloc+0x1a/0x20 [mlx5_core] kernel: init_one+0x1d/0x650 [mlx5_core] kernel: local_pci_probe+0x46/0x90 kernel: work_for_cpu_fn+0x1a/0x30 kernel: process_one_work+0x16d/0x390 kernel: worker_thread+0x1d3/0x3f0 kernel: kthread+0x105/0x140 kernel: ? max_active_store+0x80/0x80 kernel: ? kthread_bind+0x20/0x20 kernel: ret_from_fork+0x3a/0x50 Signed-off-by: Jian Wen Link: https://lore.kernel.org/r/20240327082128.942818-1-wenjian1@xiaomi.com Signed-off-by: Jakub Kicinski --- net/devlink/core.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'net/devlink') diff --git a/net/devlink/core.c b/net/devlink/core.c index 7f0b093208d7..f49cd83f1955 100644 --- a/net/devlink/core.c +++ b/net/devlink/core.c @@ -314,7 +314,7 @@ static void devlink_release(struct work_struct *work) mutex_destroy(&devlink->lock); lockdep_unregister_key(&devlink->lock_key); put_device(devlink->dev); - kfree(devlink); + kvfree(devlink); } void devlink_put(struct devlink *devlink) @@ -420,7 +420,7 @@ struct devlink *devlink_alloc_ns(const struct devlink_ops *ops, if (!devlink_reload_actions_valid(ops)) return NULL; - devlink = kzalloc(sizeof(*devlink) + priv_size, GFP_KERNEL); + devlink = kvzalloc(struct_size(devlink, priv, priv_size), GFP_KERNEL); if (!devlink) return NULL; @@ -455,7 +455,7 @@ struct devlink *devlink_alloc_ns(const struct devlink_ops *ops, return devlink; err_xa_alloc: - kfree(devlink); + kvfree(devlink); return NULL; } EXPORT_SYMBOL_GPL(devlink_alloc_ns); -- cgit v1.2.3 From e8058a49e67fe7bc7e4a0308851a3ca3a6d2e45d Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Thu, 28 Mar 2024 20:31:45 +0100 Subject: netlink: introduce type-checking attribute iteration There are, especially with multi-attr arrays, many cases of needing to iterate all attributes of a specific type in a netlink message or a nested attribute. Add specific macros to support that case. Also convert many instances using this spatch: @@ iterator nla_for_each_attr; iterator name nla_for_each_attr_type; identifier nla; expression head, len, rem; expression ATTR; type T; identifier x; @@ -nla_for_each_attr(nla, head, len, rem) +nla_for_each_attr_type(nla, ATTR, head, len, rem) { <... T x; ...> -if (nla_type(nla) == ATTR) { ... -} } @@ identifier nla; iterator nla_for_each_nested; iterator name nla_for_each_nested_type; expression attr, rem; expression ATTR; type T; identifier x; @@ -nla_for_each_nested(nla, attr, rem) +nla_for_each_nested_type(nla, ATTR, attr, rem) { <... T x; ...> -if (nla_type(nla) == ATTR) { ... -} } @@ iterator nla_for_each_attr; iterator name nla_for_each_attr_type; identifier nla; expression head, len, rem; expression ATTR; type T; identifier x; @@ -nla_for_each_attr(nla, head, len, rem) +nla_for_each_attr_type(nla, ATTR, head, len, rem) { <... T x; ...> -if (nla_type(nla) != ATTR) continue; ... } @@ identifier nla; iterator nla_for_each_nested; iterator name nla_for_each_nested_type; expression attr, rem; expression ATTR; type T; identifier x; @@ -nla_for_each_nested(nla, attr, rem) +nla_for_each_nested_type(nla, ATTR, attr, rem) { <... T x; ...> -if (nla_type(nla) != ATTR) continue; ... } Although I had to undo one bad change this made, and I also adjusted some other code for whitespace and to use direct variable initialization now. Signed-off-by: Johannes Berg Link: https://lore.kernel.org/r/20240328203144.b5a6c895fb80.I1869b44767379f204998ff44dd239803f39c23e0@changeid Signed-off-by: Jakub Kicinski --- net/devlink/dev.c | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) (limited to 'net/devlink') diff --git a/net/devlink/dev.c b/net/devlink/dev.c index 19dbf540748a..c609deb42e88 100644 --- a/net/devlink/dev.c +++ b/net/devlink/dev.c @@ -1202,17 +1202,13 @@ static void __devlink_compat_running_version(struct devlink *devlink, if (err) goto free_msg; - nla_for_each_attr(nlattr, (void *)msg->data, msg->len, rem) { + nla_for_each_attr_type(nlattr, DEVLINK_ATTR_INFO_VERSION_RUNNING, + (void *)msg->data, msg->len, rem) { const struct nlattr *kv; int rem_kv; - if (nla_type(nlattr) != DEVLINK_ATTR_INFO_VERSION_RUNNING) - continue; - - nla_for_each_nested(kv, nlattr, rem_kv) { - if (nla_type(kv) != DEVLINK_ATTR_INFO_VERSION_VALUE) - continue; - + nla_for_each_nested_type(kv, DEVLINK_ATTR_INFO_VERSION_VALUE, + nlattr, rem_kv) { strlcat(buf, nla_data(kv), len); strlcat(buf, " ", len); } -- cgit v1.2.3 From 8e69b3459ca1ed4f6f7bd0b0a11962ddb3e7d34a Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Wed, 3 Apr 2024 13:22:59 -0700 Subject: netlink: add nlmsg_consume() and use it in devlink compat devlink_compat_running_version() sticks out when running netdevsim tests and watching dropped skbs. Add nlmsg_consume() for cases were we want to free a netlink skb but it is expected, rather than a drop. af_netlink code uses consume_skb() directly, which is fine, but some may prefer the symmetry of nlmsg_new() / nlmsg_consume(). Signed-off-by: Jakub Kicinski Signed-off-by: David S. Miller --- net/devlink/dev.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net/devlink') diff --git a/net/devlink/dev.c b/net/devlink/dev.c index c609deb42e88..13c73f50da3d 100644 --- a/net/devlink/dev.c +++ b/net/devlink/dev.c @@ -1214,7 +1214,7 @@ static void __devlink_compat_running_version(struct devlink *devlink, } } free_msg: - nlmsg_free(msg); + nlmsg_consume(msg); } void devlink_compat_running_version(struct devlink *devlink, -- cgit v1.2.3 From 5af3e3876d567fb79a355bec1cb48e432d69b4fb Mon Sep 17 00:00:00 2001 From: Parav Pandit Date: Sat, 6 Apr 2024 04:05:37 +0300 Subject: devlink: Support setting max_io_eqs Many devices send event notifications for the IO queues, such as tx and rx queues, through event queues. Enable a privileged owner, such as a hypervisor PF, to set the number of IO event queues for the VF and SF during the provisioning stage. example: Get maximum IO event queues of the VF device:: $ devlink port show pci/0000:06:00.0/2 pci/0000:06:00.0/2: type eth netdev enp6s0pf0vf1 flavour pcivf pfnum 0 vfnum 1 function: hw_addr 00:00:00:00:00:00 ipsec_packet disabled max_io_eqs 10 Set maximum IO event queues of the VF device:: $ devlink port function set pci/0000:06:00.0/2 max_io_eqs 32 $ devlink port show pci/0000:06:00.0/2 pci/0000:06:00.0/2: type eth netdev enp6s0pf0vf1 flavour pcivf pfnum 0 vfnum 1 function: hw_addr 00:00:00:00:00:00 ipsec_packet disabled max_io_eqs 32 Reviewed-by: Jiri Pirko Reviewed-by: Shay Drory Signed-off-by: Parav Pandit Signed-off-by: David S. Miller --- net/devlink/port.c | 53 +++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 53 insertions(+) (limited to 'net/devlink') diff --git a/net/devlink/port.c b/net/devlink/port.c index 118d130d2afd..be9158b4453c 100644 --- a/net/devlink/port.c +++ b/net/devlink/port.c @@ -16,6 +16,7 @@ static const struct nla_policy devlink_function_nl_policy[DEVLINK_PORT_FUNCTION_ DEVLINK_PORT_FN_STATE_ACTIVE), [DEVLINK_PORT_FN_ATTR_CAPS] = NLA_POLICY_BITFIELD32(DEVLINK_PORT_FN_CAPS_VALID_MASK), + [DEVLINK_PORT_FN_ATTR_MAX_IO_EQS] = { .type = NLA_U32 }, }; #define ASSERT_DEVLINK_PORT_REGISTERED(devlink_port) \ @@ -182,6 +183,30 @@ static int devlink_port_fn_caps_fill(struct devlink_port *devlink_port, return 0; } +static int devlink_port_fn_max_io_eqs_fill(struct devlink_port *port, + struct sk_buff *msg, + struct netlink_ext_ack *extack, + bool *msg_updated) +{ + u32 max_io_eqs; + int err; + + if (!port->ops->port_fn_max_io_eqs_get) + return 0; + + err = port->ops->port_fn_max_io_eqs_get(port, &max_io_eqs, extack); + if (err) { + if (err == -EOPNOTSUPP) + return 0; + return err; + } + err = nla_put_u32(msg, DEVLINK_PORT_FN_ATTR_MAX_IO_EQS, max_io_eqs); + if (err) + return err; + *msg_updated = true; + return 0; +} + int devlink_nl_port_handle_fill(struct sk_buff *msg, struct devlink_port *devlink_port) { if (devlink_nl_put_handle(msg, devlink_port->devlink)) @@ -409,6 +434,18 @@ static int devlink_port_fn_caps_set(struct devlink_port *devlink_port, return 0; } +static int +devlink_port_fn_max_io_eqs_set(struct devlink_port *devlink_port, + const struct nlattr *attr, + struct netlink_ext_ack *extack) +{ + u32 max_io_eqs; + + max_io_eqs = nla_get_u32(attr); + return devlink_port->ops->port_fn_max_io_eqs_set(devlink_port, + max_io_eqs, extack); +} + static int devlink_nl_port_function_attrs_put(struct sk_buff *msg, struct devlink_port *port, struct netlink_ext_ack *extack) @@ -428,6 +465,9 @@ devlink_nl_port_function_attrs_put(struct sk_buff *msg, struct devlink_port *por if (err) goto out; err = devlink_port_fn_state_fill(port, msg, extack, &msg_updated); + if (err) + goto out; + err = devlink_port_fn_max_io_eqs_fill(port, msg, extack, &msg_updated); if (err) goto out; err = devlink_rel_devlink_handle_put(msg, port->devlink, @@ -726,6 +766,12 @@ static int devlink_port_function_validate(struct devlink_port *devlink_port, } } } + if (tb[DEVLINK_PORT_FN_ATTR_MAX_IO_EQS] && + !ops->port_fn_max_io_eqs_set) { + NL_SET_ERR_MSG_ATTR(extack, tb[DEVLINK_PORT_FN_ATTR_MAX_IO_EQS], + "Function does not support max_io_eqs setting"); + return -EOPNOTSUPP; + } return 0; } @@ -761,6 +807,13 @@ static int devlink_port_function_set(struct devlink_port *port, return err; } + attr = tb[DEVLINK_PORT_FN_ATTR_MAX_IO_EQS]; + if (attr) { + err = devlink_port_fn_max_io_eqs_set(port, attr, extack); + if (err) + return err; + } + /* Keep this as the last function attribute set, so that when * multiple port function attributes are set along with state, * Those can be applied first before activating the state. -- cgit v1.2.3 From 5625ca5640caa3fb797f155601d56379d260d6ba Mon Sep 17 00:00:00 2001 From: Mateusz Polchlopek Date: Fri, 19 Apr 2024 04:08:49 -0400 Subject: devlink: extend devlink_param *set pointer Extend devlink_param *set function pointer to take extack as a param. Sometimes it is needed to pass information to the end user from set function. It is more proper to use for that netlink instead of passing message to dmesg. Reviewed-by: Jiri Pirko Reviewed-by: Przemek Kitszel Signed-off-by: Mateusz Polchlopek Signed-off-by: Tony Nguyen --- net/devlink/param.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'net/devlink') diff --git a/net/devlink/param.c b/net/devlink/param.c index 22bc3b500518..dcf0d1ccebba 100644 --- a/net/devlink/param.c +++ b/net/devlink/param.c @@ -158,11 +158,12 @@ static int devlink_param_get(struct devlink *devlink, static int devlink_param_set(struct devlink *devlink, const struct devlink_param *param, - struct devlink_param_gset_ctx *ctx) + struct devlink_param_gset_ctx *ctx, + struct netlink_ext_ack *extack) { if (!param->set) return -EOPNOTSUPP; - return param->set(devlink, param->id, ctx); + return param->set(devlink, param->id, ctx, extack); } static int @@ -571,7 +572,7 @@ static int __devlink_nl_cmd_param_set_doit(struct devlink *devlink, return -EOPNOTSUPP; ctx.val = value; ctx.cmode = cmode; - err = devlink_param_set(devlink, param, &ctx); + err = devlink_param_set(devlink, param, &ctx, info->extack); if (err) return err; } -- cgit v1.2.3