From 2c2b0d880f1b4c01f30e14242977b82fa527342d Mon Sep 17 00:00:00 2001 From: Mukul Joshi Date: Thu, 23 Jul 2020 23:09:57 -0400 Subject: drm/amdkfd: Add thermal throttling SMI event Add support for reporting thermal throttling events through SMI. Also, add a counter to count the number of throttling interrupts observed and report the count in the SMI event message. Signed-off-by: Mukul Joshi Reviewed-by: Felix Kuehling Signed-off-by: Alex Deucher --- include/uapi/linux/kfd_ioctl.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/kfd_ioctl.h b/include/uapi/linux/kfd_ioctl.h index f738c3b53f4e..df6c7a43aadc 100644 --- a/include/uapi/linux/kfd_ioctl.h +++ b/include/uapi/linux/kfd_ioctl.h @@ -450,7 +450,8 @@ struct kfd_ioctl_import_dmabuf_args { * KFD SMI(System Management Interface) events */ /* Event type (defined by bitmask) */ -#define KFD_SMI_EVENT_VMFAULT 0x0000000000000001 +#define KFD_SMI_EVENT_VMFAULT 0x0000000000000001 +#define KFD_SMI_EVENT_THERMAL_THROTTLE 0x0000000000000002 struct kfd_ioctl_smi_events_args { __u32 gpuid; /* to KFD */ -- cgit v1.2.3 From 522ec6e0eed0ab0678e7d5b5bf00487dfe83f7ce Mon Sep 17 00:00:00 2001 From: Mukul Joshi Date: Thu, 30 Jul 2020 18:04:33 -0400 Subject: drm/amdkfd: Replace bitmask with event idx in SMI event msg Event bitmask is a 64-bit mask with only 1 bit set. Sending this event bitmask in KFD SMI event message is both wasteful of memory and potentially limiting to only 64 events. Instead send event index in SMI event message. Please note this change does not break the ABI for the two event types defined so far. The new index is identical to the mask used before. Signed-off-by: Mukul Joshi Suggested-by: Felix Kuehling Reviewed-by: Felix Kuehling Signed-off-by: Alex Deucher --- include/uapi/linux/kfd_ioctl.h | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/kfd_ioctl.h b/include/uapi/linux/kfd_ioctl.h index df6c7a43aadc..cb1f963a84e0 100644 --- a/include/uapi/linux/kfd_ioctl.h +++ b/include/uapi/linux/kfd_ioctl.h @@ -449,9 +449,13 @@ struct kfd_ioctl_import_dmabuf_args { /* * KFD SMI(System Management Interface) events */ -/* Event type (defined by bitmask) */ -#define KFD_SMI_EVENT_VMFAULT 0x0000000000000001 -#define KFD_SMI_EVENT_THERMAL_THROTTLE 0x0000000000000002 +enum kfd_smi_event { + KFD_SMI_EVENT_NONE = 0, /* not used */ + KFD_SMI_EVENT_VMFAULT = 1, /* event start counting at 1 */ + KFD_SMI_EVENT_THERMAL_THROTTLE = 2, +}; + +#define KFD_SMI_EVENT_MASK_FROM_INDEX(i) (1ULL << ((i) - 1)) struct kfd_ioctl_smi_events_args { __u32 gpuid; /* to KFD */ -- cgit v1.2.3 From 592d9fba33c275b72cb4dae99c187444daafcd33 Mon Sep 17 00:00:00 2001 From: David Stevens Date: Tue, 18 Aug 2020 16:13:42 +0900 Subject: virtio-gpu: add VIRTIO_GPU_F_RESOURCE_UUID feature This feature allows the guest to request a UUID from the host for a particular virtio_gpu resource. The UUID can then be shared with other virtio devices, to allow the other host devices to access the virtio_gpu's corresponding host resource. Signed-off-by: David Stevens Acked-by: Michael S. Tsirkin Link: http://patchwork.freedesktop.org/patch/msgid/20200818071343.3461203-3-stevensd@chromium.org Signed-off-by: Gerd Hoffmann --- include/uapi/linux/virtio_gpu.h | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/virtio_gpu.h b/include/uapi/linux/virtio_gpu.h index 0c85914d9369..9721d58b4d58 100644 --- a/include/uapi/linux/virtio_gpu.h +++ b/include/uapi/linux/virtio_gpu.h @@ -50,6 +50,10 @@ * VIRTIO_GPU_CMD_GET_EDID */ #define VIRTIO_GPU_F_EDID 1 +/* + * VIRTIO_GPU_CMD_RESOURCE_ASSIGN_UUID + */ +#define VIRTIO_GPU_F_RESOURCE_UUID 2 enum virtio_gpu_ctrl_type { VIRTIO_GPU_UNDEFINED = 0, @@ -66,6 +70,7 @@ enum virtio_gpu_ctrl_type { VIRTIO_GPU_CMD_GET_CAPSET_INFO, VIRTIO_GPU_CMD_GET_CAPSET, VIRTIO_GPU_CMD_GET_EDID, + VIRTIO_GPU_CMD_RESOURCE_ASSIGN_UUID, /* 3d commands */ VIRTIO_GPU_CMD_CTX_CREATE = 0x0200, @@ -87,6 +92,7 @@ enum virtio_gpu_ctrl_type { VIRTIO_GPU_RESP_OK_CAPSET_INFO, VIRTIO_GPU_RESP_OK_CAPSET, VIRTIO_GPU_RESP_OK_EDID, + VIRTIO_GPU_RESP_OK_RESOURCE_UUID, /* error responses */ VIRTIO_GPU_RESP_ERR_UNSPEC = 0x1200, @@ -340,4 +346,17 @@ enum virtio_gpu_formats { VIRTIO_GPU_FORMAT_R8G8B8X8_UNORM = 134, }; +/* VIRTIO_GPU_CMD_RESOURCE_ASSIGN_UUID */ +struct virtio_gpu_resource_assign_uuid { + struct virtio_gpu_ctrl_hdr hdr; + __le32 resource_id; + __le32 padding; +}; + +/* VIRTIO_GPU_RESP_OK_RESOURCE_UUID */ +struct virtio_gpu_resp_resource_uuid { + struct virtio_gpu_ctrl_hdr hdr; + __u8 uuid[16]; +}; + #endif -- cgit v1.2.3 From aa207a05f95abc3530b7415232f0f73278336bd3 Mon Sep 17 00:00:00 2001 From: Alexander Usyskin Date: Tue, 18 Aug 2020 14:51:45 +0300 Subject: mei: add connect with vtag ioctl This IOCTL is used to associate the current file descriptor with a FW Client (given by UUID), and virtual tag (vtag). The IOCTL opens a communication channel between a host client and a FW client on a tagged channel. From this point on, every reader and write will communicate with the associated FW client on the tagged channel. Upon close() the communication is terminated. The IOCTL argument is a struct with a union that contains the input parameter and the output parameter for this IOCTL. The input parameter is UUID of the FW Client, a vtag [0,255] The output parameter is the properties of the FW client Clients that do not support tagged connection will respond with -EOPNOTSUPP Signed-off-by: Alexander Usyskin Signed-off-by: Tomas Winkler Link: https://lore.kernel.org/r/20200818115147.2567012-12-tomas.winkler@intel.com Signed-off-by: Greg Kroah-Hartman --- include/uapi/linux/mei.h | 49 ++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 49 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/mei.h b/include/uapi/linux/mei.h index c6aec86cc5de..4f3638489d01 100644 --- a/include/uapi/linux/mei.h +++ b/include/uapi/linux/mei.h @@ -66,4 +66,53 @@ struct mei_connect_client_data { */ #define IOCTL_MEI_NOTIFY_GET _IOR('H', 0x03, __u32) +/** + * struct mei_connect_client_vtag - mei client information struct with vtag + * + * @in_client_uuid: UUID of client to connect + * @vtag: virtual tag + * @reserved: reserved for future use + */ +struct mei_connect_client_vtag { + uuid_le in_client_uuid; + __u8 vtag; + __u8 reserved[3]; +}; + +/** + * struct mei_connect_client_data_vtag - IOCTL connect data union + * + * @connect: input connect data + * @out_client_properties: output client data + */ +struct mei_connect_client_data_vtag { + union { + struct mei_connect_client_vtag connect; + struct mei_client out_client_properties; + }; +}; + +/** + * DOC: + * This IOCTL is used to associate the current file descriptor with a + * FW Client (given by UUID), and virtual tag (vtag). + * The IOCTL opens a communication channel between a host client and + * a FW client on a tagged channel. From this point on, every read + * and write will communicate with the associated FW client with + * on the tagged channel. + * Upone close() the communication is terminated. + * + * The IOCTL argument is a struct with a union that contains + * the input parameter and the output parameter for this IOCTL. + * + * The input parameter is UUID of the FW Client, a vtag [0,255] + * The output parameter is the properties of the FW client + * (FW protocool version and max message size). + * + * Clients that do not support tagged connection + * will respond with -EOPNOTSUPP. + */ +#define IOCTL_MEI_CONNECT_CLIENT_VTAG \ + _IOWR('H', 0x04, struct mei_connect_client_data_vtag) + #endif /* _LINUX_MEI_H */ -- cgit v1.2.3 From ba171d3f0850003216fd1a85190d17b1feddb961 Mon Sep 17 00:00:00 2001 From: Cedric Neveux Date: Mon, 4 Mar 2019 08:54:23 +0100 Subject: driver: tee: Handle NULL pointer indication from client TEE Client introduce a new capability "TEE_GEN_CAP_MEMREF_NULL" to handle the support of the shared memory buffer with a NULL pointer. This capability depends on TEE Capabilities and driver support. Driver and TEE exchange capabilities at driver initialization. Signed-off-by: Michael Whitfield Signed-off-by: Cedric Neveux Reviewed-by: Joakim Bech Tested-by: Joakim Bech (QEMU) Signed-off-by: Jens Wiklander --- include/uapi/linux/tee.h | 13 +++++++++++++ 1 file changed, 13 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/tee.h b/include/uapi/linux/tee.h index b619f37ee03e..d67cadf221fc 100644 --- a/include/uapi/linux/tee.h +++ b/include/uapi/linux/tee.h @@ -51,6 +51,9 @@ #define TEE_GEN_CAP_GP (1 << 0)/* GlobalPlatform compliant TEE */ #define TEE_GEN_CAP_PRIVILEGED (1 << 1)/* Privileged device (for supplicant) */ #define TEE_GEN_CAP_REG_MEM (1 << 2)/* Supports registering shared memory */ +#define TEE_GEN_CAP_MEMREF_NULL (1 << 3)/* NULL MemRef support */ + +#define TEE_MEMREF_NULL (__u64)(-1) /* NULL MemRef Buffer */ /* * TEE Implementation ID @@ -200,6 +203,16 @@ struct tee_ioctl_buf_data { * a part of a shared memory by specifying an offset (@a) and size (@b) of * the object. To supply the entire shared memory object set the offset * (@a) to 0 and size (@b) to the previously returned size of the object. + * + * A client may need to present a NULL pointer in the argument + * passed to a trusted application in the TEE. + * This is also a requirement in GlobalPlatform Client API v1.0c + * (section 3.2.5 memory references), which can be found at + * http://www.globalplatform.org/specificationsdevice.asp + * + * If a NULL pointer is passed to a TA in the TEE, the (@c) + * IOCTL parameters value must be set to TEE_MEMREF_NULL indicating a NULL + * memory reference. */ struct tee_ioctl_param { __u64 attr; -- cgit v1.2.3 From 6b0a249a301e2af9adda84adbced3a2988248b95 Mon Sep 17 00:00:00 2001 From: Yonghong Song Date: Fri, 21 Aug 2020 11:44:18 -0700 Subject: bpf: Implement link_query for bpf iterators This patch implemented bpf_link callback functions show_fdinfo and fill_link_info to support link_query interface. The general interface for show_fdinfo and fill_link_info will print/fill the target_name. Each targets can register show_fdinfo and fill_link_info callbacks to print/fill more target specific information. For example, the below is a fdinfo result for a bpf task iterator. $ cat /proc/1749/fdinfo/7 pos: 0 flags: 02000000 mnt_id: 14 link_type: iter link_id: 11 prog_tag: 990e1f8152f7e54f prog_id: 59 target_name: task Signed-off-by: Yonghong Song Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20200821184418.574122-1-yhs@fb.com --- include/uapi/linux/bpf.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 0480f893facd..a1bbaff7a0af 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -4071,6 +4071,13 @@ struct bpf_link_info { __u64 cgroup_id; __u32 attach_type; } cgroup; + struct { + __aligned_u64 target_name; /* in/out: target_name buffer ptr */ + __u32 target_name_len; /* in/out: target_name buffer len */ + union { + __u32 map_id; + } map; + } iter; struct { __u32 netns_ino; __u32 attach_type; -- cgit v1.2.3 From 4ffa22fd22a7cbde1a1394b2707ea73593dc0fda Mon Sep 17 00:00:00 2001 From: Matt Ranostay Date: Thu, 23 Jul 2020 09:29:43 +0300 Subject: iio: add IIO_MOD_O2 modifier Add modifier IIO_MOD_O2 for O2 concentration reporting Signed-off-by: Matt Ranostay Signed-off-by: Jonathan Cameron --- include/uapi/linux/iio/types.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/iio/types.h b/include/uapi/linux/iio/types.h index fdd81affca4b..48c13147c0a8 100644 --- a/include/uapi/linux/iio/types.h +++ b/include/uapi/linux/iio/types.h @@ -94,6 +94,7 @@ enum iio_modifier { IIO_MOD_PM10, IIO_MOD_ETHANOL, IIO_MOD_H2, + IIO_MOD_O2, }; enum iio_event_type { -- cgit v1.2.3 From eee049c0ef5b5b433f36841801e34c21c9f82a23 Mon Sep 17 00:00:00 2001 From: Tom Parkin Date: Sat, 22 Aug 2020 15:59:08 +0100 Subject: l2tp: remove tunnel and session debug flags field The l2tp subsystem now uses standard kernel logging APIs for informational and warning messages, and tracepoints for debug information. Now that the tunnel and session debug flags are unused, remove the field from the core structures. Various system calls (in the case of l2tp_ppp) and netlink messages handle the getting and setting of debug flags. To avoid userspace breakage don't modify the API of these calls; simply ignore set requests, and send dummy data for get requests. Signed-off-by: Tom Parkin Signed-off-by: David S. Miller --- include/uapi/linux/if_pppol2tp.h | 2 +- include/uapi/linux/l2tp.h | 6 ++++-- 2 files changed, 5 insertions(+), 3 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/if_pppol2tp.h b/include/uapi/linux/if_pppol2tp.h index 060b4d1f3129..a91044328bc9 100644 --- a/include/uapi/linux/if_pppol2tp.h +++ b/include/uapi/linux/if_pppol2tp.h @@ -75,7 +75,7 @@ struct pppol2tpv3in6_addr { }; /* Socket options: - * DEBUG - bitmask of debug message categories + * DEBUG - bitmask of debug message categories (not used) * SENDSEQ - 0 => don't send packets with sequence numbers * 1 => send packets with sequence numbers * RECVSEQ - 0 => receive packet sequence numbers are optional diff --git a/include/uapi/linux/l2tp.h b/include/uapi/linux/l2tp.h index 61158f5a1a5b..88a0d32b8c07 100644 --- a/include/uapi/linux/l2tp.h +++ b/include/uapi/linux/l2tp.h @@ -108,7 +108,7 @@ enum { L2TP_ATTR_VLAN_ID, /* u16 (not used) */ L2TP_ATTR_COOKIE, /* 0, 4 or 8 bytes */ L2TP_ATTR_PEER_COOKIE, /* 0, 4 or 8 bytes */ - L2TP_ATTR_DEBUG, /* u32, enum l2tp_debug_flags */ + L2TP_ATTR_DEBUG, /* u32, enum l2tp_debug_flags (not used) */ L2TP_ATTR_RECV_SEQ, /* u8 */ L2TP_ATTR_SEND_SEQ, /* u8 */ L2TP_ATTR_LNS_MODE, /* u8 */ @@ -177,7 +177,9 @@ enum l2tp_seqmode { }; /** - * enum l2tp_debug_flags - debug message categories for L2TP tunnels/sessions + * enum l2tp_debug_flags - debug message categories for L2TP tunnels/sessions. + * + * Unused. * * @L2TP_MSG_DEBUG: verbose debug (if compiled in) * @L2TP_MSG_CONTROL: userspace - kernel interface -- cgit v1.2.3 From 2b8ee4f05d4f6a6c427ad30dd6c1bb49eb2efd3b Mon Sep 17 00:00:00 2001 From: Martin KaFai Lau Date: Thu, 20 Aug 2020 12:00:21 -0700 Subject: tcp: bpf: Add TCP_BPF_DELACK_MAX setsockopt This change is mostly from an internal patch and adapts it from sysctl config to the bpf_setsockopt setup. The bpf_prog can set the max delay ack by using bpf_setsockopt(TCP_BPF_DELACK_MAX). This max delay ack can be communicated to its peer through bpf header option. The receiving peer can then use this max delay ack and set a potentially lower rto by using bpf_setsockopt(TCP_BPF_RTO_MIN) which will be introduced in the next patch. Another later selftest patch will also use it like the above to show how to write and parse bpf tcp header option. Signed-off-by: Martin KaFai Lau Signed-off-by: Alexei Starovoitov Reviewed-by: Eric Dumazet Acked-by: John Fastabend Link: https://lore.kernel.org/bpf/20200820190021.2884000-1-kafai@fb.com --- include/uapi/linux/bpf.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index a1bbaff7a0af..7b905cb0213e 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -4257,6 +4257,7 @@ enum { enum { TCP_BPF_IW = 1001, /* Set TCP initial congestion window */ TCP_BPF_SNDCWND_CLAMP = 1002, /* Set sndcwnd_clamp */ + TCP_BPF_DELACK_MAX = 1003, /* Max delay ack in usecs */ }; struct bpf_perf_event_value { -- cgit v1.2.3 From ca584ba070864c606f3a54faaafe774726d5b4a1 Mon Sep 17 00:00:00 2001 From: Martin KaFai Lau Date: Thu, 20 Aug 2020 12:00:27 -0700 Subject: tcp: bpf: Add TCP_BPF_RTO_MIN for bpf_setsockopt This patch adds bpf_setsockopt(TCP_BPF_RTO_MIN) to allow bpf prog to set the min rto of a connection. It could be used together with the earlier patch which has added bpf_setsockopt(TCP_BPF_DELACK_MAX). A later selftest patch will communicate the max delay ack in a bpf tcp header option and then the receiving side can use bpf_setsockopt(TCP_BPF_RTO_MIN) to set a shorter rto. Signed-off-by: Martin KaFai Lau Signed-off-by: Alexei Starovoitov Reviewed-by: Eric Dumazet Acked-by: John Fastabend Link: https://lore.kernel.org/bpf/20200820190027.2884170-1-kafai@fb.com --- include/uapi/linux/bpf.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 7b905cb0213e..1ae20058b574 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -4258,6 +4258,7 @@ enum { TCP_BPF_IW = 1001, /* Set TCP initial congestion window */ TCP_BPF_SNDCWND_CLAMP = 1002, /* Set sndcwnd_clamp */ TCP_BPF_DELACK_MAX = 1003, /* Max delay ack in usecs */ + TCP_BPF_RTO_MIN = 1004, /* Min delay ack in usecs */ }; struct bpf_perf_event_value { -- cgit v1.2.3 From 00d211a4ea6f48e8e3b758813fe23ad28193d3bf Mon Sep 17 00:00:00 2001 From: Martin KaFai Lau Date: Thu, 20 Aug 2020 12:00:46 -0700 Subject: bpf: tcp: Add bpf_skops_parse_hdr() The patch adds a function bpf_skops_parse_hdr(). It will call the bpf prog to parse the TCP header received at a tcp_sock that has at least reached the ESTABLISHED state. For the packets received during the 3WHS (SYN, SYNACK and ACK), the received skb will be available to the bpf prog during the callback in bpf_skops_established() introduced in the previous patch and in the bpf_skops_write_hdr_opt() that will be added in the next patch. Calling bpf prog to parse header is controlled by two new flags in tp->bpf_sock_ops_cb_flags: BPF_SOCK_OPS_PARSE_UNKNOWN_HDR_OPT_CB_FLAG and BPF_SOCK_OPS_PARSE_ALL_HDR_OPT_CB_FLAG. When BPF_SOCK_OPS_PARSE_UNKNOWN_HDR_OPT_CB_FLAG is set, the bpf prog will only be called when there is unknown option in the TCP header. When BPF_SOCK_OPS_PARSE_ALL_HDR_OPT_CB_FLAG is set, the bpf prog will be called on all received TCP header. This function is half implemented to highlight the changes in TCP stack. The actual codes preparing the bpf running context and invoking the bpf prog will be added in the later patch with other necessary bpf pieces. Signed-off-by: Martin KaFai Lau Signed-off-by: Alexei Starovoitov Reviewed-by: Eric Dumazet Link: https://lore.kernel.org/bpf/20200820190046.2885054-1-kafai@fb.com --- include/uapi/linux/bpf.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 1ae20058b574..010ed2abcb66 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -4173,8 +4173,10 @@ enum { BPF_SOCK_OPS_RETRANS_CB_FLAG = (1<<1), BPF_SOCK_OPS_STATE_CB_FLAG = (1<<2), BPF_SOCK_OPS_RTT_CB_FLAG = (1<<3), + BPF_SOCK_OPS_PARSE_ALL_HDR_OPT_CB_FLAG = (1<<4), + BPF_SOCK_OPS_PARSE_UNKNOWN_HDR_OPT_CB_FLAG = (1<<5), /* Mask of all currently supported cb flags */ - BPF_SOCK_OPS_ALL_CB_FLAGS = 0xF, + BPF_SOCK_OPS_ALL_CB_FLAGS = 0x3F, }; /* List of known BPF sock_ops operators. -- cgit v1.2.3 From 331fca4315efa3bbd258fbdf8209d59d253c0480 Mon Sep 17 00:00:00 2001 From: Martin KaFai Lau Date: Thu, 20 Aug 2020 12:00:52 -0700 Subject: bpf: tcp: Add bpf_skops_hdr_opt_len() and bpf_skops_write_hdr_opt() The bpf prog needs to parse the SYN header to learn what options have been sent by the peer's bpf-prog before writing its options into SYNACK. This patch adds a "syn_skb" arg to tcp_make_synack() and send_synack(). This syn_skb will eventually be made available (as read-only) to the bpf prog. This will be the only SYN packet available to the bpf prog during syncookie. For other regular cases, the bpf prog can also use the saved_syn. When writing options, the bpf prog will first be called to tell the kernel its required number of bytes. It is done by the new bpf_skops_hdr_opt_len(). The bpf prog will only be called when the new BPF_SOCK_OPS_WRITE_HDR_OPT_CB_FLAG is set in tp->bpf_sock_ops_cb_flags. When the bpf prog returns, the kernel will know how many bytes are needed and then update the "*remaining" arg accordingly. 4 byte alignment will be included in the "*remaining" before this function returns. The 4 byte aligned number of bytes will also be stored into the opts->bpf_opt_len. "bpf_opt_len" is a newly added member to the struct tcp_out_options. Then the new bpf_skops_write_hdr_opt() will call the bpf prog to write the header options. The bpf prog is only called if it has reserved spaces before (opts->bpf_opt_len > 0). The bpf prog is the last one getting a chance to reserve header space and writing the header option. These two functions are half implemented to highlight the changes in TCP stack. The actual codes preparing the bpf running context and invoking the bpf prog will be added in the later patch with other necessary bpf pieces. Signed-off-by: Martin KaFai Lau Signed-off-by: Alexei Starovoitov Reviewed-by: Eric Dumazet Link: https://lore.kernel.org/bpf/20200820190052.2885316-1-kafai@fb.com --- include/uapi/linux/bpf.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 010ed2abcb66..18d0e128bc3c 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -4175,8 +4175,9 @@ enum { BPF_SOCK_OPS_RTT_CB_FLAG = (1<<3), BPF_SOCK_OPS_PARSE_ALL_HDR_OPT_CB_FLAG = (1<<4), BPF_SOCK_OPS_PARSE_UNKNOWN_HDR_OPT_CB_FLAG = (1<<5), + BPF_SOCK_OPS_WRITE_HDR_OPT_CB_FLAG = (1<<6), /* Mask of all currently supported cb flags */ - BPF_SOCK_OPS_ALL_CB_FLAGS = 0x3F, + BPF_SOCK_OPS_ALL_CB_FLAGS = 0x7F, }; /* List of known BPF sock_ops operators. -- cgit v1.2.3 From 0813a841566f0962a5551be7749b43c45f0022a0 Mon Sep 17 00:00:00 2001 From: Martin KaFai Lau Date: Thu, 20 Aug 2020 12:01:04 -0700 Subject: bpf: tcp: Allow bpf prog to write and parse TCP header option MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Note: The TCP changes here is mainly to implement the bpf pieces into the bpf_skops_*() functions introduced in the earlier patches. ] The earlier effort in BPF-TCP-CC allows the TCP Congestion Control algorithm to be written in BPF. It opens up opportunities to allow a faster turnaround time in testing/releasing new congestion control ideas to production environment. The same flexibility can be extended to writing TCP header option. It is not uncommon that people want to test new TCP header option to improve the TCP performance. Another use case is for data-center that has a more controlled environment and has more flexibility in putting header options for internal only use. For example, we want to test the idea in putting maximum delay ACK in TCP header option which is similar to a draft RFC proposal [1]. This patch introduces the necessary BPF API and use them in the TCP stack to allow BPF_PROG_TYPE_SOCK_OPS program to parse and write TCP header options. It currently supports most of the TCP packet except RST. Supported TCP header option: ─────────────────────────── This patch allows the bpf-prog to write any option kind. Different bpf-progs can write its own option by calling the new helper bpf_store_hdr_opt(). The helper will ensure there is no duplicated option in the header. By allowing bpf-prog to write any option kind, this gives a lot of flexibility to the bpf-prog. Different bpf-prog can write its own option kind. It could also allow the bpf-prog to support a recently standardized option on an older kernel. Sockops Callback Flags: ────────────────────── The bpf program will only be called to parse/write tcp header option if the following newly added callback flags are enabled in tp->bpf_sock_ops_cb_flags: BPF_SOCK_OPS_PARSE_UNKNOWN_HDR_OPT_CB_FLAG BPF_SOCK_OPS_PARSE_ALL_HDR_OPT_CB_FLAG BPF_SOCK_OPS_WRITE_HDR_OPT_CB_FLAG A few words on the PARSE CB flags. When the above PARSE CB flags are turned on, the bpf-prog will be called on packets received at a sk that has at least reached the ESTABLISHED state. The parsing of the SYN-SYNACK-ACK will be discussed in the "3 Way HandShake" section. The default is off for all of the above new CB flags, i.e. the bpf prog will not be called to parse or write bpf hdr option. There are details comment on these new cb flags in the UAPI bpf.h. sock_ops->skb_data and bpf_load_hdr_opt() ───────────────────────────────────────── sock_ops->skb_data and sock_ops->skb_data_end covers the whole TCP header and its options. They are read only. The new bpf_load_hdr_opt() helps to read a particular option "kind" from the skb_data. Please refer to the comment in UAPI bpf.h. It has details on what skb_data contains under different sock_ops->op. 3 Way HandShake ─────────────── The bpf-prog can learn if it is sending SYN or SYNACK by reading the sock_ops->skb_tcp_flags. * Passive side When writing SYNACK (i.e. sock_ops->op == BPF_SOCK_OPS_WRITE_HDR_OPT_CB), the received SYN skb will be available to the bpf prog. The bpf prog can use the SYN skb (which may carry the header option sent from the remote bpf prog) to decide what bpf header option should be written to the outgoing SYNACK skb. The SYN packet can be obtained by getsockopt(TCP_BPF_SYN*). More on this later. Also, the bpf prog can learn if it is in syncookie mode (by checking sock_ops->args[0] == BPF_WRITE_HDR_TCP_SYNACK_COOKIE). The bpf prog can store the received SYN pkt by using the existing bpf_setsockopt(TCP_SAVE_SYN). The example in a later patch does it. [ Note that the fullsock here is a listen sk, bpf_sk_storage is not very useful here since the listen sk will be shared by many concurrent connection requests. Extending bpf_sk_storage support to request_sock will add weight to the minisock and it is not necessary better than storing the whole ~100 bytes SYN pkt. ] When the connection is established, the bpf prog will be called in the existing PASSIVE_ESTABLISHED_CB callback. At that time, the bpf prog can get the header option from the saved syn and then apply the needed operation to the newly established socket. The later patch will use the max delay ack specified in the SYN header and set the RTO of this newly established connection as an example. The received ACK (that concludes the 3WHS) will also be available to the bpf prog during PASSIVE_ESTABLISHED_CB through the sock_ops->skb_data. It could be useful in syncookie scenario. More on this later. There is an existing getsockopt "TCP_SAVED_SYN" to return the whole saved syn pkt which includes the IP[46] header and the TCP header. A few "TCP_BPF_SYN*" getsockopt has been added to allow specifying where to start getting from, e.g. starting from TCP header, or from IP[46] header. The new getsockopt(TCP_BPF_SYN*) will also know where it can get the SYN's packet from: - (a) the just received syn (available when the bpf prog is writing SYNACK) and it is the only way to get SYN during syncookie mode. or - (b) the saved syn (available in PASSIVE_ESTABLISHED_CB and also other existing CB). The bpf prog does not need to know where the SYN pkt is coming from. The getsockopt(TCP_BPF_SYN*) will hide this details. Similarly, a flags "BPF_LOAD_HDR_OPT_TCP_SYN" is also added to bpf_load_hdr_opt() to read a particular header option from the SYN packet. * Fastopen Fastopen should work the same as the regular non fastopen case. This is a test in a later patch. * Syncookie For syncookie, the later example patch asks the active side's bpf prog to resend the header options in ACK. The server can use bpf_load_hdr_opt() to look at the options in this received ACK during PASSIVE_ESTABLISHED_CB. * Active side The bpf prog will get a chance to write the bpf header option in the SYN packet during WRITE_HDR_OPT_CB. The received SYNACK pkt will also be available to the bpf prog during the existing ACTIVE_ESTABLISHED_CB callback through the sock_ops->skb_data and bpf_load_hdr_opt(). * Turn off header CB flags after 3WHS If the bpf prog does not need to write/parse header options beyond the 3WHS, the bpf prog can clear the bpf_sock_ops_cb_flags to avoid being called for header options. Or the bpf-prog can select to leave the UNKNOWN_HDR_OPT_CB_FLAG on so that the kernel will only call it when there is option that the kernel cannot handle. [1]: draft-wang-tcpm-low-latency-opt-00 https://tools.ietf.org/html/draft-wang-tcpm-low-latency-opt-00 Signed-off-by: Martin KaFai Lau Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20200820190104.2885895-1-kafai@fb.com --- include/uapi/linux/bpf.h | 300 ++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 299 insertions(+), 1 deletion(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 18d0e128bc3c..f67ec5d9e57d 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -3395,6 +3395,120 @@ union bpf_attr { * A non-negative value equal to or less than *size* on success, * or a negative error in case of failure. * + * long bpf_load_hdr_opt(struct bpf_sock_ops *skops, void *searchby_res, u32 len, u64 flags) + * Description + * Load header option. Support reading a particular TCP header + * option for bpf program (BPF_PROG_TYPE_SOCK_OPS). + * + * If *flags* is 0, it will search the option from the + * sock_ops->skb_data. The comment in "struct bpf_sock_ops" + * has details on what skb_data contains under different + * sock_ops->op. + * + * The first byte of the *searchby_res* specifies the + * kind that it wants to search. + * + * If the searching kind is an experimental kind + * (i.e. 253 or 254 according to RFC6994). It also + * needs to specify the "magic" which is either + * 2 bytes or 4 bytes. It then also needs to + * specify the size of the magic by using + * the 2nd byte which is "kind-length" of a TCP + * header option and the "kind-length" also + * includes the first 2 bytes "kind" and "kind-length" + * itself as a normal TCP header option also does. + * + * For example, to search experimental kind 254 with + * 2 byte magic 0xeB9F, the searchby_res should be + * [ 254, 4, 0xeB, 0x9F, 0, 0, .... 0 ]. + * + * To search for the standard window scale option (3), + * the searchby_res should be [ 3, 0, 0, .... 0 ]. + * Note, kind-length must be 0 for regular option. + * + * Searching for No-Op (0) and End-of-Option-List (1) are + * not supported. + * + * *len* must be at least 2 bytes which is the minimal size + * of a header option. + * + * Supported flags: + * * **BPF_LOAD_HDR_OPT_TCP_SYN** to search from the + * saved_syn packet or the just-received syn packet. + * + * Return + * >0 when found, the header option is copied to *searchby_res*. + * The return value is the total length copied. + * + * **-EINVAL** If param is invalid + * + * **-ENOMSG** The option is not found + * + * **-ENOENT** No syn packet available when + * **BPF_LOAD_HDR_OPT_TCP_SYN** is used + * + * **-ENOSPC** Not enough space. Only *len* number of + * bytes are copied. + * + * **-EFAULT** Cannot parse the header options in the packet + * + * **-EPERM** This helper cannot be used under the + * current sock_ops->op. + * + * long bpf_store_hdr_opt(struct bpf_sock_ops *skops, const void *from, u32 len, u64 flags) + * Description + * Store header option. The data will be copied + * from buffer *from* with length *len* to the TCP header. + * + * The buffer *from* should have the whole option that + * includes the kind, kind-length, and the actual + * option data. The *len* must be at least kind-length + * long. The kind-length does not have to be 4 byte + * aligned. The kernel will take care of the padding + * and setting the 4 bytes aligned value to th->doff. + * + * This helper will check for duplicated option + * by searching the same option in the outgoing skb. + * + * This helper can only be called during + * BPF_SOCK_OPS_WRITE_HDR_OPT_CB. + * + * Return + * 0 on success, or negative error in case of failure: + * + * **-EINVAL** If param is invalid + * + * **-ENOSPC** Not enough space in the header. + * Nothing has been written + * + * **-EEXIST** The option has already existed + * + * **-EFAULT** Cannot parse the existing header options + * + * **-EPERM** This helper cannot be used under the + * current sock_ops->op. + * + * long bpf_reserve_hdr_opt(struct bpf_sock_ops *skops, u32 len, u64 flags) + * Description + * Reserve *len* bytes for the bpf header option. The + * space will be used by bpf_store_hdr_opt() later in + * BPF_SOCK_OPS_WRITE_HDR_OPT_CB. + * + * If bpf_reserve_hdr_opt() is called multiple times, + * the total number of bytes will be reserved. + * + * This helper can only be called during + * BPF_SOCK_OPS_HDR_OPT_LEN_CB. + * + * Return + * 0 on success, or negative error in case of failure: + * + * **-EINVAL** if param is invalid + * + * **-ENOSPC** Not enough space in the header. + * + * **-EPERM** This helper cannot be used under the + * current sock_ops->op. */ #define __BPF_FUNC_MAPPER(FN) \ FN(unspec), \ @@ -3539,6 +3653,9 @@ union bpf_attr { FN(skc_to_tcp_request_sock), \ FN(skc_to_udp6_sock), \ FN(get_task_stack), \ + FN(load_hdr_opt), \ + FN(store_hdr_opt), \ + FN(reserve_hdr_opt), /* */ /* integer value in 'imm' field of BPF_CALL instruction selects which helper @@ -4165,6 +4282,36 @@ struct bpf_sock_ops { __u64 bytes_received; __u64 bytes_acked; __bpf_md_ptr(struct bpf_sock *, sk); + /* [skb_data, skb_data_end) covers the whole TCP header. + * + * BPF_SOCK_OPS_PARSE_HDR_OPT_CB: The packet received + * BPF_SOCK_OPS_HDR_OPT_LEN_CB: Not useful because the + * header has not been written. + * BPF_SOCK_OPS_WRITE_HDR_OPT_CB: The header and options have + * been written so far. + * BPF_SOCK_OPS_ACTIVE_ESTABLISHED_CB: The SYNACK that concludes + * the 3WHS. + * BPF_SOCK_OPS_PASSIVE_ESTABLISHED_CB: The ACK that concludes + * the 3WHS. + * + * bpf_load_hdr_opt() can also be used to read a particular option. + */ + __bpf_md_ptr(void *, skb_data); + __bpf_md_ptr(void *, skb_data_end); + __u32 skb_len; /* The total length of a packet. + * It includes the header, options, + * and payload. + */ + __u32 skb_tcp_flags; /* tcp_flags of the header. It provides + * an easy way to check for tcp_flags + * without parsing skb_data. + * + * In particular, the skb_tcp_flags + * will still be available in + * BPF_SOCK_OPS_HDR_OPT_LEN even though + * the outgoing header has not + * been written yet. + */ }; /* Definitions for bpf_sock_ops_cb_flags */ @@ -4173,8 +4320,48 @@ enum { BPF_SOCK_OPS_RETRANS_CB_FLAG = (1<<1), BPF_SOCK_OPS_STATE_CB_FLAG = (1<<2), BPF_SOCK_OPS_RTT_CB_FLAG = (1<<3), - BPF_SOCK_OPS_PARSE_ALL_HDR_OPT_CB_FLAG = (1<<4), + /* Call bpf for all received TCP headers. The bpf prog will be + * called under sock_ops->op == BPF_SOCK_OPS_PARSE_HDR_OPT_CB + * + * Please refer to the comment in BPF_SOCK_OPS_PARSE_HDR_OPT_CB + * for the header option related helpers that will be useful + * to the bpf programs. + * + * It could be used at the client/active side (i.e. connect() side) + * when the server told it that the server was in syncookie + * mode and required the active side to resend the bpf-written + * options. The active side can keep writing the bpf-options until + * it received a valid packet from the server side to confirm + * the earlier packet (and options) has been received. The later + * example patch is using it like this at the active side when the + * server is in syncookie mode. + * + * The bpf prog will usually turn this off in the common cases. + */ + BPF_SOCK_OPS_PARSE_ALL_HDR_OPT_CB_FLAG = (1<<4), + /* Call bpf when kernel has received a header option that + * the kernel cannot handle. The bpf prog will be called under + * sock_ops->op == BPF_SOCK_OPS_PARSE_HDR_OPT_CB. + * + * Please refer to the comment in BPF_SOCK_OPS_PARSE_HDR_OPT_CB + * for the header option related helpers that will be useful + * to the bpf programs. + */ BPF_SOCK_OPS_PARSE_UNKNOWN_HDR_OPT_CB_FLAG = (1<<5), + /* Call bpf when the kernel is writing header options for the + * outgoing packet. The bpf prog will first be called + * to reserve space in a skb under + * sock_ops->op == BPF_SOCK_OPS_HDR_OPT_LEN_CB. Then + * the bpf prog will be called to write the header option(s) + * under sock_ops->op == BPF_SOCK_OPS_WRITE_HDR_OPT_CB. + * + * Please refer to the comment in BPF_SOCK_OPS_HDR_OPT_LEN_CB + * and BPF_SOCK_OPS_WRITE_HDR_OPT_CB for the header option + * related helpers that will be useful to the bpf programs. + * + * The kernel gets its chance to reserve space and write + * options first before the BPF program does. + */ BPF_SOCK_OPS_WRITE_HDR_OPT_CB_FLAG = (1<<6), /* Mask of all currently supported cb flags */ BPF_SOCK_OPS_ALL_CB_FLAGS = 0x7F, @@ -4233,6 +4420,63 @@ enum { */ BPF_SOCK_OPS_RTT_CB, /* Called on every RTT. */ + BPF_SOCK_OPS_PARSE_HDR_OPT_CB, /* Parse the header option. + * It will be called to handle + * the packets received at + * an already established + * connection. + * + * sock_ops->skb_data: + * Referring to the received skb. + * It covers the TCP header only. + * + * bpf_load_hdr_opt() can also + * be used to search for a + * particular option. + */ + BPF_SOCK_OPS_HDR_OPT_LEN_CB, /* Reserve space for writing the + * header option later in + * BPF_SOCK_OPS_WRITE_HDR_OPT_CB. + * Arg1: bool want_cookie. (in + * writing SYNACK only) + * + * sock_ops->skb_data: + * Not available because no header has + * been written yet. + * + * sock_ops->skb_tcp_flags: + * The tcp_flags of the + * outgoing skb. (e.g. SYN, ACK, FIN). + * + * bpf_reserve_hdr_opt() should + * be used to reserve space. + */ + BPF_SOCK_OPS_WRITE_HDR_OPT_CB, /* Write the header options + * Arg1: bool want_cookie. (in + * writing SYNACK only) + * + * sock_ops->skb_data: + * Referring to the outgoing skb. + * It covers the TCP header + * that has already been written + * by the kernel and the + * earlier bpf-progs. + * + * sock_ops->skb_tcp_flags: + * The tcp_flags of the outgoing + * skb. (e.g. SYN, ACK, FIN). + * + * bpf_store_hdr_opt() should + * be used to write the + * option. + * + * bpf_load_hdr_opt() can also + * be used to search for a + * particular option that + * has already been written + * by the kernel or the + * earlier bpf-progs. + */ }; /* List of TCP states. There is a build check in net/ipv4/tcp.c to detect @@ -4262,6 +4506,60 @@ enum { TCP_BPF_SNDCWND_CLAMP = 1002, /* Set sndcwnd_clamp */ TCP_BPF_DELACK_MAX = 1003, /* Max delay ack in usecs */ TCP_BPF_RTO_MIN = 1004, /* Min delay ack in usecs */ + /* Copy the SYN pkt to optval + * + * BPF_PROG_TYPE_SOCK_OPS only. It is similar to the + * bpf_getsockopt(TCP_SAVED_SYN) but it does not limit + * to only getting from the saved_syn. It can either get the + * syn packet from: + * + * 1. the just-received SYN packet (only available when writing the + * SYNACK). It will be useful when it is not necessary to + * save the SYN packet for latter use. It is also the only way + * to get the SYN during syncookie mode because the syn + * packet cannot be saved during syncookie. + * + * OR + * + * 2. the earlier saved syn which was done by + * bpf_setsockopt(TCP_SAVE_SYN). + * + * The bpf_getsockopt(TCP_BPF_SYN*) option will hide where the + * SYN packet is obtained. + * + * If the bpf-prog does not need the IP[46] header, the + * bpf-prog can avoid parsing the IP header by using + * TCP_BPF_SYN. Otherwise, the bpf-prog can get both + * IP[46] and TCP header by using TCP_BPF_SYN_IP. + * + * >0: Total number of bytes copied + * -ENOSPC: Not enough space in optval. Only optlen number of + * bytes is copied. + * -ENOENT: The SYN skb is not available now and the earlier SYN pkt + * is not saved by setsockopt(TCP_SAVE_SYN). + */ + TCP_BPF_SYN = 1005, /* Copy the TCP header */ + TCP_BPF_SYN_IP = 1006, /* Copy the IP[46] and TCP header */ +}; + +enum { + BPF_LOAD_HDR_OPT_TCP_SYN = (1ULL << 0), +}; + +/* args[0] value during BPF_SOCK_OPS_HDR_OPT_LEN_CB and + * BPF_SOCK_OPS_WRITE_HDR_OPT_CB. + */ +enum { + BPF_WRITE_HDR_TCP_CURRENT_MSS = 1, /* Kernel is finding the + * total option spaces + * required for an established + * sk in order to calculate the + * MSS. No skb is actually + * sent. + */ + BPF_WRITE_HDR_TCP_SYNACK_COOKIE = 2, /* Kernel is in syncookie mode + * when sending a SYN. + */ }; struct bpf_perf_event_value { -- cgit v1.2.3 From 267cf9fa43d1c9d525d5d818a8651f2900e3aa9e Mon Sep 17 00:00:00 2001 From: Martin KaFai Lau Date: Thu, 20 Aug 2020 12:01:23 -0700 Subject: tcp: bpf: Optionally store mac header in TCP_SAVE_SYN This patch is adapted from Eric's patch in an earlier discussion [1]. The TCP_SAVE_SYN currently only stores the network header and tcp header. This patch allows it to optionally store the mac header also if the setsockopt's optval is 2. It requires one more bit for the "save_syn" bit field in tcp_sock. This patch achieves this by moving the syn_smc bit next to the is_mptcp. The syn_smc is currently used with the TCP experimental option. Since syn_smc is only used when CONFIG_SMC is enabled, this patch also puts the "IS_ENABLED(CONFIG_SMC)" around it like the is_mptcp did with "IS_ENABLED(CONFIG_MPTCP)". The mac_hdrlen is also stored in the "struct saved_syn" to allow a quick offset from the bpf prog if it chooses to start getting from the network header or the tcp header. [1]: https://lore.kernel.org/netdev/CANn89iLJNWh6bkH7DNhy_kmcAexuUCccqERqe7z2QsvPhGrYPQ@mail.gmail.com/ Suggested-by: Eric Dumazet Signed-off-by: Martin KaFai Lau Signed-off-by: Alexei Starovoitov Reviewed-by: Eric Dumazet Link: https://lore.kernel.org/bpf/20200820190123.2886935-1-kafai@fb.com --- include/uapi/linux/bpf.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index f67ec5d9e57d..544b89a64918 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -4540,6 +4540,7 @@ enum { */ TCP_BPF_SYN = 1005, /* Copy the TCP header */ TCP_BPF_SYN_IP = 1006, /* Copy the IP[46] and TCP header */ + TCP_BPF_SYN_MAC = 1007, /* Copy the MAC, IP[46], and TCP header */ }; enum { -- cgit v1.2.3 From f836a56e84ffc9f1a1cd73f77e10404ca46a4616 Mon Sep 17 00:00:00 2001 From: KP Singh Date: Tue, 25 Aug 2020 20:29:15 +0200 Subject: bpf: Generalize bpf_sk_storage Refactor the functionality in bpf_sk_storage.c so that concept of storage linked to kernel objects can be extended to other objects like inode, task_struct etc. Each new local storage will still be a separate map and provide its own set of helpers. This allows for future object specific extensions and still share a lot of the underlying implementation. This includes the changes suggested by Martin in: https://lore.kernel.org/bpf/20200725013047.4006241-1-kafai@fb.com/ adding new map operations to support bpf_local_storage maps: * storages for different kernel objects to optionally have different memory charging strategy (map_local_storage_charge, map_local_storage_uncharge) * Functionality to extract the storage pointer from a pointer to the owning object (map_owner_storage_ptr) Co-developed-by: Martin KaFai Lau Signed-off-by: Martin KaFai Lau Signed-off-by: KP Singh Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20200825182919.1118197-4-kpsingh@chromium.org --- include/uapi/linux/bpf.h | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 544b89a64918..2cbd137eed86 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -3765,9 +3765,13 @@ enum { BPF_F_SYSCTL_BASE_NAME = (1ULL << 0), }; -/* BPF_FUNC_sk_storage_get flags */ +/* BPF_FUNC__storage_get flags */ enum { - BPF_SK_STORAGE_GET_F_CREATE = (1ULL << 0), + BPF_LOCAL_STORAGE_GET_F_CREATE = (1ULL << 0), + /* BPF_SK_STORAGE_GET_F_CREATE is only kept for backward compatibility + * and BPF_LOCAL_STORAGE_GET_F_CREATE must be used instead. + */ + BPF_SK_STORAGE_GET_F_CREATE = BPF_LOCAL_STORAGE_GET_F_CREATE, }; /* BPF_FUNC_read_branch_records flags. */ -- cgit v1.2.3 From 8ea636848aca35b9f97c5b5dee30225cf2dd0fe6 Mon Sep 17 00:00:00 2001 From: KP Singh Date: Tue, 25 Aug 2020 20:29:17 +0200 Subject: bpf: Implement bpf_local_storage for inodes Similar to bpf_local_storage for sockets, add local storage for inodes. The life-cycle of storage is managed with the life-cycle of the inode. i.e. the storage is destroyed along with the owning inode. The BPF LSM allocates an __rcu pointer to the bpf_local_storage in the security blob which are now stackable and can co-exist with other LSMs. Signed-off-by: KP Singh Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20200825182919.1118197-6-kpsingh@chromium.org --- include/uapi/linux/bpf.h | 40 +++++++++++++++++++++++++++++++++++++++- 1 file changed, 39 insertions(+), 1 deletion(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 2cbd137eed86..b6bfcd085a76 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -155,6 +155,7 @@ enum bpf_map_type { BPF_MAP_TYPE_DEVMAP_HASH, BPF_MAP_TYPE_STRUCT_OPS, BPF_MAP_TYPE_RINGBUF, + BPF_MAP_TYPE_INODE_STORAGE, }; /* Note that tracing related programs such as @@ -3509,6 +3510,41 @@ union bpf_attr { * * **-EPERM** This helper cannot be used under the * current sock_ops->op. + * void *bpf_inode_storage_get(struct bpf_map *map, void *inode, void *value, u64 flags) + * Description + * Get a bpf_local_storage from an *inode*. + * + * Logically, it could be thought of as getting the value from + * a *map* with *inode* as the **key**. From this + * perspective, the usage is not much different from + * **bpf_map_lookup_elem**\ (*map*, **&**\ *inode*) except this + * helper enforces the key must be an inode and the map must also + * be a **BPF_MAP_TYPE_INODE_STORAGE**. + * + * Underneath, the value is stored locally at *inode* instead of + * the *map*. The *map* is used as the bpf-local-storage + * "type". The bpf-local-storage "type" (i.e. the *map*) is + * searched against all bpf_local_storage residing at *inode*. + * + * An optional *flags* (**BPF_LOCAL_STORAGE_GET_F_CREATE**) can be + * used such that a new bpf_local_storage will be + * created if one does not exist. *value* can be used + * together with **BPF_LOCAL_STORAGE_GET_F_CREATE** to specify + * the initial value of a bpf_local_storage. If *value* is + * **NULL**, the new bpf_local_storage will be zero initialized. + * Return + * A bpf_local_storage pointer is returned on success. + * + * **NULL** if not found or there was an error in adding + * a new bpf_local_storage. + * + * int bpf_inode_storage_delete(struct bpf_map *map, void *inode) + * Description + * Delete a bpf_local_storage from an *inode*. + * Return + * 0 on success. + * + * **-ENOENT** if the bpf_local_storage cannot be found. */ #define __BPF_FUNC_MAPPER(FN) \ FN(unspec), \ @@ -3655,7 +3691,9 @@ union bpf_attr { FN(get_task_stack), \ FN(load_hdr_opt), \ FN(store_hdr_opt), \ - FN(reserve_hdr_opt), + FN(reserve_hdr_opt), \ + FN(inode_storage_get), \ + FN(inode_storage_delete), \ /* */ /* integer value in 'imm' field of BPF_CALL instruction selects which helper -- cgit v1.2.3 From 30897832d8b97e93833fb52c0a02951db3692ed2 Mon Sep 17 00:00:00 2001 From: KP Singh Date: Tue, 25 Aug 2020 20:29:18 +0200 Subject: bpf: Allow local storage to be used from LSM programs Adds support for both bpf_{sk, inode}_storage_{get, delete} to be used in LSM programs. These helpers are not used for tracing programs (currently) as their usage is tied to the life-cycle of the object and should only be used where the owning object won't be freed (when the owning object is passed as an argument to the LSM hook). Thus, they are safer to use in LSM hooks than tracing. Usage of local storage in tracing programs will probably follow a per function based whitelist approach. Since the UAPI helper signature for bpf_sk_storage expect a bpf_sock, it, leads to a compilation warning for LSM programs, it's also updated to accept a void * pointer instead. Signed-off-by: KP Singh Signed-off-by: Alexei Starovoitov Acked-by: Martin KaFai Lau Link: https://lore.kernel.org/bpf/20200825182919.1118197-7-kpsingh@chromium.org --- include/uapi/linux/bpf.h | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index b6bfcd085a76..0e1cdf806fe1 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -2808,7 +2808,7 @@ union bpf_attr { * * **-ERANGE** if resulting value was out of range. * - * void *bpf_sk_storage_get(struct bpf_map *map, struct bpf_sock *sk, void *value, u64 flags) + * void *bpf_sk_storage_get(struct bpf_map *map, void *sk, void *value, u64 flags) * Description * Get a bpf-local-storage from a *sk*. * @@ -2824,6 +2824,9 @@ union bpf_attr { * "type". The bpf-local-storage "type" (i.e. the *map*) is * searched against all bpf-local-storages residing at *sk*. * + * *sk* is a kernel **struct sock** pointer for LSM program. + * *sk* is a **struct bpf_sock** pointer for other program types. + * * An optional *flags* (**BPF_SK_STORAGE_GET_F_CREATE**) can be * used such that a new bpf-local-storage will be * created if one does not exist. *value* can be used @@ -2836,7 +2839,7 @@ union bpf_attr { * **NULL** if not found or there was an error in adding * a new bpf-local-storage. * - * long bpf_sk_storage_delete(struct bpf_map *map, struct bpf_sock *sk) + * long bpf_sk_storage_delete(struct bpf_map *map, void *sk) * Description * Delete a bpf-local-storage from a *sk*. * Return -- cgit v1.2.3 From 6e22ab9da79343532cd3cde39df25e5a5478c692 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Tue, 25 Aug 2020 21:21:20 +0200 Subject: bpf: Add d_path helper Adding d_path helper function that returns full path for given 'struct path' object, which needs to be the kernel BTF 'path' object. The path is returned in buffer provided 'buf' of size 'sz' and is zero terminated. bpf_d_path(&file->f_path, buf, size); The helper calls directly d_path function, so there's only limited set of function it can be called from. Adding just very modest set for the start. Updating also bpf.h tools uapi header and adding 'path' to bpf_helpers_doc.py script. Signed-off-by: Jiri Olsa Signed-off-by: Alexei Starovoitov Acked-by: Andrii Nakryiko Acked-by: KP Singh Link: https://lore.kernel.org/bpf/20200825192124.710397-11-jolsa@kernel.org --- include/uapi/linux/bpf.h | 14 ++++++++++++++ 1 file changed, 14 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 0e1cdf806fe1..0388bc0200b0 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -3513,6 +3513,7 @@ union bpf_attr { * * **-EPERM** This helper cannot be used under the * current sock_ops->op. + * * void *bpf_inode_storage_get(struct bpf_map *map, void *inode, void *value, u64 flags) * Description * Get a bpf_local_storage from an *inode*. @@ -3548,6 +3549,18 @@ union bpf_attr { * 0 on success. * * **-ENOENT** if the bpf_local_storage cannot be found. + * + * long bpf_d_path(struct path *path, char *buf, u32 sz) + * Description + * Return full path for given 'struct path' object, which + * needs to be the kernel BTF 'path' object. The path is + * returned in the provided buffer 'buf' of size 'sz' and + * is zero terminated. + * + * Return + * On success, the strictly positive length of the string, + * including the trailing NUL character. On error, a negative + * value. */ #define __BPF_FUNC_MAPPER(FN) \ FN(unspec), \ @@ -3697,6 +3710,7 @@ union bpf_attr { FN(reserve_hdr_opt), \ FN(inode_storage_get), \ FN(inode_storage_delete), \ + FN(d_path), \ /* */ /* integer value in 'imm' field of BPF_CALL instruction selects which helper -- cgit v1.2.3 From b305dfe2e93434b12d438434461b709641f62af4 Mon Sep 17 00:00:00 2001 From: Hans Verkuil Date: Thu, 20 Aug 2020 12:47:16 +0200 Subject: media: videodev2.h: RGB BT2020 and HSV are always full range The default RGB quantization range for BT.2020 is full range (just as for all the other RGB pixel encodings), not limited range. Update the V4L2_MAP_QUANTIZATION_DEFAULT macro and documentation accordingly. Also mention that HSV is always full range and cannot be limited range. When RGB BT2020 was introduced in V4L2 it was not clear whether it should be limited or full range, but full range is the right (and consistent) choice. Signed-off-by: Hans Verkuil Signed-off-by: Mauro Carvalho Chehab --- include/uapi/linux/videodev2.h | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/videodev2.h b/include/uapi/linux/videodev2.h index c7b70ff53bc1..4769628790da 100644 --- a/include/uapi/linux/videodev2.h +++ b/include/uapi/linux/videodev2.h @@ -375,9 +375,9 @@ enum v4l2_hsv_encoding { enum v4l2_quantization { /* - * The default for R'G'B' quantization is always full range, except - * for the BT2020 colorspace. For Y'CbCr the quantization is always - * limited range, except for COLORSPACE_JPEG: this is full range. + * The default for R'G'B' quantization is always full range. + * For Y'CbCr the quantization is always limited range, except + * for COLORSPACE_JPEG: this is full range. */ V4L2_QUANTIZATION_DEFAULT = 0, V4L2_QUANTIZATION_FULL_RANGE = 1, @@ -386,14 +386,13 @@ enum v4l2_quantization { /* * Determine how QUANTIZATION_DEFAULT should map to a proper quantization. - * This depends on whether the image is RGB or not, the colorspace and the - * Y'CbCr encoding. + * This depends on whether the image is RGB or not, the colorspace. + * The Y'CbCr encoding is not used anymore, but is still there for backwards + * compatibility. */ #define V4L2_MAP_QUANTIZATION_DEFAULT(is_rgb_or_hsv, colsp, ycbcr_enc) \ - (((is_rgb_or_hsv) && (colsp) == V4L2_COLORSPACE_BT2020) ? \ - V4L2_QUANTIZATION_LIM_RANGE : \ - (((is_rgb_or_hsv) || (colsp) == V4L2_COLORSPACE_JPEG) ? \ - V4L2_QUANTIZATION_FULL_RANGE : V4L2_QUANTIZATION_LIM_RANGE)) + (((is_rgb_or_hsv) || (colsp) == V4L2_COLORSPACE_JPEG) ? \ + V4L2_QUANTIZATION_FULL_RANGE : V4L2_QUANTIZATION_LIM_RANGE) /* * Deprecated names for opRGB colorspace (IEC 61966-2-5) -- cgit v1.2.3 From 493a0ebd804c986e6bd207603c5e1ca748470d3d Mon Sep 17 00:00:00 2001 From: James Prestwood Date: Mon, 13 Apr 2020 09:20:53 -0700 Subject: nl80211: fix PORT_AUTHORIZED wording to reflect behavior The CMD_PORT_AUTHORIZED event was described as an event which indicated a successfully completed 4-way handshake. But the behavior was not as advertized. The only driver which uses this is brcmfmac, and this driver only sends the event after a successful 802.1X-FT roam. This prevents userspace applications from knowing if the 4-way completed on: 1. Normal 802.1X connects 2. Normal PSK connections 3. FT-PSK roams wpa_supplicant handles this incorrect behavior by just completing the connection after association, before the 4-way has completed. If the 4-way ends up failing it disconnects at that point. Since this behavior appears to be expected (wpa_s handles it this way) I have changed the wording in the API description to reflect the actual behavior. Signed-off-by: James Prestwood Link: https://lore.kernel.org/r/20200413162053.3711-1-prestwoj@gmail.com [fix spelling of 802.1X throughout ...] Signed-off-by: Johannes Berg --- include/uapi/linux/nl80211.h | 20 +++++++------------- 1 file changed, 7 insertions(+), 13 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index 631f3a997b3c..8cc2b825e4e4 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -647,13 +647,9 @@ * authentication/association or not receiving a response from the AP. * Non-zero %NL80211_ATTR_STATUS_CODE value is indicated in that case as * well to remain backwards compatible. - * When establishing a security association, drivers that support 4 way - * handshake offload should send %NL80211_CMD_PORT_AUTHORIZED event when - * the 4 way handshake is completed successfully. * @NL80211_CMD_ROAM: Notification indicating the card/driver roamed by itself. - * When a security association was established with the new AP (e.g. if - * the FT protocol was used for roaming or the driver completed the 4 way - * handshake), this event should be followed by an + * When a security association was established on an 802.1X network using + * fast transition, this event should be followed by an * %NL80211_CMD_PORT_AUTHORIZED event. * @NL80211_CMD_DISCONNECT: drop a given connection; also used to notify * userspace that a connection was dropped by the AP or due to other @@ -1067,13 +1063,11 @@ * @NL80211_CMD_DEL_PMK: For offloaded 4-Way handshake, delete the previously * configured PMK for the authenticator address identified by * %NL80211_ATTR_MAC. - * @NL80211_CMD_PORT_AUTHORIZED: An event that indicates that the 4 way - * handshake was completed successfully by the driver. The BSSID is - * specified with %NL80211_ATTR_MAC. Drivers that support 4 way handshake - * offload should send this event after indicating 802.11 association with - * %NL80211_CMD_CONNECT or %NL80211_CMD_ROAM. If the 4 way handshake failed - * %NL80211_CMD_DISCONNECT should be indicated instead. - * + * @NL80211_CMD_PORT_AUTHORIZED: An event that indicates an 802.1X FT roam was + * completed successfully. Drivers that support 4 way handshake offload + * should send this event after indicating 802.1X FT assocation with + * %NL80211_CMD_ROAM. If the 4 way handshake failed %NL80211_CMD_DISCONNECT + * should be indicated instead. * @NL80211_CMD_CONTROL_PORT_FRAME: Control Port (e.g. PAE) frame TX request * and RX notification. This command is used both as a request to transmit * a control port frame and as a notification that a control port frame -- cgit v1.2.3 From eb89a6a6b7a1af2d9c8d83ee44fa67700d6337e7 Mon Sep 17 00:00:00 2001 From: Miles Hu Date: Tue, 4 Aug 2020 10:16:29 +0200 Subject: nl80211: add support for setting fixed HE rate/gi/ltf This patch adds the nl80211 structs, definitions, policies and parsing code required to pass fixed HE rate, GI and LTF settings. Signed-off-by: Miles Hu Signed-off-by: John Crispin Link: https://lore.kernel.org/r/20200804081630.2013619-1-john@phrozen.org [fix comment] Signed-off-by: Johannes Berg --- include/uapi/linux/nl80211.h | 28 ++++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index 8cc2b825e4e4..1a4b922f489f 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -3180,6 +3180,18 @@ enum nl80211_he_gi { NL80211_RATE_INFO_HE_GI_3_2, }; +/** + * enum nl80211_he_ltf - HE long training field + * @NL80211_RATE_INFO_HE_1xLTF: 3.2 usec + * @NL80211_RATE_INFO_HE_2xLTF: 6.4 usec + * @NL80211_RATE_INFO_HE_4xLTF: 12.8 usec + */ +enum nl80211_he_ltf { + NL80211_RATE_INFO_HE_1XLTF, + NL80211_RATE_INFO_HE_2XLTF, + NL80211_RATE_INFO_HE_4XLTF, +}; + /** * enum nl80211_he_ru_alloc - HE RU allocation values * @NL80211_RATE_INFO_HE_RU_ALLOC_26: 26-tone RU allocation @@ -4735,6 +4747,10 @@ enum nl80211_key_attributes { * @NL80211_TXRATE_VHT: VHT rates allowed for TX rate selection, * see &struct nl80211_txrate_vht * @NL80211_TXRATE_GI: configure GI, see &enum nl80211_txrate_gi + * @NL80211_TXRATE_HE: HE rates allowed for TX rate selection, + * see &struct nl80211_txrate_he + * @NL80211_TXRATE_HE_GI: configure HE GI, 0.8us, 1.6us and 3.2us. + * @NL80211_TXRATE_HE_LTF: configure HE LTF, 1XLTF, 2XLTF and 4XLTF. * @__NL80211_TXRATE_AFTER_LAST: internal * @NL80211_TXRATE_MAX: highest TX rate attribute */ @@ -4744,6 +4760,9 @@ enum nl80211_tx_rate_attributes { NL80211_TXRATE_HT, NL80211_TXRATE_VHT, NL80211_TXRATE_GI, + NL80211_TXRATE_HE, + NL80211_TXRATE_HE_GI, + NL80211_TXRATE_HE_LTF, /* keep last */ __NL80211_TXRATE_AFTER_LAST, @@ -4761,6 +4780,15 @@ struct nl80211_txrate_vht { __u16 mcs[NL80211_VHT_NSS_MAX]; }; +#define NL80211_HE_NSS_MAX 8 +/** + * struct nl80211_txrate_he - HE MCS/NSS txrate bitmap + * @mcs: MCS bitmap table for each NSS (array index 0 for 1 stream, etc.) + */ +struct nl80211_txrate_he { + __u16 mcs[NL80211_HE_NSS_MAX]; +}; + enum nl80211_txrate_gi { NL80211_TXRATE_DEFAULT_GI, NL80211_TXRATE_FORCE_SGI, -- cgit v1.2.3 From 00c207edfb2bff9cf03a8f21e57c9c752a1d9f16 Mon Sep 17 00:00:00 2001 From: John Crispin Date: Tue, 11 Aug 2020 10:01:03 +0200 Subject: nl80211: rename csa counter attributes countdown counters We want to reuse the attributes for other counters such as BSS color change. Rename them to more generic names. Signed-off-by: John Crispin Link: https://lore.kernel.org/r/20200811080107.3615705-1-john@phrozen.org Signed-off-by: Johannes Berg --- include/uapi/linux/nl80211.h | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index 1a4b922f489f..ec96d5fe0e05 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -2076,10 +2076,10 @@ enum nl80211_commands { * operation). * @NL80211_ATTR_CSA_IES: Nested set of attributes containing the IE information * for the time while performing a channel switch. - * @NL80211_ATTR_CSA_C_OFF_BEACON: An array of offsets (u16) to the channel - * switch counters in the beacons tail (%NL80211_ATTR_BEACON_TAIL). - * @NL80211_ATTR_CSA_C_OFF_PRESP: An array of offsets (u16) to the channel - * switch counters in the probe response (%NL80211_ATTR_PROBE_RESP). + * @NL80211_ATTR_CNTDWN_OFFS_BEACON: An array of offsets (u16) to the channel + * switch or color change counters in the beacons tail (%NL80211_ATTR_BEACON_TAIL). + * @NL80211_ATTR_CNTDWN_OFFS_PRESP: An array of offsets (u16) to the channel + * switch or color change counters in the probe response (%NL80211_ATTR_PROBE_RESP). * * @NL80211_ATTR_RXMGMT_FLAGS: flags for nl80211_send_mgmt(), u32. * As specified in the &enum nl80211_rxmgmt_flags. @@ -2815,8 +2815,8 @@ enum nl80211_attrs { NL80211_ATTR_CH_SWITCH_COUNT, NL80211_ATTR_CH_SWITCH_BLOCK_TX, NL80211_ATTR_CSA_IES, - NL80211_ATTR_CSA_C_OFF_BEACON, - NL80211_ATTR_CSA_C_OFF_PRESP, + NL80211_ATTR_CNTDWN_OFFS_BEACON, + NL80211_ATTR_CNTDWN_OFFS_PRESP, NL80211_ATTR_RXMGMT_FLAGS, @@ -3003,6 +3003,8 @@ enum nl80211_attrs { #define NL80211_ATTR_MESH_PARAMS NL80211_ATTR_MESH_CONFIG #define NL80211_ATTR_IFACE_SOCKET_OWNER NL80211_ATTR_SOCKET_OWNER #define NL80211_ATTR_SAE_DATA NL80211_ATTR_AUTH_DATA +#define NL80211_ATTR_CSA_C_OFF_BEACON NL80211_ATTR_CNTDWN_OFFS_BEACON +#define NL80211_ATTR_CSA_C_OFF_PRESP NL80211_ATTR_CNTDWN_OFFS_PRESP /* * Allow user space programs to use #ifdef on new attributes by defining them -- cgit v1.2.3 From 2831a631022eed6e3f800f08892132c6edde652c Mon Sep 17 00:00:00 2001 From: Chung-Hsien Hsu Date: Mon, 17 Aug 2020 02:33:15 -0500 Subject: nl80211: support SAE authentication offload in AP mode Let drivers advertise support for AP-mode SAE authentication offload with a new NL80211_EXT_FEATURE_SAE_OFFLOAD_AP flag. Signed-off-by: Chung-Hsien Hsu Signed-off-by: Chi-Hsien Lin Link: https://lore.kernel.org/r/20200817073316.33402-4-stanley.hsu@cypress.com Signed-off-by: Johannes Berg --- include/uapi/linux/nl80211.h | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index ec96d5fe0e05..0584e0d349f0 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -252,9 +252,13 @@ * DOC: SAE authentication offload * * By setting @NL80211_EXT_FEATURE_SAE_OFFLOAD flag drivers can indicate they - * support offloading SAE authentication for WPA3-Personal networks. In - * %NL80211_CMD_CONNECT the password for SAE should be specified using - * %NL80211_ATTR_SAE_PASSWORD. + * support offloading SAE authentication for WPA3-Personal networks in station + * mode. Similarly @NL80211_EXT_FEATURE_SAE_OFFLOAD_AP flag can be set by + * drivers indicating the offload support in AP mode. + * + * The password for SAE should be specified using %NL80211_ATTR_SAE_PASSWORD in + * %NL80211_CMD_CONNECT and %NL80211_CMD_START_AP for station and AP mode + * respectively. */ /** @@ -5845,6 +5849,9 @@ enum nl80211_feature_flags { * handshake with PSK in AP mode (PSK is passed as part of the start AP * command). * + * @NL80211_EXT_FEATURE_SAE_OFFLOAD_AP: Device wants to do SAE authentication + * in AP mode (SAE password is passed as part of the start AP command). + * * @NUM_NL80211_EXT_FEATURES: number of extended features. * @MAX_NL80211_EXT_FEATURES: highest extended feature index. */ @@ -5902,6 +5909,7 @@ enum nl80211_ext_feature_index { NL80211_EXT_FEATURE_CONTROL_PORT_OVER_NL80211_TX_STATUS, NL80211_EXT_FEATURE_OPERATING_CHANNEL_VALIDATION, NL80211_EXT_FEATURE_4WAY_HANDSHAKE_AP_PSK, + NL80211_EXT_FEATURE_SAE_OFFLOAD_AP, /* add new features before the definition below */ NUM_NL80211_EXT_FEATURES, -- cgit v1.2.3 From 50aba46c234ea6ab3134cebb5ab27885f33a3e5d Mon Sep 17 00:00:00 2001 From: Nicolas Dichtel Date: Thu, 27 Aug 2020 14:19:23 +0200 Subject: gtp: add notification mechanism Like all other network functions, let's notify gtp context on creation and deletion. Signed-off-by: Nicolas Dichtel Tested-by: Gabriel Ganne Acked-by: Harald Welte Signed-off-by: David S. Miller --- include/uapi/linux/gtp.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/gtp.h b/include/uapi/linux/gtp.h index c7d66755d212..79f9191bbb24 100644 --- a/include/uapi/linux/gtp.h +++ b/include/uapi/linux/gtp.h @@ -2,6 +2,8 @@ #ifndef _UAPI_LINUX_GTP_H_ #define _UAPI_LINUX_GTP_H_ +#define GTP_GENL_MCGRP_NAME "gtp" + enum gtp_genl_cmds { GTP_CMD_NEWPDP, GTP_CMD_DELPDP, -- cgit v1.2.3 From dab741e0e02bd3c4f5e2e97be74b39df2523fc6e Mon Sep 17 00:00:00 2001 From: Mattias Nissler Date: Thu, 27 Aug 2020 11:09:46 -0600 Subject: Add a "nosymfollow" mount option. For mounts that have the new "nosymfollow" option, don't follow symlinks when resolving paths. The new option is similar in spirit to the existing "nodev", "noexec", and "nosuid" options, as well as to the LOOKUP_NO_SYMLINKS resolve flag in the openat2(2) syscall. Various BSD variants have been supporting the "nosymfollow" mount option for a long time with equivalent implementations. Note that symlinks may still be created on file systems mounted with the "nosymfollow" option present. readlink() remains functional, so user space code that is aware of symlinks can still choose to follow them explicitly. Setting the "nosymfollow" mount option helps prevent privileged writers from modifying files unintentionally in case there is an unexpected link along the accessed path. The "nosymfollow" option is thus useful as a defensive measure for systems that need to deal with untrusted file systems in privileged contexts. More information on the history and motivation for this patch can be found here: https://sites.google.com/a/chromium.org/dev/chromium-os/chromiumos-design-docs/hardening-against-malicious-stateful-data#TOC-Restricting-symlink-traversal Signed-off-by: Mattias Nissler Signed-off-by: Ross Zwisler Reviewed-by: Aleksa Sarai Signed-off-by: Al Viro --- include/uapi/linux/mount.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/mount.h b/include/uapi/linux/mount.h index 96a0240f23fe..dd8306ea336c 100644 --- a/include/uapi/linux/mount.h +++ b/include/uapi/linux/mount.h @@ -16,6 +16,7 @@ #define MS_REMOUNT 32 /* Alter flags of a mounted FS */ #define MS_MANDLOCK 64 /* Allow mandatory locks on an FS */ #define MS_DIRSYNC 128 /* Directory modifications are synchronous */ +#define MS_NOSYMFOLLOW 256 /* Do not follow symlinks */ #define MS_NOATIME 1024 /* Do not update access times. */ #define MS_NODIRATIME 2048 /* Do not update directory access times */ #define MS_BIND 4096 -- cgit v1.2.3 From b0c9eb37817943840a1a82dbc998c491609a0afd Mon Sep 17 00:00:00 2001 From: Yonghong Song Date: Thu, 27 Aug 2020 22:19:22 -0700 Subject: bpf: Make bpf_link_info.iter similar to bpf_iter_link_info bpf_link_info.iter is used by link_query to return bpf_iter_link_info to user space. Fields may be different, e.g., map_fd vs. map_id, so we cannot reuse the exact structure. But make them similar, e.g., struct bpf_link_info { /* common fields */ union { struct { ... } raw_tracepoint; struct { ... } tracing; ... struct { /* common fields for iter */ union { struct { __u32 map_id; } map; /* other structs for other targets */ }; }; }; }; so the structure is extensible the same way as bpf_iter_link_info. Fixes: 6b0a249a301e ("bpf: Implement link_query for bpf iterators") Signed-off-by: Yonghong Song Signed-off-by: Daniel Borkmann Acked-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20200828051922.758950-1-yhs@fb.com --- include/uapi/linux/bpf.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 0388bc0200b0..ef7af384f5ee 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -4251,8 +4251,10 @@ struct bpf_link_info { __aligned_u64 target_name; /* in/out: target_name buffer ptr */ __u32 target_name_len; /* in/out: target_name buffer len */ union { - __u32 map_id; - } map; + struct { + __u32 map_id; + } map; + }; } iter; struct { __u32 netns_ino; -- cgit v1.2.3 From 7a81575b806e5dab214025e6757362c62d946405 Mon Sep 17 00:00:00 2001 From: "Jose M. Guisado Gomez" Date: Thu, 20 Aug 2020 10:19:01 +0200 Subject: netfilter: nf_tables: add userdata attributes to nft_table Enables storing userdata for nft_table. Field udata points to user data and udlen store its length. Adds new attribute flag NFTA_TABLE_USERDATA Signed-off-by: Jose M. Guisado Gomez Signed-off-by: Pablo Neira Ayuso --- include/uapi/linux/netfilter/nf_tables.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/netfilter/nf_tables.h b/include/uapi/linux/netfilter/nf_tables.h index 42f351c1f5c5..aeb88cbd303e 100644 --- a/include/uapi/linux/netfilter/nf_tables.h +++ b/include/uapi/linux/netfilter/nf_tables.h @@ -172,6 +172,7 @@ enum nft_table_flags { * @NFTA_TABLE_NAME: name of the table (NLA_STRING) * @NFTA_TABLE_FLAGS: bitmask of enum nft_table_flags (NLA_U32) * @NFTA_TABLE_USE: number of chains in this table (NLA_U32) + * @NFTA_TABLE_USERDATA: user data (NLA_BINARY) */ enum nft_table_attributes { NFTA_TABLE_UNSPEC, @@ -180,6 +181,7 @@ enum nft_table_attributes { NFTA_TABLE_USE, NFTA_TABLE_HANDLE, NFTA_TABLE_PAD, + NFTA_TABLE_USERDATA, __NFTA_TABLE_MAX }; #define NFTA_TABLE_MAX (__NFTA_TABLE_MAX - 1) -- cgit v1.2.3 From 4afc41dfa5a716e9e7a90c22972583f337c0bcbf Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Wed, 26 Aug 2020 00:52:43 +0200 Subject: netfilter: conntrack: remove ignore stats This counter increments when nf_conntrack_in sees a packet that already has a conntrack attached or when the packet is marked as UNTRACKED. Neither is an error. The former is normal for loopback traffic. The second happens for certain ICMPv6 packets or when nftables/ip(6)tables rules are in place. In case someone needs to count UNTRACKED packets, or packets that are marked as untracked before conntrack_in this can be done with both nftables and ip(6)tables rules. Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/uapi/linux/netfilter/nfnetlink_conntrack.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/netfilter/nfnetlink_conntrack.h b/include/uapi/linux/netfilter/nfnetlink_conntrack.h index 262881792671..3e471558da82 100644 --- a/include/uapi/linux/netfilter/nfnetlink_conntrack.h +++ b/include/uapi/linux/netfilter/nfnetlink_conntrack.h @@ -247,7 +247,7 @@ enum ctattr_stats_cpu { CTA_STATS_FOUND, CTA_STATS_NEW, /* no longer used */ CTA_STATS_INVALID, - CTA_STATS_IGNORE, + CTA_STATS_IGNORE, /* no longer used */ CTA_STATS_DELETE, /* no longer used */ CTA_STATS_DELETE_LIST, /* no longer used */ CTA_STATS_INSERT, -- cgit v1.2.3 From bc92470413f3af152db0d8f90ef3eb13f8cc417a Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Wed, 26 Aug 2020 00:52:44 +0200 Subject: netfilter: conntrack: add clash resolution stat counter There is a misconception about what "insert_failed" means. We increment this even when a clash got resolved, so it might not indicate a problem. Add a dedicated counter for clash resolution and only increment insert_failed if a clash cannot be resolved. For the old /proc interface, export this in place of an older stat that got removed a while back. For ctnetlink, export this with a new attribute. Also correct an outdated comment that implies we add a duplicate tuple -- we only add the (unique) reply direction. Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/uapi/linux/netfilter/nfnetlink_conntrack.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/netfilter/nfnetlink_conntrack.h b/include/uapi/linux/netfilter/nfnetlink_conntrack.h index 3e471558da82..d8484be72fdc 100644 --- a/include/uapi/linux/netfilter/nfnetlink_conntrack.h +++ b/include/uapi/linux/netfilter/nfnetlink_conntrack.h @@ -256,6 +256,7 @@ enum ctattr_stats_cpu { CTA_STATS_EARLY_DROP, CTA_STATS_ERROR, CTA_STATS_SEARCH_RESTART, + CTA_STATS_CLASH_RESOLVE, __CTA_STATS_MAX, }; #define CTA_STATS_MAX (__CTA_STATS_MAX - 1) -- cgit v1.2.3 From 1e6c62a8821557720a9b2ea9617359b264f2f67c Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Thu, 27 Aug 2020 15:01:11 -0700 Subject: bpf: Introduce sleepable BPF programs Introduce sleepable BPF programs that can request such property for themselves via BPF_F_SLEEPABLE flag at program load time. In such case they will be able to use helpers like bpf_copy_from_user() that might sleep. At present only fentry/fexit/fmod_ret and lsm programs can request to be sleepable and only when they are attached to kernel functions that are known to allow sleeping. The non-sleepable programs are relying on implicit rcu_read_lock() and migrate_disable() to protect life time of programs, maps that they use and per-cpu kernel structures used to pass info between bpf programs and the kernel. The sleepable programs cannot be enclosed into rcu_read_lock(). migrate_disable() maps to preempt_disable() in non-RT kernels, so the progs should not be enclosed in migrate_disable() as well. Therefore rcu_read_lock_trace is used to protect the life time of sleepable progs. There are many networking and tracing program types. In many cases the 'struct bpf_prog *' pointer itself is rcu protected within some other kernel data structure and the kernel code is using rcu_dereference() to load that program pointer and call BPF_PROG_RUN() on it. All these cases are not touched. Instead sleepable bpf programs are allowed with bpf trampoline only. The program pointers are hard-coded into generated assembly of bpf trampoline and synchronize_rcu_tasks_trace() is used to protect the life time of the program. The same trampoline can hold both sleepable and non-sleepable progs. When rcu_read_lock_trace is held it means that some sleepable bpf program is running from bpf trampoline. Those programs can use bpf arrays and preallocated hash/lru maps. These map types are waiting on programs to complete via synchronize_rcu_tasks_trace(); Updates to trampoline now has to do synchronize_rcu_tasks_trace() and synchronize_rcu_tasks() to wait for sleepable progs to finish and for trampoline assembly to finish. This is the first step of introducing sleepable progs. Eventually dynamically allocated hash maps can be allowed and networking program types can become sleepable too. Signed-off-by: Alexei Starovoitov Signed-off-by: Daniel Borkmann Reviewed-by: Josef Bacik Acked-by: Andrii Nakryiko Acked-by: KP Singh Link: https://lore.kernel.org/bpf/20200827220114.69225-3-alexei.starovoitov@gmail.com --- include/uapi/linux/bpf.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index ef7af384f5ee..6e8b706aeb05 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -346,6 +346,14 @@ enum bpf_link_type { /* The verifier internal test flag. Behavior is undefined */ #define BPF_F_TEST_STATE_FREQ (1U << 3) +/* If BPF_F_SLEEPABLE is used in BPF_PROG_LOAD command, the verifier will + * restrict map and helper usage for such programs. Sleepable BPF programs can + * only be attached to hooks where kernel execution context allows sleeping. + * Such programs are allowed to use helpers that may sleep like + * bpf_copy_from_user(). + */ +#define BPF_F_SLEEPABLE (1U << 4) + /* When BPF ldimm64's insn[0].src_reg != 0 then this can have * two extensions: * -- cgit v1.2.3 From 07be4c4a3e7a0db148e44b16c5190e753d1c8569 Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Thu, 27 Aug 2020 15:01:12 -0700 Subject: bpf: Add bpf_copy_from_user() helper. Sleepable BPF programs can now use copy_from_user() to access user memory. Signed-off-by: Alexei Starovoitov Signed-off-by: Daniel Borkmann Acked-by: Andrii Nakryiko Acked-by: KP Singh Link: https://lore.kernel.org/bpf/20200827220114.69225-4-alexei.starovoitov@gmail.com --- include/uapi/linux/bpf.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 6e8b706aeb05..a613750d5515 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -3569,6 +3569,13 @@ union bpf_attr { * On success, the strictly positive length of the string, * including the trailing NUL character. On error, a negative * value. + * + * long bpf_copy_from_user(void *dst, u32 size, const void *user_ptr) + * Description + * Read *size* bytes from user space address *user_ptr* and store + * the data in *dst*. This is a wrapper of copy_from_user(). + * Return + * 0 on success, or a negative error in case of failure. */ #define __BPF_FUNC_MAPPER(FN) \ FN(unspec), \ @@ -3719,6 +3726,7 @@ union bpf_attr { FN(inode_storage_get), \ FN(inode_storage_delete), \ FN(d_path), \ + FN(copy_from_user), \ /* */ /* integer value in 'imm' field of BPF_CALL instruction selects which helper -- cgit v1.2.3 From 67407a406db337acdaabecd3747d160d89a929e4 Mon Sep 17 00:00:00 2001 From: Balazs Scheidler Date: Sat, 29 Aug 2020 08:19:15 +0200 Subject: netfilter: nft_socket: add wildcard support Add NFT_SOCKET_WILDCARD to match to wildcard socket listener. Signed-off-by: Balazs Scheidler Signed-off-by: Pablo Neira Ayuso --- include/uapi/linux/netfilter/nf_tables.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/netfilter/nf_tables.h b/include/uapi/linux/netfilter/nf_tables.h index aeb88cbd303e..543dc697b796 100644 --- a/include/uapi/linux/netfilter/nf_tables.h +++ b/include/uapi/linux/netfilter/nf_tables.h @@ -1010,10 +1010,12 @@ enum nft_socket_attributes { * * @NFT_SOCKET_TRANSPARENT: Value of the IP(V6)_TRANSPARENT socket option * @NFT_SOCKET_MARK: Value of the socket mark + * @NFT_SOCKET_WILDCARD: Whether the socket is zero-bound (e.g. 0.0.0.0 or ::0) */ enum nft_socket_keys { NFT_SOCKET_TRANSPARENT, NFT_SOCKET_MARK, + NFT_SOCKET_WILDCARD, __NFT_SOCKET_MAX }; #define NFT_SOCKET_MAX (__NFT_SOCKET_MAX - 1) -- cgit v1.2.3 From 55977744f9d862512a524fea93fc5226b09e76a9 Mon Sep 17 00:00:00 2001 From: Mukul Joshi Date: Fri, 28 Aug 2020 18:50:42 -0400 Subject: drm/amdkfd: Add GPU reset SMI event Add support for reporting GPU reset events through SMI. KFD would report both pre and post GPU reset events. Signed-off-by: Mukul Joshi Reviewed-by: Felix Kuehling Signed-off-by: Alex Deucher --- include/uapi/linux/kfd_ioctl.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/kfd_ioctl.h b/include/uapi/linux/kfd_ioctl.h index cb1f963a84e0..8b7368bfbd84 100644 --- a/include/uapi/linux/kfd_ioctl.h +++ b/include/uapi/linux/kfd_ioctl.h @@ -453,6 +453,8 @@ enum kfd_smi_event { KFD_SMI_EVENT_NONE = 0, /* not used */ KFD_SMI_EVENT_VMFAULT = 1, /* event start counting at 1 */ KFD_SMI_EVENT_THERMAL_THROTTLE = 2, + KFD_SMI_EVENT_GPU_PRE_RESET = 3, + KFD_SMI_EVENT_GPU_POST_RESET = 4, }; #define KFD_SMI_EVENT_MASK_FROM_INDEX(i) (1ULL << ((i) - 1)) -- cgit v1.2.3 From 5dc1a0bcb758c343b873e8330ee986417f5a1727 Mon Sep 17 00:00:00 2001 From: Mukul Joshi Date: Fri, 28 Aug 2020 19:53:08 -0400 Subject: include/uapi/linux: Fix indentation in kfd_smi_event enum Replace spaces with Tabs to fix indentation in kfd_smi_event enum. Signed-off-by: Mukul Joshi Reviewed-by: Felix Kuehling Signed-off-by: Alex Deucher --- include/uapi/linux/kfd_ioctl.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/kfd_ioctl.h b/include/uapi/linux/kfd_ioctl.h index 8b7368bfbd84..695b606da4b1 100644 --- a/include/uapi/linux/kfd_ioctl.h +++ b/include/uapi/linux/kfd_ioctl.h @@ -450,9 +450,9 @@ struct kfd_ioctl_import_dmabuf_args { * KFD SMI(System Management Interface) events */ enum kfd_smi_event { - KFD_SMI_EVENT_NONE = 0, /* not used */ - KFD_SMI_EVENT_VMFAULT = 1, /* event start counting at 1 */ - KFD_SMI_EVENT_THERMAL_THROTTLE = 2, + KFD_SMI_EVENT_NONE = 0, /* not used */ + KFD_SMI_EVENT_VMFAULT = 1, /* event start counting at 1 */ + KFD_SMI_EVENT_THERMAL_THROTTLE = 2, KFD_SMI_EVENT_GPU_PRE_RESET = 3, KFD_SMI_EVENT_GPU_POST_RESET = 4, }; -- cgit v1.2.3 From 4ad1b0d410c88c7c8e8fd1298c9d2293b651e35c Mon Sep 17 00:00:00 2001 From: Maheshwar Ajja Date: Sat, 23 May 2020 03:05:26 +0200 Subject: media: v4l2-ctrls: Add encoder constant quality control When V4L2_CID_MPEG_VIDEO_BITRATE_MODE value is V4L2_MPEG_VIDEO_BITRATE_MODE_CQ, encoder will produce constant quality output indicated by V4L2_CID_MPEG_VIDEO_CONSTANT_QUALITY control value. Encoder will choose appropriate quantization parameter and bitrate to produce requested frame quality level. Signed-off-by: Maheshwar Ajja Reviewed-by: Hans Verkuil Signed-off-by: Stanimir Varbanov Signed-off-by: Mauro Carvalho Chehab --- include/uapi/linux/v4l2-controls.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/v4l2-controls.h b/include/uapi/linux/v4l2-controls.h index 62271418c1be..0f7e4388dcce 100644 --- a/include/uapi/linux/v4l2-controls.h +++ b/include/uapi/linux/v4l2-controls.h @@ -375,6 +375,7 @@ enum v4l2_mpeg_video_aspect { enum v4l2_mpeg_video_bitrate_mode { V4L2_MPEG_VIDEO_BITRATE_MODE_VBR = 0, V4L2_MPEG_VIDEO_BITRATE_MODE_CBR = 1, + V4L2_MPEG_VIDEO_BITRATE_MODE_CQ = 2, }; #define V4L2_CID_MPEG_VIDEO_BITRATE (V4L2_CID_MPEG_BASE+207) #define V4L2_CID_MPEG_VIDEO_BITRATE_PEAK (V4L2_CID_MPEG_BASE+208) @@ -742,6 +743,7 @@ enum v4l2_cid_mpeg_video_hevc_size_of_length_field { #define V4L2_CID_MPEG_VIDEO_HEVC_HIER_CODING_L6_BR (V4L2_CID_MPEG_BASE + 642) #define V4L2_CID_MPEG_VIDEO_REF_NUMBER_FOR_PFRAMES (V4L2_CID_MPEG_BASE + 643) #define V4L2_CID_MPEG_VIDEO_PREPEND_SPSPPS_TO_IDR (V4L2_CID_MPEG_BASE + 644) +#define V4L2_CID_MPEG_VIDEO_CONSTANT_QUALITY (V4L2_CID_MPEG_BASE + 645) /* MPEG-class control IDs specific to the CX2341x driver as defined by V4L2 */ #define V4L2_CID_MPEG_CX2341X_BASE (V4L2_CTRL_CLASS_MPEG | 0x1000) -- cgit v1.2.3 From 44f5b2fffc3213c919f53adddadb1a05519bdc0e Mon Sep 17 00:00:00 2001 From: Stanimir Varbanov Date: Sun, 5 Jul 2020 01:41:00 +0200 Subject: media: v4l2-ctrl: Add frame-skip std encoder control Adds encoders standard v4l2 control for frame-skip. The control is a copy of a custom encoder control so that other v4l2 encoder drivers can use it. Signed-off-by: Stanimir Varbanov Reviewed-by: Hans Verkuil Signed-off-by: Mauro Carvalho Chehab --- include/uapi/linux/v4l2-controls.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/v4l2-controls.h b/include/uapi/linux/v4l2-controls.h index 0f7e4388dcce..053827cda8e6 100644 --- a/include/uapi/linux/v4l2-controls.h +++ b/include/uapi/linux/v4l2-controls.h @@ -744,6 +744,12 @@ enum v4l2_cid_mpeg_video_hevc_size_of_length_field { #define V4L2_CID_MPEG_VIDEO_REF_NUMBER_FOR_PFRAMES (V4L2_CID_MPEG_BASE + 643) #define V4L2_CID_MPEG_VIDEO_PREPEND_SPSPPS_TO_IDR (V4L2_CID_MPEG_BASE + 644) #define V4L2_CID_MPEG_VIDEO_CONSTANT_QUALITY (V4L2_CID_MPEG_BASE + 645) +#define V4L2_CID_MPEG_VIDEO_FRAME_SKIP_MODE (V4L2_CID_MPEG_BASE + 646) +enum v4l2_mpeg_video_frame_skip_mode { + V4L2_MPEG_VIDEO_FRAME_SKIP_MODE_DISABLED = 0, + V4L2_MPEG_VIDEO_FRAME_SKIP_MODE_LEVEL_LIMIT = 1, + V4L2_MPEG_VIDEO_FRAME_SKIP_MODE_BUF_LIMIT = 2, +}; /* MPEG-class control IDs specific to the CX2341x driver as defined by V4L2 */ #define V4L2_CID_MPEG_CX2341X_BASE (V4L2_CTRL_CLASS_MPEG | 0x1000) -- cgit v1.2.3 From 9d3a39a5f1e45827b008fff1ee9cf3cac3409665 Mon Sep 17 00:00:00 2001 From: Khazhismel Kumykov Date: Mon, 24 Aug 2020 15:10:34 -0700 Subject: block: grant IOPRIO_CLASS_RT to CAP_SYS_NICE CAP_SYS_ADMIN is too broad, and ionice fits into CAP_SYS_NICE's grouping. Retain CAP_SYS_ADMIN permission for backwards compatibility. Signed-off-by: Khazhismel Kumykov Reviewed-by: Bart Van Assche Acked-by: Serge Hallyn Signed-off-by: Jens Axboe --- include/uapi/linux/capability.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/capability.h b/include/uapi/linux/capability.h index 395dd0df8d08..c6ca33034147 100644 --- a/include/uapi/linux/capability.h +++ b/include/uapi/linux/capability.h @@ -288,6 +288,8 @@ struct vfs_ns_cap_data { processes and setting the scheduling algorithm used by another process. */ /* Allow setting cpu affinity on other processes */ +/* Allow setting realtime ioprio class */ +/* Allow setting ioprio class on other processes */ #define CAP_SYS_NICE 23 -- cgit v1.2.3 From c1077616142907bb6ee987ecd136d6857ffd8787 Mon Sep 17 00:00:00 2001 From: Wei Wang Date: Tue, 1 Sep 2020 15:10:08 -0700 Subject: ip: expose inet sockopts through inet_diag Expose all exisiting inet sockopt bits through inet_diag for debug purpose. Corresponding changes in iproute2 ss will be submitted to output all these values. Signed-off-by: Wei Wang Signed-off-by: Eric Dumazet Signed-off-by: Mahesh Bandewar Signed-off-by: David S. Miller --- include/uapi/linux/inet_diag.h | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/inet_diag.h b/include/uapi/linux/inet_diag.h index 5ba122c1949a..20ee93f0f876 100644 --- a/include/uapi/linux/inet_diag.h +++ b/include/uapi/linux/inet_diag.h @@ -160,6 +160,7 @@ enum { INET_DIAG_ULP_INFO, INET_DIAG_SK_BPF_STORAGES, INET_DIAG_CGROUP_ID, + INET_DIAG_SOCKOPT, __INET_DIAG_MAX, }; @@ -183,6 +184,23 @@ struct inet_diag_meminfo { __u32 idiag_tmem; }; +/* INET_DIAG_SOCKOPT */ + +struct inet_diag_sockopt { + __u8 recverr:1, + is_icsk:1, + freebind:1, + hdrincl:1, + mc_loop:1, + transparent:1, + mc_all:1, + nodefrag:1; + __u8 bind_address_no_port:1, + recverr_rfc4884:1, + defer_connect:1, + unused:5; +}; + /* INET_DIAG_VEGASINFO */ struct tcpvegas_info { -- cgit v1.2.3 From 6da73d15258a1e5e86d03d4ffba8776d17a8a287 Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Wed, 2 Sep 2020 12:21:27 +0200 Subject: pidfd: support PIDFD_NONBLOCK in pidfd_open() Introduce PIDFD_NONBLOCK to support non-blocking pidfd file descriptors. Ever since the introduction of pidfds and more advanced async io various programming languages such as Rust have grown support for async event libraries. These libraries are created to help build epoll-based event loops around file descriptors. A common pattern is to automatically make all file descriptors they manage to O_NONBLOCK. For such libraries the EAGAIN error code is treated specially. When a function is called that returns EAGAIN the function isn't called again until the event loop indicates the the file descriptor is ready. Supporting EAGAIN when waiting on pidfds makes such libraries just work with little effort. In the following patch we will extend waitid() internally to support non-blocking pidfds. This introduces a new flag PIDFD_NONBLOCK that is equivalent to O_NONBLOCK. This follows the same patterns we have for other (anon inode) file descriptors such as EFD_NONBLOCK, IN_NONBLOCK, SFD_NONBLOCK, TFD_NONBLOCK and the same for close-on-exec flags. Suggested-by: Josh Triplett Signed-off-by: Christian Brauner Reviewed-by: Josh Triplett Reviewed-by: Oleg Nesterov Cc: Kees Cook Cc: Sargun Dhillon Cc: Oleg Nesterov Link: https://lore.kernel.org/lkml/20200811181236.GA18763@localhost/ Link: https://github.com/joshtriplett/async-pidfd Link: https://lore.kernel.org/r/20200902102130.147672-2-christian.brauner@ubuntu.com --- include/uapi/linux/pidfd.h | 12 ++++++++++++ 1 file changed, 12 insertions(+) create mode 100644 include/uapi/linux/pidfd.h (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/pidfd.h b/include/uapi/linux/pidfd.h new file mode 100644 index 000000000000..5406fbc13074 --- /dev/null +++ b/include/uapi/linux/pidfd.h @@ -0,0 +1,12 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ + +#ifndef _UAPI_LINUX_PIDFD_H +#define _UAPI_LINUX_PIDFD_H + +#include +#include + +/* Flags for pidfd_open(). */ +#define PIDFD_NONBLOCK O_NONBLOCK + +#endif /* _UAPI_LINUX_PIDFD_H */ -- cgit v1.2.3 From 1c101da8b971a36695319dce7a24711dc567a0dd Mon Sep 17 00:00:00 2001 From: Catalin Marinas Date: Wed, 27 Nov 2019 10:30:15 +0000 Subject: arm64: mte: Allow user control of the tag check mode via prctl() By default, even if PROT_MTE is set on a memory range, there is no tag check fault reporting (SIGSEGV). Introduce a set of option to the exiting prctl(PR_SET_TAGGED_ADDR_CTRL) to allow user control of the tag check fault mode: PR_MTE_TCF_NONE - no reporting (default) PR_MTE_TCF_SYNC - synchronous tag check fault reporting PR_MTE_TCF_ASYNC - asynchronous tag check fault reporting These options translate into the corresponding SCTLR_EL1.TCF0 bitfield, context-switched by the kernel. Note that the kernel accesses to the user address space (e.g. read() system call) are not checked if the user thread tag checking mode is PR_MTE_TCF_NONE or PR_MTE_TCF_ASYNC. If the tag checking mode is PR_MTE_TCF_SYNC, the kernel makes a best effort to check its user address accesses, however it cannot always guarantee it. Signed-off-by: Catalin Marinas Cc: Will Deacon --- include/uapi/linux/prctl.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/prctl.h b/include/uapi/linux/prctl.h index 07b4f8131e36..2390ab324afa 100644 --- a/include/uapi/linux/prctl.h +++ b/include/uapi/linux/prctl.h @@ -233,6 +233,12 @@ struct prctl_mm_map { #define PR_SET_TAGGED_ADDR_CTRL 55 #define PR_GET_TAGGED_ADDR_CTRL 56 # define PR_TAGGED_ADDR_ENABLE (1UL << 0) +/* MTE tag check fault modes */ +# define PR_MTE_TCF_SHIFT 1 +# define PR_MTE_TCF_NONE (0UL << PR_MTE_TCF_SHIFT) +# define PR_MTE_TCF_SYNC (1UL << PR_MTE_TCF_SHIFT) +# define PR_MTE_TCF_ASYNC (2UL << PR_MTE_TCF_SHIFT) +# define PR_MTE_TCF_MASK (3UL << PR_MTE_TCF_SHIFT) /* Control reclaim behavior when allocating memory */ #define PR_SET_IO_FLUSHER 57 -- cgit v1.2.3 From af5ce95282dc99d08a27a407a02c763dde1c5558 Mon Sep 17 00:00:00 2001 From: Catalin Marinas Date: Tue, 10 Dec 2019 11:19:15 +0000 Subject: arm64: mte: Allow user control of the generated random tags via prctl() The IRG, ADDG and SUBG instructions insert a random tag in the resulting address. Certain tags can be excluded via the GCR_EL1.Exclude bitmap when, for example, the user wants a certain colour for freed buffers. Since the GCR_EL1 register is not accessible at EL0, extend the prctl(PR_SET_TAGGED_ADDR_CTRL) interface to include a 16-bit field in the first argument for controlling which tags can be generated by the above instruction (an include rather than exclude mask). Note that by default all non-zero tags are excluded. This setting is per-thread. Signed-off-by: Catalin Marinas Cc: Will Deacon --- include/uapi/linux/prctl.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/prctl.h b/include/uapi/linux/prctl.h index 2390ab324afa..7f0827705c9a 100644 --- a/include/uapi/linux/prctl.h +++ b/include/uapi/linux/prctl.h @@ -239,6 +239,9 @@ struct prctl_mm_map { # define PR_MTE_TCF_SYNC (1UL << PR_MTE_TCF_SHIFT) # define PR_MTE_TCF_ASYNC (2UL << PR_MTE_TCF_SHIFT) # define PR_MTE_TCF_MASK (3UL << PR_MTE_TCF_SHIFT) +/* MTE tag inclusion mask */ +# define PR_MTE_TAG_SHIFT 3 +# define PR_MTE_TAG_MASK (0xffffUL << PR_MTE_TAG_SHIFT) /* Control reclaim behavior when allocating memory */ #define PR_SET_IO_FLUSHER 57 -- cgit v1.2.3 From 2200aa7154cb7ef76bac93e98326883ba64bfa2e Mon Sep 17 00:00:00 2001 From: Catalin Marinas Date: Fri, 3 Jul 2020 15:12:57 +0100 Subject: arm64: mte: ptrace: Add NT_ARM_TAGGED_ADDR_CTRL regset This regset allows read/write access to a ptraced process prctl(PR_SET_TAGGED_ADDR_CTRL) setting. Signed-off-by: Catalin Marinas Cc: Will Deacon Cc: Alan Hayward Cc: Luis Machado Cc: Omair Javaid --- include/uapi/linux/elf.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/elf.h b/include/uapi/linux/elf.h index 22220945a5fd..30f68b42eeb5 100644 --- a/include/uapi/linux/elf.h +++ b/include/uapi/linux/elf.h @@ -425,6 +425,7 @@ typedef struct elf64_shdr { #define NT_ARM_PAC_MASK 0x406 /* ARM pointer authentication code masks */ #define NT_ARM_PACA_KEYS 0x407 /* ARM pointer authentication address keys */ #define NT_ARM_PACG_KEYS 0x408 /* ARM pointer authentication generic key */ +#define NT_ARM_TAGGED_ADDR_CTRL 0x409 /* arm64 tagged address control (prctl()) */ #define NT_ARC_V2 0x600 /* ARCv2 accumulator/extra registers */ #define NT_VMCOREDD 0x700 /* Vmcore Device Dump Note */ #define NT_MIPS_DSP 0x800 /* MIPS DSP ASE registers */ -- cgit v1.2.3 From 16270a92355722e387e9ca19627c5a4d7bae1354 Mon Sep 17 00:00:00 2001 From: Hou Zhiqiang Date: Tue, 18 Aug 2020 17:27:46 +0800 Subject: PCI: designware-ep: Fix the Header Type check The current check will result in the multiple function device fails to initialize. So fix the check by masking out the multiple function bit. Link: https://lore.kernel.org/r/20200818092746.24366-1-Zhiqiang.Hou@nxp.com Fixes: 0b24134f7888 ("PCI: dwc: Add validation that PCIe core is set to correct mode") Signed-off-by: Hou Zhiqiang Signed-off-by: Lorenzo Pieralisi Reviewed-by: Rob Herring --- include/uapi/linux/pci_regs.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/pci_regs.h b/include/uapi/linux/pci_regs.h index f9701410d3b5..57a222014cd2 100644 --- a/include/uapi/linux/pci_regs.h +++ b/include/uapi/linux/pci_regs.h @@ -76,6 +76,7 @@ #define PCI_CACHE_LINE_SIZE 0x0c /* 8 bits */ #define PCI_LATENCY_TIMER 0x0d /* 8 bits */ #define PCI_HEADER_TYPE 0x0e /* 8 bits */ +#define PCI_HEADER_TYPE_MASK 0x7f #define PCI_HEADER_TYPE_NORMAL 0 #define PCI_HEADER_TYPE_BRIDGE 1 #define PCI_HEADER_TYPE_CARDBUS 2 -- cgit v1.2.3 From 938c3efd9e650ca343d04e70d11a17c64119e17c Mon Sep 17 00:00:00 2001 From: Quentin Monnet Date: Fri, 4 Sep 2020 17:14:53 +0100 Subject: bpf: Fix formatting in documentation for BPF helpers Fix a formatting error in the description of bpf_load_hdr_opt() (rst2man complains about a wrong indentation, but what is missing is actually a blank line before the bullet list). Fix and harmonise the formatting for other helpers. Signed-off-by: Quentin Monnet Signed-off-by: Daniel Borkmann Link: https://lore.kernel.org/bpf/20200904161454.31135-3-quentin@isovalent.com --- include/uapi/linux/bpf.h | 87 +++++++++++++++++++++++++----------------------- 1 file changed, 45 insertions(+), 42 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 8dda13880957..90359cab501d 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -3349,38 +3349,38 @@ union bpf_attr { * Description * Dynamically cast a *sk* pointer to a *tcp6_sock* pointer. * Return - * *sk* if casting is valid, or NULL otherwise. + * *sk* if casting is valid, or **NULL** otherwise. * * struct tcp_sock *bpf_skc_to_tcp_sock(void *sk) * Description * Dynamically cast a *sk* pointer to a *tcp_sock* pointer. * Return - * *sk* if casting is valid, or NULL otherwise. + * *sk* if casting is valid, or **NULL** otherwise. * * struct tcp_timewait_sock *bpf_skc_to_tcp_timewait_sock(void *sk) * Description * Dynamically cast a *sk* pointer to a *tcp_timewait_sock* pointer. * Return - * *sk* if casting is valid, or NULL otherwise. + * *sk* if casting is valid, or **NULL** otherwise. * * struct tcp_request_sock *bpf_skc_to_tcp_request_sock(void *sk) * Description * Dynamically cast a *sk* pointer to a *tcp_request_sock* pointer. * Return - * *sk* if casting is valid, or NULL otherwise. + * *sk* if casting is valid, or **NULL** otherwise. * * struct udp6_sock *bpf_skc_to_udp6_sock(void *sk) * Description * Dynamically cast a *sk* pointer to a *udp6_sock* pointer. * Return - * *sk* if casting is valid, or NULL otherwise. + * *sk* if casting is valid, or **NULL** otherwise. * * long bpf_get_task_stack(struct task_struct *task, void *buf, u32 size, u64 flags) * Description * Return a user or a kernel stack in bpf program provided buffer. * To achieve this, the helper needs *task*, which is a valid - * pointer to struct task_struct. To store the stacktrace, the - * bpf program provides *buf* with a nonnegative *size*. + * pointer to **struct task_struct**. To store the stacktrace, the + * bpf program provides *buf* with a nonnegative *size*. * * The last argument, *flags*, holds the number of stack frames to * skip (from 0 to 255), masked with @@ -3410,12 +3410,12 @@ union bpf_attr { * long bpf_load_hdr_opt(struct bpf_sock_ops *skops, void *searchby_res, u32 len, u64 flags) * Description * Load header option. Support reading a particular TCP header - * option for bpf program (BPF_PROG_TYPE_SOCK_OPS). + * option for bpf program (**BPF_PROG_TYPE_SOCK_OPS**). * * If *flags* is 0, it will search the option from the - * sock_ops->skb_data. The comment in "struct bpf_sock_ops" + * *skops*\ **->skb_data**. The comment in **struct bpf_sock_ops** * has details on what skb_data contains under different - * sock_ops->op. + * *skops*\ **->op**. * * The first byte of the *searchby_res* specifies the * kind that it wants to search. @@ -3435,7 +3435,7 @@ union bpf_attr { * [ 254, 4, 0xeB, 0x9F, 0, 0, .... 0 ]. * * To search for the standard window scale option (3), - * the searchby_res should be [ 3, 0, 0, .... 0 ]. + * the *searchby_res* should be [ 3, 0, 0, .... 0 ]. * Note, kind-length must be 0 for regular option. * * Searching for No-Op (0) and End-of-Option-List (1) are @@ -3445,27 +3445,30 @@ union bpf_attr { * of a header option. * * Supported flags: + * * * **BPF_LOAD_HDR_OPT_TCP_SYN** to search from the * saved_syn packet or the just-received syn packet. * * Return - * >0 when found, the header option is copied to *searchby_res*. - * The return value is the total length copied. + * > 0 when found, the header option is copied to *searchby_res*. + * The return value is the total length copied. On failure, a + * negative error code is returned: * - * **-EINVAL** If param is invalid + * **-EINVAL** if a parameter is invalid. * - * **-ENOMSG** The option is not found + * **-ENOMSG** if the option is not found. * - * **-ENOENT** No syn packet available when - * **BPF_LOAD_HDR_OPT_TCP_SYN** is used + * **-ENOENT** if no syn packet is available when + * **BPF_LOAD_HDR_OPT_TCP_SYN** is used. * - * **-ENOSPC** Not enough space. Only *len* number of - * bytes are copied. + * **-ENOSPC** if there is not enough space. Only *len* number of + * bytes are copied. * - * **-EFAULT** Cannot parse the header options in the packet + * **-EFAULT** on failure to parse the header options in the + * packet. * - * **-EPERM** This helper cannot be used under the - * current sock_ops->op. + * **-EPERM** if the helper cannot be used under the current + * *skops*\ **->op**. * * long bpf_store_hdr_opt(struct bpf_sock_ops *skops, const void *from, u32 len, u64 flags) * Description @@ -3483,44 +3486,44 @@ union bpf_attr { * by searching the same option in the outgoing skb. * * This helper can only be called during - * BPF_SOCK_OPS_WRITE_HDR_OPT_CB. + * **BPF_SOCK_OPS_WRITE_HDR_OPT_CB**. * * Return * 0 on success, or negative error in case of failure: * - * **-EINVAL** If param is invalid + * **-EINVAL** If param is invalid. * - * **-ENOSPC** Not enough space in the header. - * Nothing has been written + * **-ENOSPC** if there is not enough space in the header. + * Nothing has been written * - * **-EEXIST** The option has already existed + * **-EEXIST** if the option already exists. * - * **-EFAULT** Cannot parse the existing header options + * **-EFAULT** on failrue to parse the existing header options. * - * **-EPERM** This helper cannot be used under the - * current sock_ops->op. + * **-EPERM** if the helper cannot be used under the current + * *skops*\ **->op**. * * long bpf_reserve_hdr_opt(struct bpf_sock_ops *skops, u32 len, u64 flags) * Description * Reserve *len* bytes for the bpf header option. The - * space will be used by bpf_store_hdr_opt() later in - * BPF_SOCK_OPS_WRITE_HDR_OPT_CB. + * space will be used by **bpf_store_hdr_opt**\ () later in + * **BPF_SOCK_OPS_WRITE_HDR_OPT_CB**. * - * If bpf_reserve_hdr_opt() is called multiple times, + * If **bpf_reserve_hdr_opt**\ () is called multiple times, * the total number of bytes will be reserved. * * This helper can only be called during - * BPF_SOCK_OPS_HDR_OPT_LEN_CB. + * **BPF_SOCK_OPS_HDR_OPT_LEN_CB**. * * Return * 0 on success, or negative error in case of failure: * - * **-EINVAL** if param is invalid + * **-EINVAL** if a parameter is invalid. * - * **-ENOSPC** Not enough space in the header. + * **-ENOSPC** if there is not enough space in the header. * - * **-EPERM** This helper cannot be used under the - * current sock_ops->op. + * **-EPERM** if the helper cannot be used under the current + * *skops*\ **->op**. * * void *bpf_inode_storage_get(struct bpf_map *map, void *inode, void *value, u64 flags) * Description @@ -3560,9 +3563,9 @@ union bpf_attr { * * long bpf_d_path(struct path *path, char *buf, u32 sz) * Description - * Return full path for given 'struct path' object, which - * needs to be the kernel BTF 'path' object. The path is - * returned in the provided buffer 'buf' of size 'sz' and + * Return full path for given **struct path** object, which + * needs to be the kernel BTF *path* object. The path is + * returned in the provided buffer *buf* of size *sz* and * is zero terminated. * * Return @@ -3573,7 +3576,7 @@ union bpf_attr { * long bpf_copy_from_user(void *dst, u32 size, const void *user_ptr) * Description * Read *size* bytes from user space address *user_ptr* and store - * the data in *dst*. This is a wrapper of copy_from_user(). + * the data in *dst*. This is a wrapper of **copy_from_user**\ (). * Return * 0 on success, or a negative error in case of failure. */ -- cgit v1.2.3 From 5205e919c9f0c5b48678f2c787871c96f665ca1b Mon Sep 17 00:00:00 2001 From: Nikolay Aleksandrov Date: Mon, 7 Sep 2020 12:56:08 +0300 Subject: net: bridge: mcast: add support for src list and filter mode dumping Support per port group src list (address and timer) and filter mode dumping. Protected by either multicast_lock or rcu. v3: add IPv6 support v2: require RCU or multicast_lock to traverse src groups Signed-off-by: Nikolay Aleksandrov Signed-off-by: Jakub Kicinski --- include/uapi/linux/if_bridge.h | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/if_bridge.h b/include/uapi/linux/if_bridge.h index c1227aecd38f..75a2ac479247 100644 --- a/include/uapi/linux/if_bridge.h +++ b/include/uapi/linux/if_bridge.h @@ -455,10 +455,31 @@ enum { enum { MDBA_MDB_EATTR_UNSPEC, MDBA_MDB_EATTR_TIMER, + MDBA_MDB_EATTR_SRC_LIST, + MDBA_MDB_EATTR_GROUP_MODE, __MDBA_MDB_EATTR_MAX }; #define MDBA_MDB_EATTR_MAX (__MDBA_MDB_EATTR_MAX - 1) +/* per mdb entry source */ +enum { + MDBA_MDB_SRCLIST_UNSPEC, + MDBA_MDB_SRCLIST_ENTRY, + __MDBA_MDB_SRCLIST_MAX +}; +#define MDBA_MDB_SRCLIST_MAX (__MDBA_MDB_SRCLIST_MAX - 1) + +/* per mdb entry per source attributes + * these are embedded in MDBA_MDB_SRCLIST_ENTRY + */ +enum { + MDBA_MDB_SRCATTR_UNSPEC, + MDBA_MDB_SRCATTR_ADDRESS, + MDBA_MDB_SRCATTR_TIMER, + __MDBA_MDB_SRCATTR_MAX +}; +#define MDBA_MDB_SRCATTR_MAX (__MDBA_MDB_SRCATTR_MAX - 1) + /* multicast router types */ enum { MDB_RTR_TYPE_DISABLED, -- cgit v1.2.3 From 0db0c34cfbc9838c1a14cb04dd880602abd699a7 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Thu, 3 Sep 2020 16:14:31 -0700 Subject: net: tighten the definition of interface statistics This patch is born out of an investigation into which IEEE statistics correspond to which struct rtnl_link_stats64 members. Turns out that there seems to be reasonable consensus on the matter, among many drivers. To save others the time (and it took more time than I'm comfortable admitting) I'm adding comments referring to IEEE attributes to struct rtnl_link_stats64. Up until now we had two forms of documentation for stats - in Documentation/ABI/testing/sysfs-class-net-statistics and the comments on struct rtnl_link_stats64 itself. While the former is very cautious in defining the expected behavior, the latter feel quite dated and may not be easy to understand for modern day driver author (e.g. rx_over_errors). At the same time modern systems are far more complex and once obvious definitions lost their clarity. For example - does rx_packet count at the MAC layer (aFramesReceivedOK)? packets processed correctly by hardware? received by the driver? or maybe received by the stack? I tried to clarify the expectations, further clarifications from others are very welcome. The part hardest to untangle is rx_over_errors vs rx_fifo_errors vs rx_missed_errors. After much deliberation I concluded that for modern HW only two of the counters will make sense. The distinction between internal FIFO overflow and packets dropped due to back-pressure from the host is likely too implementation (driver and device) specific to expose in the standard stats. Now - which two of those counters we select to use is anyone's pick: sysfs documentation suggests rx_over_errors counts packets which did not fit into buffers due to MTU being too small, which I reused. There don't seem to be many modern drivers using it (well, CAN drivers seem to love this statistic). Of the remaining two I picked rx_missed_errors to report device drops. bnxt reports it and it's folded into "drop"s in procfs (while rx_fifo_errors is an error, and modern devices usually receive the frame OK, they just can't admit it into the pipeline). Of the drivers I looked at only AMD Lance-like and NS8390-like use all three of these counters. rx_missed_errors counts missed frames, rx_over_errors counts overflow events, and rx_fifo_errors counts frames which were truncated because they didn't fit into buffers. This suggests that rx_fifo_errors may be the correct stat for truncated packets, but I'd think a FIFO stat counting truncated packets would be very confusing to a modern reader. v2: - add driver developer notes about ethtool stat count and reset - replace Ethernet with IEEE 802.3 to better indicate source of attrs - mention byte counters don't count FCS - clarify RX counter is from device to host - drop "sightly" from sysfs paragraph - add examples of ethtool stats - s/incoming/received/ s/incoming/transmitted/ Signed-off-by: Jakub Kicinski --- include/uapi/linux/if_link.h | 204 +++++++++++++++++++++++++++++++++++++++---- 1 file changed, 187 insertions(+), 17 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h index 7fba4de511de..bf4667403cab 100644 --- a/include/uapi/linux/if_link.h +++ b/include/uapi/linux/if_link.h @@ -40,26 +40,197 @@ struct rtnl_link_stats { __u32 rx_nohandler; /* dropped, no handler found */ }; -/* The main device statistics structure */ +/** + * struct rtnl_link_stats64 - The main device statistics structure. + * + * @rx_packets: Number of good packets received by the interface. + * For hardware interfaces counts all good packets received from the device + * by the host, including packets which host had to drop at various stages + * of processing (even in the driver). + * + * @tx_packets: Number of packets successfully transmitted. + * For hardware interfaces counts packets which host was able to successfully + * hand over to the device, which does not necessarily mean that packets + * had been successfully transmitted out of the device, only that device + * acknowledged it copied them out of host memory. + * + * @rx_bytes: Number of good received bytes, corresponding to @rx_packets. + * + * For IEEE 802.3 devices should count the length of Ethernet Frames + * excluding the FCS. + * + * @tx_bytes: Number of good transmitted bytes, corresponding to @tx_packets. + * + * For IEEE 802.3 devices should count the length of Ethernet Frames + * excluding the FCS. + * + * @rx_errors: Total number of bad packets received on this network device. + * This counter must include events counted by @rx_length_errors, + * @rx_crc_errors, @rx_frame_errors and other errors not otherwise + * counted. + * + * @tx_errors: Total number of transmit problems. + * This counter must include events counter by @tx_aborted_errors, + * @tx_carrier_errors, @tx_fifo_errors, @tx_heartbeat_errors, + * @tx_window_errors and other errors not otherwise counted. + * + * @rx_dropped: Number of packets received but not processed, + * e.g. due to lack of resources or unsupported protocol. + * For hardware interfaces this counter should not include packets + * dropped by the device which are counted separately in + * @rx_missed_errors (since procfs folds those two counters together). + * + * @tx_dropped: Number of packets dropped on their way to transmission, + * e.g. due to lack of resources. + * + * @multicast: Multicast packets received. + * For hardware interfaces this statistic is commonly calculated + * at the device level (unlike @rx_packets) and therefore may include + * packets which did not reach the host. + * + * For IEEE 802.3 devices this counter may be equivalent to: + * + * - 30.3.1.1.21 aMulticastFramesReceivedOK + * + * @collisions: Number of collisions during packet transmissions. + * + * @rx_length_errors: Number of packets dropped due to invalid length. + * Part of aggregate "frame" errors in `/proc/net/dev`. + * + * For IEEE 802.3 devices this counter should be equivalent to a sum + * of the following attributes: + * + * - 30.3.1.1.23 aInRangeLengthErrors + * - 30.3.1.1.24 aOutOfRangeLengthField + * - 30.3.1.1.25 aFrameTooLongErrors + * + * @rx_over_errors: Receiver FIFO overflow event counter. + * + * Historically the count of overflow events. Such events may be + * reported in the receive descriptors or via interrupts, and may + * not correspond one-to-one with dropped packets. + * + * The recommended interpretation for high speed interfaces is - + * number of packets dropped because they did not fit into buffers + * provided by the host, e.g. packets larger than MTU or next buffer + * in the ring was not available for a scatter transfer. + * + * Part of aggregate "frame" errors in `/proc/net/dev`. + * + * This statistics was historically used interchangeably with + * @rx_fifo_errors. + * + * This statistic corresponds to hardware events and is not commonly used + * on software devices. + * + * @rx_crc_errors: Number of packets received with a CRC error. + * Part of aggregate "frame" errors in `/proc/net/dev`. + * + * For IEEE 802.3 devices this counter must be equivalent to: + * + * - 30.3.1.1.6 aFrameCheckSequenceErrors + * + * @rx_frame_errors: Receiver frame alignment errors. + * Part of aggregate "frame" errors in `/proc/net/dev`. + * + * For IEEE 802.3 devices this counter should be equivalent to: + * + * - 30.3.1.1.7 aAlignmentErrors + * + * @rx_fifo_errors: Receiver FIFO error counter. + * + * Historically the count of overflow events. Those events may be + * reported in the receive descriptors or via interrupts, and may + * not correspond one-to-one with dropped packets. + * + * This statistics was used interchangeably with @rx_over_errors. + * Not recommended for use in drivers for high speed interfaces. + * + * This statistic is used on software devices, e.g. to count software + * packet queue overflow (can) or sequencing errors (GRE). + * + * @rx_missed_errors: Count of packets missed by the host. + * Folded into the "drop" counter in `/proc/net/dev`. + * + * Counts number of packets dropped by the device due to lack + * of buffer space. This usually indicates that the host interface + * is slower than the network interface, or host is not keeping up + * with the receive packet rate. + * + * This statistic corresponds to hardware events and is not used + * on software devices. + * + * @tx_aborted_errors: + * Part of aggregate "carrier" errors in `/proc/net/dev`. + * For IEEE 802.3 devices capable of half-duplex operation this counter + * must be equivalent to: + * + * - 30.3.1.1.11 aFramesAbortedDueToXSColls + * + * High speed interfaces may use this counter as a general device + * discard counter. + * + * @tx_carrier_errors: Number of frame transmission errors due to loss + * of carrier during transmission. + * Part of aggregate "carrier" errors in `/proc/net/dev`. + * + * For IEEE 802.3 devices this counter must be equivalent to: + * + * - 30.3.1.1.13 aCarrierSenseErrors + * + * @tx_fifo_errors: Number of frame transmission errors due to device + * FIFO underrun / underflow. This condition occurs when the device + * begins transmission of a frame but is unable to deliver the + * entire frame to the transmitter in time for transmission. + * Part of aggregate "carrier" errors in `/proc/net/dev`. + * + * @tx_heartbeat_errors: Number of Heartbeat / SQE Test errors for + * old half-duplex Ethernet. + * Part of aggregate "carrier" errors in `/proc/net/dev`. + * + * For IEEE 802.3 devices possibly equivalent to: + * + * - 30.3.2.1.4 aSQETestErrors + * + * @tx_window_errors: Number of frame transmission errors due + * to late collisions (for Ethernet - after the first 64B of transmission). + * Part of aggregate "carrier" errors in `/proc/net/dev`. + * + * For IEEE 802.3 devices this counter must be equivalent to: + * + * - 30.3.1.1.10 aLateCollisions + * + * @rx_compressed: Number of correctly received compressed packets. + * This counters is only meaningful for interfaces which support + * packet compression (e.g. CSLIP, PPP). + * + * @tx_compressed: Number of transmitted compressed packets. + * This counters is only meaningful for interfaces which support + * packet compression (e.g. CSLIP, PPP). + * + * @rx_nohandler: Number of packets received on the interface + * but dropped by the networking stack because the device is + * not designated to receive packets (e.g. backup link in a bond). + */ struct rtnl_link_stats64 { - __u64 rx_packets; /* total packets received */ - __u64 tx_packets; /* total packets transmitted */ - __u64 rx_bytes; /* total bytes received */ - __u64 tx_bytes; /* total bytes transmitted */ - __u64 rx_errors; /* bad packets received */ - __u64 tx_errors; /* packet transmit problems */ - __u64 rx_dropped; /* no space in linux buffers */ - __u64 tx_dropped; /* no space available in linux */ - __u64 multicast; /* multicast packets received */ + __u64 rx_packets; + __u64 tx_packets; + __u64 rx_bytes; + __u64 tx_bytes; + __u64 rx_errors; + __u64 tx_errors; + __u64 rx_dropped; + __u64 tx_dropped; + __u64 multicast; __u64 collisions; /* detailed rx_errors: */ __u64 rx_length_errors; - __u64 rx_over_errors; /* receiver ring buff overflow */ - __u64 rx_crc_errors; /* recved pkt with crc error */ - __u64 rx_frame_errors; /* recv'd frame alignment error */ - __u64 rx_fifo_errors; /* recv'r fifo overrun */ - __u64 rx_missed_errors; /* receiver missed packet */ + __u64 rx_over_errors; + __u64 rx_crc_errors; + __u64 rx_frame_errors; + __u64 rx_fifo_errors; + __u64 rx_missed_errors; /* detailed tx_errors */ __u64 tx_aborted_errors; @@ -71,8 +242,7 @@ struct rtnl_link_stats64 { /* for cslip etc */ __u64 rx_compressed; __u64 tx_compressed; - - __u64 rx_nohandler; /* dropped, no handler found */ + __u64 rx_nohandler; }; /* The struct should be in sync with struct ifmap */ -- cgit v1.2.3 From b131c96496b369c7b14125e7c50e89ac7cec8051 Mon Sep 17 00:00:00 2001 From: "Jose M. Guisado Gomez" Date: Tue, 8 Sep 2020 13:01:41 +0200 Subject: netfilter: nf_tables: add userdata support for nft_object Enables storing userdata for nft_object. Initially this will store an optional comment but can be extended in the future as needed. Adds new attribute NFTA_OBJ_USERDATA to nft_object. Signed-off-by: Jose M. Guisado Gomez Signed-off-by: Pablo Neira Ayuso --- include/uapi/linux/netfilter/nf_tables.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/netfilter/nf_tables.h b/include/uapi/linux/netfilter/nf_tables.h index 543dc697b796..2a6e09dea1a0 100644 --- a/include/uapi/linux/netfilter/nf_tables.h +++ b/include/uapi/linux/netfilter/nf_tables.h @@ -1559,6 +1559,7 @@ enum nft_ct_expectation_attributes { * @NFTA_OBJ_DATA: stateful object data (NLA_NESTED) * @NFTA_OBJ_USE: number of references to this expression (NLA_U32) * @NFTA_OBJ_HANDLE: object handle (NLA_U64) + * @NFTA_OBJ_USERDATA: user data (NLA_BINARY) */ enum nft_object_attributes { NFTA_OBJ_UNSPEC, @@ -1569,6 +1570,7 @@ enum nft_object_attributes { NFTA_OBJ_USE, NFTA_OBJ_HANDLE, NFTA_OBJ_PAD, + NFTA_OBJ_USERDATA, __NFTA_OBJ_MAX }; #define NFTA_OBJ_MAX (__NFTA_OBJ_MAX - 1) -- cgit v1.2.3 From cd80ec795156346236e9b1cd9f5cbff5a9bbd212 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Tue, 8 Sep 2020 09:33:56 -0700 Subject: Input: allocate keycodes for notification-center, pickup-phone and hangup-phone New Lenovo Thinkpad models, e.g. the X1 Carbon 8th gen and the new T14 gen1 models have 3 new symbols / shortcuts on their F9-F11 keys (and the thinkpad_acpi driver receives 3 new "scancodes" for these): F9: Has a symbol resembling a rectangular speech balloon, the manual says the hotkey functions shows or hides the notification center F10: Has a symbol of a telephone horn which has been picked up from the receiver, the manual says: "Answer incoming calls" F11: Has a symbol of a telephone horn which is resting on the receiver, the manual says: "Decline incoming calls" We have no existing keycodes which are a good match for these, so add 3 new keycodes for these. I noticed that we have a hole in our keycodes between 0x1ba and 0x1c0 which does not seem to be reserved for any specific purpose, so these new 3 codes use 0x1bc - 0x1be, instead of starting at 0x27b. Acked-by: Henrique de Moraes Holschuh Acked-by: Andy Shevchenko Signed-off-by: Hans de Goede Signed-off-by: Dmitry Torokhov --- include/uapi/linux/input-event-codes.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/input-event-codes.h b/include/uapi/linux/input-event-codes.h index 0c2e27d28e0a..b74821d09145 100644 --- a/include/uapi/linux/input-event-codes.h +++ b/include/uapi/linux/input-event-codes.h @@ -515,6 +515,9 @@ #define KEY_10CHANNELSUP 0x1b8 /* 10 channels up (10+) */ #define KEY_10CHANNELSDOWN 0x1b9 /* 10 channels down (10-) */ #define KEY_IMAGES 0x1ba /* AL Image Browser */ +#define KEY_NOTIFICATION_CENTER 0x1bc /* Show/hide the notification center */ +#define KEY_PICKUP_PHONE 0x1bd /* Answer incoming call */ +#define KEY_HANGUP_PHONE 0x1be /* Decline incoming call */ #define KEY_DEL_EOL 0x1c0 #define KEY_DEL_EOS 0x1c1 -- cgit v1.2.3 From bba013e1ca5e7150b42a1a1a1e852010d772edad Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Tue, 8 Sep 2020 09:58:09 -0700 Subject: Input: allocate keycode for Fn + right shift The last 2 generations of Lenovo Thinkpads send an acpi_thinkpad event when Fn + right shift is pressed. This is intended for use with "Lenovo Quick Clean" software, which disables the touchpad + kbd for 2 minutes on this key-combo so that healthcare workes can disinfect it. But there is no silkscreen print on the right-keyboard to indicate this, so add a KEY_FN_RIGHT_SHIFT keycode define to use for this key-combo. Signed-off-by: Hans de Goede Link: https://lore.kernel.org/r/20200908135147.4044-3-hdegoede@redhat.com Signed-off-by: Dmitry Torokhov --- include/uapi/linux/input-event-codes.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/input-event-codes.h b/include/uapi/linux/input-event-codes.h index b74821d09145..ee93428ced9a 100644 --- a/include/uapi/linux/input-event-codes.h +++ b/include/uapi/linux/input-event-codes.h @@ -545,6 +545,7 @@ #define KEY_FN_F 0x1e2 #define KEY_FN_S 0x1e3 #define KEY_FN_B 0x1e4 +#define KEY_FN_RIGHT_SHIFT 0x1e5 #define KEY_BRL_DOT1 0x1f1 #define KEY_BRL_DOT2 0x1f2 -- cgit v1.2.3 From 05b595e9c44acaca94192c6db430a489c1b212a7 Mon Sep 17 00:00:00 2001 From: Parav Pandit Date: Wed, 9 Sep 2020 07:50:36 +0300 Subject: devlink: Introduce external controller flag A devlink eswitch port may represent PCI PF/VF ports of a controller. A controller either located on same system or it can be an external controller located in host where such NIC is plugged in. Add the ability for driver to specify if a port is for external controller. Use such flag in the mlx5_core driver. An example of an external controller having VF1 of PF0 belong to controller 1. $ devlink port show pci/0000:06:00.0/2 pci/0000:06:00.0/2: type eth netdev ens2f0pf0vf1 flavour pcivf pfnum 0 vfnum 1 external true splittable false function: hw_addr 00:00:00:00:00:00 $ devlink port show pci/0000:06:00.0/2 -jp { "port": { "pci/0000:06:00.0/2": { "type": "eth", "netdev": "ens2f0pf0vf1", "flavour": "pcivf", "pfnum": 0, "vfnum": 1, "external": true, "splittable": false, "function": { "hw_addr": "00:00:00:00:00:00" } } } } Signed-off-by: Parav Pandit Reviewed-by: Jiri Pirko Signed-off-by: David S. Miller --- include/uapi/linux/devlink.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/devlink.h b/include/uapi/linux/devlink.h index cfef4245ea5a..40823ed7e05a 100644 --- a/include/uapi/linux/devlink.h +++ b/include/uapi/linux/devlink.h @@ -458,6 +458,7 @@ enum devlink_attr { DEVLINK_ATTR_PORT_LANES, /* u32 */ DEVLINK_ATTR_PORT_SPLITTABLE, /* u8 */ + DEVLINK_ATTR_PORT_EXTERNAL, /* u8 */ /* add new attributes above here, update the policy in devlink.c */ __DEVLINK_ATTR_MAX, -- cgit v1.2.3 From 3a2d9588c4f79adae6a0e986b64ebdd5b38085c6 Mon Sep 17 00:00:00 2001 From: Parav Pandit Date: Wed, 9 Sep 2020 07:50:37 +0300 Subject: devlink: Introduce controller number A devlink port may be for a controller consist of PCI device. A devlink instance holds ports of two types of controllers. (1) controller discovered on same system where eswitch resides This is the case where PCI PF/VF of a controller and devlink eswitch instance both are located on a single system. (2) controller located on external host system. This is the case where a controller is located in one system and its devlink eswitch ports are located in a different system. When a devlink eswitch instance serves the devlink ports of both controllers together, PCI PF/VF numbers may overlap. Due to this a unique phys_port_name cannot be constructed. For example in below such system controller-0 and controller-1, each has PCI PF pf0 whose eswitch ports can be present in controller-0. These results in phys_port_name as "pf0" for both. Similar problem exists for VFs and upcoming Sub functions. An example view of two controller systems: --------------------------------------------------------- | | | --------- --------- ------- ------- | ----------- | | vf(s) | | sf(s) | |vf(s)| |sf(s)| | | server | | ------- ----/---- ---/----- ------- ---/--- ---/--- | | pci rc |=== | pf0 |______/________/ | pf1 |___/_______/ | | connect | | ------- ------- | ----------- | | controller_num=1 (no eswitch) | ------|-------------------------------------------------- (internal wire) | --------------------------------------------------------- | devlink eswitch ports and reps | | ----------------------------------------------------- | | |ctrl-0 | ctrl-0 | ctrl-0 | ctrl-0 | ctrl-0 |ctrl-0 | | | |pf0 | pf0vfN | pf0sfN | pf1 | pf1vfN |pf1sfN | | | ----------------------------------------------------- | | |ctrl-1 | ctrl-1 | ctrl-1 | ctrl-1 | ctrl-1 |ctrl-1 | | | |pf1 | pf1vfN | pf1sfN | pf1 | pf1vfN |pf0sfN | | | ----------------------------------------------------- | | | | | | --------- --------- ------- ------- | | | vf(s) | | sf(s) | |vf(s)| |sf(s)| | | ------- ----/---- ---/----- ------- ---/--- ---/--- | | | pf0 |______/________/ | pf1 |___/_______/ | | ------- ------- | | | | local controller_num=0 (eswitch) | --------------------------------------------------------- An example devlink port for external controller with controller number = 1 for a VF 1 of PF 0: $ devlink port show pci/0000:06:00.0/2 pci/0000:06:00.0/2: type eth netdev ens2f0pf0vf1 flavour pcivf controller 1 pfnum 0 vfnum 1 external true splittable false function: hw_addr 00:00:00:00:00:00 $ devlink port show pci/0000:06:00.0/2 -jp { "port": { "pci/0000:06:00.0/2": { "type": "eth", "netdev": "ens2f0pf0vf1", "flavour": "pcivf", "controller": 1, "pfnum": 0, "vfnum": 1, "external": true, "splittable": false, "function": { "hw_addr": "00:00:00:00:00:00" } } } } Signed-off-by: Parav Pandit Reviewed-by: Jiri Pirko Signed-off-by: David S. Miller --- include/uapi/linux/devlink.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/devlink.h b/include/uapi/linux/devlink.h index 40823ed7e05a..40d35145c879 100644 --- a/include/uapi/linux/devlink.h +++ b/include/uapi/linux/devlink.h @@ -459,6 +459,7 @@ enum devlink_attr { DEVLINK_ATTR_PORT_SPLITTABLE, /* u8 */ DEVLINK_ATTR_PORT_EXTERNAL, /* u8 */ + DEVLINK_ATTR_PORT_CONTROLLER_NUMBER, /* u32 */ /* add new attributes above here, update the policy in devlink.c */ __DEVLINK_ATTR_MAX, -- cgit v1.2.3 From ad47ff330b26a9fefa882032be2122700e1625ab Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Wed, 9 Sep 2020 09:34:13 -0700 Subject: quota: widen timestamps for the fs_disk_quota structure Soon, XFS will support quota grace period expiration timestamps beyond the year 2038, widen the timestamp fields to handle the extra time bits. Internally, XFS now stores unsigned 34-bit quantities, so the extra 8 bits here should work fine. (Note that XFS is the only user of this structure.) Link: https://lore.kernel.org/r/20200909163413.GJ7955@magnolia Signed-off-by: Darrick J. Wong Reviewed-by: Christoph Hellwig Signed-off-by: Jan Kara --- include/uapi/linux/dqblk_xfs.h | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/dqblk_xfs.h b/include/uapi/linux/dqblk_xfs.h index 03d890b80ebc..16d73f54376d 100644 --- a/include/uapi/linux/dqblk_xfs.h +++ b/include/uapi/linux/dqblk_xfs.h @@ -66,7 +66,10 @@ typedef struct fs_disk_quota { __s32 d_btimer; /* similar to above; for disk blocks */ __u16 d_iwarns; /* # warnings issued wrt num inodes */ __u16 d_bwarns; /* # warnings issued wrt disk blocks */ - __s32 d_padding2; /* padding2 - for future use */ + __s8 d_itimer_hi; /* upper 8 bits of timer values */ + __s8 d_btimer_hi; + __s8 d_rtbtimer_hi; + __s8 d_padding2; /* padding2 - for future use */ __u64 d_rtb_hardlimit;/* absolute limit on realtime blks */ __u64 d_rtb_softlimit;/* preferred limit on RT disk blks */ __u64 d_rtbcount; /* # realtime blocks owned */ @@ -121,6 +124,12 @@ typedef struct fs_disk_quota { #define FS_DQ_RTBCOUNT (1<<14) #define FS_DQ_ACCT_MASK (FS_DQ_BCOUNT | FS_DQ_ICOUNT | FS_DQ_RTBCOUNT) +/* + * Quota expiration timestamps are 40-bit signed integers, with the upper 8 + * bits encoded in the _hi fields. + */ +#define FS_DQ_BIGTIME (1<<15) + /* * Various flags related to quotactl(2). */ -- cgit v1.2.3 From d1c10767837c4181f2e054865a58166fc117783b Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Wed, 9 Sep 2020 15:54:46 +0200 Subject: quota: Expand comment describing d_itimer Expand comment describing d_itimer in struct fs_disk_quota. Reported-by: Matthew Wilcox Signed-off-by: Jan Kara --- include/uapi/linux/dqblk_xfs.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/dqblk_xfs.h b/include/uapi/linux/dqblk_xfs.h index 16d73f54376d..c71d909addda 100644 --- a/include/uapi/linux/dqblk_xfs.h +++ b/include/uapi/linux/dqblk_xfs.h @@ -61,8 +61,9 @@ typedef struct fs_disk_quota { __u64 d_ino_softlimit;/* preferred inode limit */ __u64 d_bcount; /* # disk blocks owned by the user */ __u64 d_icount; /* # inodes owned by the user */ - __s32 d_itimer; /* zero if within inode limits */ - /* if not, we refuse service */ + __s32 d_itimer; /* Zero if within inode limits. If + * not, we refuse service at this time + * (in seconds since Unix epoch) */ __s32 d_btimer; /* similar to above; for disk blocks */ __u16 d_iwarns; /* # warnings issued wrt num inodes */ __u16 d_bwarns; /* # warnings issued wrt disk blocks */ -- cgit v1.2.3 From 0dd4ff93f4c8dba016ad79384007da4938cd54a1 Mon Sep 17 00:00:00 2001 From: Sebastien Boeuf Date: Wed, 19 Aug 2020 18:19:42 -0400 Subject: virtio: Implement get_shm_region for PCI transport On PCI the shm regions are found using capability entries; find a region by searching for the capability. Signed-off-by: Sebastien Boeuf Signed-off-by: Dr. David Alan Gilbert Signed-off-by: kbuild test robot Acked-by: Michael S. Tsirkin Cc: kvm@vger.kernel.org Cc: virtualization@lists.linux-foundation.org Cc: "Michael S. Tsirkin" Signed-off-by: Miklos Szeredi --- include/uapi/linux/virtio_pci.h | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/virtio_pci.h b/include/uapi/linux/virtio_pci.h index 90007a1abcab..3a86f36d7e3d 100644 --- a/include/uapi/linux/virtio_pci.h +++ b/include/uapi/linux/virtio_pci.h @@ -113,6 +113,8 @@ #define VIRTIO_PCI_CAP_DEVICE_CFG 4 /* PCI configuration access */ #define VIRTIO_PCI_CAP_PCI_CFG 5 +/* Additional shared memory capability */ +#define VIRTIO_PCI_CAP_SHARED_MEMORY_CFG 8 /* This is the PCI capability header: */ struct virtio_pci_cap { @@ -121,11 +123,18 @@ struct virtio_pci_cap { __u8 cap_len; /* Generic PCI field: capability length */ __u8 cfg_type; /* Identifies the structure. */ __u8 bar; /* Where to find it. */ - __u8 padding[3]; /* Pad to full dword. */ + __u8 id; /* Multiple capabilities of the same type */ + __u8 padding[2]; /* Pad to full dword. */ __le32 offset; /* Offset within bar. */ __le32 length; /* Length of the structure, in bytes. */ }; +struct virtio_pci_cap64 { + struct virtio_pci_cap cap; + __le32 offset_hi; /* Most sig 32 bits of offset */ + __le32 length_hi; /* Most sig 32 bits of length */ +}; + struct virtio_pci_notify_cap { struct virtio_pci_cap cap; __le32 notify_off_multiplier; /* Multiplier for queue_notify_off. */ -- cgit v1.2.3 From 38e895487afc2ed42c11045853cbb3fa20b52b6e Mon Sep 17 00:00:00 2001 From: Sebastien Boeuf Date: Wed, 19 Aug 2020 18:19:43 -0400 Subject: virtio: Implement get_shm_region for MMIO transport On MMIO a new set of registers is defined for finding SHM regions. Add their definitions and use them to find the region. Signed-off-by: Sebastien Boeuf Cc: kvm@vger.kernel.org Cc: virtualization@lists.linux-foundation.org Cc: "Michael S. Tsirkin" Signed-off-by: Miklos Szeredi --- include/uapi/linux/virtio_mmio.h | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/virtio_mmio.h b/include/uapi/linux/virtio_mmio.h index c4b09689ab64..0650f91bea6c 100644 --- a/include/uapi/linux/virtio_mmio.h +++ b/include/uapi/linux/virtio_mmio.h @@ -122,6 +122,17 @@ #define VIRTIO_MMIO_QUEUE_USED_LOW 0x0a0 #define VIRTIO_MMIO_QUEUE_USED_HIGH 0x0a4 +/* Shared memory region id */ +#define VIRTIO_MMIO_SHM_SEL 0x0ac + +/* Shared memory region length, 64 bits in two halves */ +#define VIRTIO_MMIO_SHM_LEN_LOW 0x0b0 +#define VIRTIO_MMIO_SHM_LEN_HIGH 0x0b4 + +/* Shared memory region base address, 64 bits in two halves */ +#define VIRTIO_MMIO_SHM_BASE_LOW 0x0b8 +#define VIRTIO_MMIO_SHM_BASE_HIGH 0x0bc + /* Configuration atomicity value */ #define VIRTIO_MMIO_CONFIG_GENERATION 0x0fc -- cgit v1.2.3 From 22f3787e9d95e72d1f09795f294fb010e2998f43 Mon Sep 17 00:00:00 2001 From: Stefan Hajnoczi Date: Wed, 19 Aug 2020 18:19:46 -0400 Subject: virtiofs: set up virtio_fs dax_device Setup a dax device. Use the shm capability to find the cache entry and map it. The DAX window is accessed by the fs/dax.c infrastructure and must have struct pages (at least on x86). Use devm_memremap_pages() to map the DAX window PCI BAR and allocate struct page. Signed-off-by: Stefan Hajnoczi Signed-off-by: Dr. David Alan Gilbert Signed-off-by: Vivek Goyal Signed-off-by: Sebastien Boeuf Signed-off-by: Liu Bo Signed-off-by: Miklos Szeredi --- include/uapi/linux/virtio_fs.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/virtio_fs.h b/include/uapi/linux/virtio_fs.h index 3056b6e9f8ce..bea38291421b 100644 --- a/include/uapi/linux/virtio_fs.h +++ b/include/uapi/linux/virtio_fs.h @@ -16,4 +16,7 @@ struct virtio_fs_config { __le32 num_request_queues; } __attribute__((packed)); +/* For the id field in virtio_pci_shm_cap */ +#define VIRTIO_FS_SHMCAP_ID_CACHE 0 + #endif /* _UAPI_LINUX_VIRTIO_FS_H */ -- cgit v1.2.3 From fd1a1dc6f5aa7361e3562790336e116935f8fcfa Mon Sep 17 00:00:00 2001 From: Stefan Hajnoczi Date: Wed, 19 Aug 2020 18:19:49 -0400 Subject: virtiofs: implement FUSE_INIT map_alignment field The device communicates FUSE_SETUPMAPPING/FUSE_REMOVMAPPING alignment constraints via the FUST_INIT map_alignment field. Parse this field and ensure our DAX mappings meet the alignment constraints. We don't actually align anything differently since our mappings are already 2MB aligned. Just check the value when the connection is established. If it becomes necessary to honor arbitrary alignments in the future we'll have to adjust how mappings are sized. The upshot of this commit is that we can be confident that mappings will work even when emulating x86 on Power and similar combinations where the host page sizes are different. Signed-off-by: Stefan Hajnoczi Signed-off-by: Vivek Goyal Signed-off-by: Miklos Szeredi --- include/uapi/linux/fuse.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/fuse.h b/include/uapi/linux/fuse.h index 373cada89815..5b85819e045f 100644 --- a/include/uapi/linux/fuse.h +++ b/include/uapi/linux/fuse.h @@ -313,7 +313,9 @@ struct fuse_file_lock { * FUSE_CACHE_SYMLINKS: cache READLINK responses * FUSE_NO_OPENDIR_SUPPORT: kernel supports zero-message opendir * FUSE_EXPLICIT_INVAL_DATA: only invalidate cached pages on explicit request - * FUSE_MAP_ALIGNMENT: map_alignment field is valid + * FUSE_MAP_ALIGNMENT: init_out.map_alignment contains log2(byte alignment) for + * foffset and moffset fields in struct + * fuse_setupmapping_out and fuse_removemapping_one. */ #define FUSE_ASYNC_READ (1 << 0) #define FUSE_POSIX_LOCKS (1 << 1) -- cgit v1.2.3 From ceec02d4354a317cacce4b053a580ea3c7fc6cdc Mon Sep 17 00:00:00 2001 From: Vivek Goyal Date: Wed, 19 Aug 2020 18:19:50 -0400 Subject: virtiofs: introduce setupmapping/removemapping commands Introduce two new fuse commands to setup/remove memory mappings. This will be used to setup/tear down file mapping in dax window. Signed-off-by: Vivek Goyal Signed-off-by: Peng Tao Signed-off-by: Miklos Szeredi --- include/uapi/linux/fuse.h | 29 +++++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/fuse.h b/include/uapi/linux/fuse.h index 5b85819e045f..60a7bfc787ce 100644 --- a/include/uapi/linux/fuse.h +++ b/include/uapi/linux/fuse.h @@ -894,4 +894,33 @@ struct fuse_copy_file_range_in { uint64_t flags; }; +#define FUSE_SETUPMAPPING_FLAG_WRITE (1ull << 0) +struct fuse_setupmapping_in { + /* An already open handle */ + uint64_t fh; + /* Offset into the file to start the mapping */ + uint64_t foffset; + /* Length of mapping required */ + uint64_t len; + /* Flags, FUSE_SETUPMAPPING_FLAG_* */ + uint64_t flags; + /* Offset in Memory Window */ + uint64_t moffset; +}; + +struct fuse_removemapping_in { + /* number of fuse_removemapping_one follows */ + uint32_t count; +}; + +struct fuse_removemapping_one { + /* Offset into the dax window start the unmapping */ + uint64_t moffset; + /* Length of mapping required */ + uint64_t len; +}; + +#define FUSE_REMOVEMAPPING_MAX_ENTRY \ + (PAGE_SIZE / sizeof(struct fuse_removemapping_one)) + #endif /* _LINUX_FUSE_H */ -- cgit v1.2.3 From c2d0ad00d948de73c78f05d2b3e5bdfa605035cc Mon Sep 17 00:00:00 2001 From: Vivek Goyal Date: Wed, 19 Aug 2020 18:19:51 -0400 Subject: virtiofs: implement dax read/write operations This patch implements basic DAX support. mmap() is not implemented yet and will come in later patches. This patch looks into implemeting read/write. We make use of interval tree to keep track of per inode dax mappings. Do not use dax for file extending writes, instead just send WRITE message to daemon (like we do for direct I/O path). This will keep write and i_size change atomic w.r.t crash. Signed-off-by: Stefan Hajnoczi Signed-off-by: Dr. David Alan Gilbert Signed-off-by: Vivek Goyal Signed-off-by: Liu Bo Signed-off-by: Peng Tao Cc: Dave Chinner Signed-off-by: Miklos Szeredi --- include/uapi/linux/fuse.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/fuse.h b/include/uapi/linux/fuse.h index 60a7bfc787ce..8899e4862309 100644 --- a/include/uapi/linux/fuse.h +++ b/include/uapi/linux/fuse.h @@ -895,6 +895,7 @@ struct fuse_copy_file_range_in { }; #define FUSE_SETUPMAPPING_FLAG_WRITE (1ull << 0) +#define FUSE_SETUPMAPPING_FLAG_READ (1ull << 1) struct fuse_setupmapping_in { /* An already open handle */ uint64_t fh; -- cgit v1.2.3 From 501cb008906631a019f3ab2104a17ef8b2651ed0 Mon Sep 17 00:00:00 2001 From: Paul Davey Date: Tue, 8 Sep 2020 10:04:06 +1200 Subject: ipmr: Add route table ID to netlink cache reports Insert the multicast route table ID as a Netlink attribute to Netlink cache report notifications. When multiple route tables are in use it is necessary to have a way to determine which route table a given cache report belongs to when receiving the cache report. Signed-off-by: Paul Davey Signed-off-by: David S. Miller --- include/uapi/linux/mroute.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/mroute.h b/include/uapi/linux/mroute.h index 11c8c1fc1124..918f1ef32ffe 100644 --- a/include/uapi/linux/mroute.h +++ b/include/uapi/linux/mroute.h @@ -169,6 +169,7 @@ enum { IPMRA_CREPORT_SRC_ADDR, IPMRA_CREPORT_DST_ADDR, IPMRA_CREPORT_PKT, + IPMRA_CREPORT_TABLE, __IPMRA_CREPORT_MAX }; #define IPMRA_CREPORT_MAX (__IPMRA_CREPORT_MAX - 1) -- cgit v1.2.3 From c8715a8e9f38906e73d6d78764216742db13ba0e Mon Sep 17 00:00:00 2001 From: Paul Davey Date: Tue, 8 Sep 2020 10:04:07 +1200 Subject: ipmr: Add high byte of VIF ID to igmpmsg Use the unused3 byte in struct igmpmsg to hold the high 8 bits of the VIF ID. If using more than 255 IPv4 multicast interfaces it is necessary to have access to a VIF ID for cache reports that is wider than 8 bits, the VIF ID present in the igmpmsg reports sent to mroute_sk was only 8 bits wide in the igmpmsg header. Adding the high 8 bits of the 16 bit VIF ID in the unused byte allows use of more than 255 IPv4 multicast interfaces. Signed-off-by: Paul Davey Signed-off-by: David S. Miller --- include/uapi/linux/mroute.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/mroute.h b/include/uapi/linux/mroute.h index 918f1ef32ffe..1a42f5f9b31b 100644 --- a/include/uapi/linux/mroute.h +++ b/include/uapi/linux/mroute.h @@ -113,8 +113,8 @@ struct igmpmsg { __u32 unused1,unused2; unsigned char im_msgtype; /* What is this */ unsigned char im_mbz; /* Must be zero */ - unsigned char im_vif; /* Interface (this ought to be a vifi_t!) */ - unsigned char unused3; + unsigned char im_vif; /* Low 8 bits of Interface */ + unsigned char im_vif_hi; /* High 8 bits of Interface */ struct in_addr im_src,im_dst; }; -- cgit v1.2.3 From 1aef5b4391f0c75c0a1523706a7b0311846ee12f Mon Sep 17 00:00:00 2001 From: Song Liu Date: Thu, 10 Sep 2020 13:33:14 -0700 Subject: bpf: Fix comment for helper bpf_current_task_under_cgroup() This should be "current" not "skb". Fixes: c6b5fb8690fa ("bpf: add documentation for eBPF helpers (42-50)") Signed-off-by: Song Liu Signed-off-by: Alexei Starovoitov Cc: Link: https://lore.kernel.org/bpf/20200910203314.70018-1-songliubraving@fb.com --- include/uapi/linux/bpf.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 90359cab501d..7dd314176df7 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -1447,8 +1447,8 @@ union bpf_attr { * Return * The return value depends on the result of the test, and can be: * - * * 0, if the *skb* task belongs to the cgroup2. - * * 1, if the *skb* task does not belong to the cgroup2. + * * 0, if current task belongs to the cgroup2. + * * 1, if current task does not belong to the cgroup2. * * A negative error code, if an error occurred. * * long bpf_skb_change_tail(struct sk_buff *skb, u32 len, u64 flags) -- cgit v1.2.3 From 5823833c9adab5a9ce5500e7f1ce7deeff00ee73 Mon Sep 17 00:00:00 2001 From: Stanimir Varbanov Date: Sat, 11 Jul 2020 14:52:36 +0200 Subject: media: v4l2-ctrl: Add VP9 codec levels Add menu control for VP9 codec levels. A total of 14 levels are defined for Profile 0 (8bit) and Profile 2 (10bit). Each level is a set of constrained bitstreams coded with targeted resolutions, frame rates, and bitrates. The definitions have been taken from webm project [1]. [1] https://www.webmproject.org/vp9/levels/ Signed-off-by: Stanimir Varbanov Reviewed-by: Nicolas Dufresne Reviewed-by: Hans Verkuil Signed-off-by: Mauro Carvalho Chehab --- include/uapi/linux/v4l2-controls.h | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/v4l2-controls.h b/include/uapi/linux/v4l2-controls.h index 053827cda8e6..a184c4939438 100644 --- a/include/uapi/linux/v4l2-controls.h +++ b/include/uapi/linux/v4l2-controls.h @@ -651,6 +651,23 @@ enum v4l2_mpeg_video_vp9_profile { V4L2_MPEG_VIDEO_VP9_PROFILE_2 = 2, V4L2_MPEG_VIDEO_VP9_PROFILE_3 = 3, }; +#define V4L2_CID_MPEG_VIDEO_VP9_LEVEL (V4L2_CID_MPEG_BASE+513) +enum v4l2_mpeg_video_vp9_level { + V4L2_MPEG_VIDEO_VP9_LEVEL_1_0 = 0, + V4L2_MPEG_VIDEO_VP9_LEVEL_1_1 = 1, + V4L2_MPEG_VIDEO_VP9_LEVEL_2_0 = 2, + V4L2_MPEG_VIDEO_VP9_LEVEL_2_1 = 3, + V4L2_MPEG_VIDEO_VP9_LEVEL_3_0 = 4, + V4L2_MPEG_VIDEO_VP9_LEVEL_3_1 = 5, + V4L2_MPEG_VIDEO_VP9_LEVEL_4_0 = 6, + V4L2_MPEG_VIDEO_VP9_LEVEL_4_1 = 7, + V4L2_MPEG_VIDEO_VP9_LEVEL_5_0 = 8, + V4L2_MPEG_VIDEO_VP9_LEVEL_5_1 = 9, + V4L2_MPEG_VIDEO_VP9_LEVEL_5_2 = 10, + V4L2_MPEG_VIDEO_VP9_LEVEL_6_0 = 11, + V4L2_MPEG_VIDEO_VP9_LEVEL_6_1 = 12, + V4L2_MPEG_VIDEO_VP9_LEVEL_6_2 = 13, +}; /* CIDs for HEVC encoding. */ -- cgit v1.2.3 From e47168f3d1b14af5281cf50c59561d59d28201f9 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Mon, 31 Aug 2020 08:30:44 +0000 Subject: powerpc/8xx: Support 16k hugepages with 4k pages The 8xx has 4 page sizes: 4k, 16k, 512k and 8M 4k and 16k can be selected at build time as standard page sizes, and 512k and 8M are hugepages. When 4k standard pages are selected, 16k pages are not available. Allow 16k pages as hugepages when 4k pages are used. To allow that, implement arch_make_huge_pte() which receives the necessary arguments to allow setting the PTE in accordance with the page size: - 512 k pages must have _PAGE_HUGE and _PAGE_SPS. They are set by pte_mkhuge(). arch_make_huge_pte() does nothing. - 16 k pages must have only _PAGE_SPS. arch_make_huge_pte() clears _PAGE_HUGE. Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/a518abc29266a708dfbccc8fce9ae6694fe4c2c6.1598862623.git.christophe.leroy@csgroup.eu --- include/uapi/linux/mman.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/mman.h b/include/uapi/linux/mman.h index 923cc162609c..f55bc680b5b0 100644 --- a/include/uapi/linux/mman.h +++ b/include/uapi/linux/mman.h @@ -27,6 +27,7 @@ #define MAP_HUGE_SHIFT HUGETLB_FLAG_ENCODE_SHIFT #define MAP_HUGE_MASK HUGETLB_FLAG_ENCODE_MASK +#define MAP_HUGE_16KB HUGETLB_FLAG_ENCODE_16KB #define MAP_HUGE_64KB HUGETLB_FLAG_ENCODE_64KB #define MAP_HUGE_512KB HUGETLB_FLAG_ENCODE_512KB #define MAP_HUGE_1MB HUGETLB_FLAG_ENCODE_1MB -- cgit v1.2.3 From f8910ffa81b085030dc54814c85d338c26a3157e Mon Sep 17 00:00:00 2001 From: Xianting Tian Date: Tue, 15 Sep 2020 15:18:17 +0800 Subject: ipmi:msghandler: retry to get device id on an error We fail to get the BMCS's device id with low probability when loading the ipmi driver and it causes BMC device registration failed. When this issue occurs we got below kernel prints: [Wed Sep 9 19:52:03 2020] ipmi_si IPI0001:00: IPMI message handler: device id demangle failed: -22 [Wed Sep 9 19:52:03 2020] IPMI BT: using default values [Wed Sep 9 19:52:03 2020] IPMI BT: req2rsp=5 secs retries=2 [Wed Sep 9 19:52:03 2020] ipmi_si IPI0001:00: Unable to get the device id: -5 [Wed Sep 9 19:52:04 2020] ipmi_si IPI0001:00: Unable to register device: error -5 When this issue happens, we want to manually unload the driver and try to load it again, but it can't be unloaded by 'rmmod' as it is already 'in use'. We add a print in handle_one_recv_msg(), when this issue happens, the msg we received is "Recv: 1c 01 d5", which means the data_len is 1, data[0] is 0xd5 (completion code), which means "bmc cannot execute command. Command, or request parameter(s), not supported in present state". Debug code: static int handle_one_recv_msg(struct ipmi_smi *intf, struct ipmi_smi_msg *msg) { printk("Recv: %*ph\n", msg->rsp_size, msg->rsp); ... ... } Then in ipmi_demangle_device_id(), it returned '-EINVAL' as 'data_len < 7' and 'data[0] != 0'. We created this patch to retry the get device id when this error happens. We reproduced this issue again and the retry succeed on the first retry, we finally got the correct msg and then all is ok: Recv: 1c 01 00 01 81 05 84 02 af db 07 00 01 00 b9 00 10 00 So use a retry machanism in this patch to give bmc more opportunity to correctly response kernel when we received specific completion codes. Signed-off-by: Xianting Tian Message-Id: <20200915071817.4484-1-tian.xianting@h3c.com> [Cleaned up the verbage a bit in the header and prints.] Signed-off-by: Corey Minyard --- include/uapi/linux/ipmi_msgdefs.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/ipmi_msgdefs.h b/include/uapi/linux/ipmi_msgdefs.h index c2b23a9fdf3d..0934af3b8037 100644 --- a/include/uapi/linux/ipmi_msgdefs.h +++ b/include/uapi/linux/ipmi_msgdefs.h @@ -69,6 +69,8 @@ #define IPMI_ERR_MSG_TRUNCATED 0xc6 #define IPMI_REQ_LEN_INVALID_ERR 0xc7 #define IPMI_REQ_LEN_EXCEEDED_ERR 0xc8 +#define IPMI_DEVICE_IN_FW_UPDATE_ERR 0xd1 +#define IPMI_DEVICE_IN_INIT_ERR 0xd2 #define IPMI_NOT_IN_MY_STATE_ERR 0xd5 /* IPMI 2.0 */ #define IPMI_LOST_ARBITRATION_ERR 0x81 #define IPMI_BUS_ERR 0x82 -- cgit v1.2.3 From 9a27a33027f22a716ce362be48d70ae0eb012ab7 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Mon, 14 Sep 2020 17:11:52 -0700 Subject: ethtool: add standard pause stats Currently drivers have to report their pause frames statistics via ethtool -S, and there is a wide variety of names used for these statistics. Add the two statistics defined in IEEE 802.3x to the standard API. Create a new ethtool request header flag for including statistics in the response to GET commands. Always create the ETHTOOL_A_PAUSE_STATS nest in replies when flag is set. Testing if driver declares the op is not a reliable way of checking if any stats will actually be included and therefore we don't want to give the impression that presence of ETHTOOL_A_PAUSE_STATS indicates driver support. Note that this patch does not include PFC counters, which may fit better in dcbnl? But mostly I don't need them/have a setup to test them so I haven't looked deeply into exposing them :) v3: - add a helper for "uninitializing" stats, rather than a cryptic memset() (Andrew) Signed-off-by: Jakub Kicinski Reviewed-by: Saeed Mahameed Signed-off-by: David S. Miller --- include/uapi/linux/ethtool_netlink.h | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/ethtool_netlink.h b/include/uapi/linux/ethtool_netlink.h index 5dcd24cb33ea..9cee6df01a10 100644 --- a/include/uapi/linux/ethtool_netlink.h +++ b/include/uapi/linux/ethtool_netlink.h @@ -91,9 +91,12 @@ enum { #define ETHTOOL_FLAG_COMPACT_BITSETS (1 << 0) /* provide optional reply for SET or ACT requests */ #define ETHTOOL_FLAG_OMIT_REPLY (1 << 1) +/* request statistics, if supported by the driver */ +#define ETHTOOL_FLAG_STATS (1 << 2) #define ETHTOOL_FLAG_ALL (ETHTOOL_FLAG_COMPACT_BITSETS | \ - ETHTOOL_FLAG_OMIT_REPLY) + ETHTOOL_FLAG_OMIT_REPLY | \ + ETHTOOL_FLAG_STATS) enum { ETHTOOL_A_HEADER_UNSPEC, @@ -376,12 +379,25 @@ enum { ETHTOOL_A_PAUSE_AUTONEG, /* u8 */ ETHTOOL_A_PAUSE_RX, /* u8 */ ETHTOOL_A_PAUSE_TX, /* u8 */ + ETHTOOL_A_PAUSE_STATS, /* nest - _PAUSE_STAT_* */ /* add new constants above here */ __ETHTOOL_A_PAUSE_CNT, ETHTOOL_A_PAUSE_MAX = (__ETHTOOL_A_PAUSE_CNT - 1) }; +enum { + ETHTOOL_A_PAUSE_STAT_UNSPEC, + ETHTOOL_A_PAUSE_STAT_PAD, + + ETHTOOL_A_PAUSE_STAT_TX_FRAMES, + ETHTOOL_A_PAUSE_STAT_RX_FRAMES, + + /* add new constants above here */ + __ETHTOOL_A_PAUSE_STAT_CNT, + ETHTOOL_A_PAUSE_STAT_MAX = (__ETHTOOL_A_PAUSE_STAT_CNT - 1) +}; + /* EEE */ enum { -- cgit v1.2.3 From e2ce94dc1d89e0f76ddd202cea72e0f505083d0a Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Tue, 15 Sep 2020 11:40:57 +0300 Subject: devlink: introduce the health reporter test command Introduce a test command for health reporters. User might use this command to trigger test event on a reporter if the reporter supports it. Signed-off-by: Jiri Pirko Signed-off-by: Ido Schimmel Signed-off-by: David S. Miller --- include/uapi/linux/devlink.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/devlink.h b/include/uapi/linux/devlink.h index 40d35145c879..631f5bdf1707 100644 --- a/include/uapi/linux/devlink.h +++ b/include/uapi/linux/devlink.h @@ -122,6 +122,8 @@ enum devlink_command { DEVLINK_CMD_TRAP_POLICER_NEW, DEVLINK_CMD_TRAP_POLICER_DEL, + DEVLINK_CMD_HEALTH_REPORTER_TEST, + /* add new commands above here */ __DEVLINK_CMD_MAX, DEVLINK_CMD_MAX = __DEVLINK_CMD_MAX - 1 -- cgit v1.2.3 From ef15314aa5de955c6afd87d512e8b00f5ac08d06 Mon Sep 17 00:00:00 2001 From: YiFei Zhu Date: Tue, 15 Sep 2020 16:45:40 -0700 Subject: bpf: Add BPF_PROG_BIND_MAP syscall This syscall binds a map to a program. Returns success if the map is already bound to the program. Signed-off-by: YiFei Zhu Signed-off-by: Stanislav Fomichev Signed-off-by: Alexei Starovoitov Acked-by: Andrii Nakryiko Cc: YiFei Zhu Link: https://lore.kernel.org/bpf/20200915234543.3220146-3-sdf@google.com --- include/uapi/linux/bpf.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 7dd314176df7..a22812561064 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -124,6 +124,7 @@ enum bpf_cmd { BPF_ENABLE_STATS, BPF_ITER_CREATE, BPF_LINK_DETACH, + BPF_PROG_BIND_MAP, }; enum bpf_map_type { @@ -658,6 +659,12 @@ union bpf_attr { __u32 flags; } iter_create; + struct { /* struct used by BPF_PROG_BIND_MAP command */ + __u32 prog_fd; + __u32 map_fd; + __u32 flags; /* extra flags */ + } prog_bind_map; + } __attribute__((aligned(8))); /* The description below is an attempt at providing documentation to eBPF -- cgit v1.2.3 From 4af8b3d3eb5032fe6f4a8104c48c176bf68a6946 Mon Sep 17 00:00:00 2001 From: Tingwei Zhang Date: Wed, 16 Sep 2020 13:17:23 -0600 Subject: coresight: stm: Support marked packet STP_PACKET_MARKED is not supported by STM currently. Add STM_FLAG_MARKED to support marked packet in STM. Signed-off-by: Tingwei Zhang Signed-off-by: Mathieu Poirier Link: https://lore.kernel.org/r/20200916191737.4001561-3-mathieu.poirier@linaro.org Signed-off-by: Greg Kroah-Hartman --- include/uapi/linux/coresight-stm.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/coresight-stm.h b/include/uapi/linux/coresight-stm.h index 8847dbf24151..7ff3709c01b8 100644 --- a/include/uapi/linux/coresight-stm.h +++ b/include/uapi/linux/coresight-stm.h @@ -5,6 +5,7 @@ #include #define STM_FLAG_TIMESTAMPED _BITUL(3) +#define STM_FLAG_MARKED _BITUL(4) #define STM_FLAG_GUARANTEED _BITUL(7) /* -- cgit v1.2.3 From 78a3ea5557137b0811f3c5a020afaafa7b61d6aa Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Thu, 17 Sep 2020 10:51:32 -0700 Subject: net: remove comments on struct rtnl_link_stats We removed the misleading comments from struct rtnl_link_stats64 when we added proper kdoc. struct rtnl_link_stats has the same inline comments, so remove them, too. Signed-off-by: Jakub Kicinski Reviewed-by: Saeed Mahameed Signed-off-by: David S. Miller --- include/uapi/linux/if_link.h | 31 +++++++++++++++---------------- 1 file changed, 15 insertions(+), 16 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h index bf4667403cab..c4b23f06f69e 100644 --- a/include/uapi/linux/if_link.h +++ b/include/uapi/linux/if_link.h @@ -7,24 +7,23 @@ /* This struct should be in sync with struct rtnl_link_stats64 */ struct rtnl_link_stats { - __u32 rx_packets; /* total packets received */ - __u32 tx_packets; /* total packets transmitted */ - __u32 rx_bytes; /* total bytes received */ - __u32 tx_bytes; /* total bytes transmitted */ - __u32 rx_errors; /* bad packets received */ - __u32 tx_errors; /* packet transmit problems */ - __u32 rx_dropped; /* no space in linux buffers */ - __u32 tx_dropped; /* no space available in linux */ - __u32 multicast; /* multicast packets received */ + __u32 rx_packets; + __u32 tx_packets; + __u32 rx_bytes; + __u32 tx_bytes; + __u32 rx_errors; + __u32 tx_errors; + __u32 rx_dropped; + __u32 tx_dropped; + __u32 multicast; __u32 collisions; - /* detailed rx_errors: */ __u32 rx_length_errors; - __u32 rx_over_errors; /* receiver ring buff overflow */ - __u32 rx_crc_errors; /* recved pkt with crc error */ - __u32 rx_frame_errors; /* recv'd frame alignment error */ - __u32 rx_fifo_errors; /* recv'r fifo overrun */ - __u32 rx_missed_errors; /* receiver missed packet */ + __u32 rx_over_errors; + __u32 rx_crc_errors; + __u32 rx_frame_errors; + __u32 rx_fifo_errors; + __u32 rx_missed_errors; /* detailed tx_errors */ __u32 tx_aborted_errors; @@ -37,7 +36,7 @@ struct rtnl_link_stats { __u32 rx_compressed; __u32 tx_compressed; - __u32 rx_nohandler; /* dropped, no handler found */ + __u32 rx_nohandler; }; /** -- cgit v1.2.3 From d65a977087f94f3bb97f351798d864556063109a Mon Sep 17 00:00:00 2001 From: Thomas Pedersen Date: Tue, 8 Sep 2020 12:03:03 -0700 Subject: nl80211: advertise supported channel width in S1G S1G supports 5 channel widths: 1, 2, 4, 8, and 16. One channel width is allowed per frequency in each operating class, so it makes more sense to advertise the specific channel width allowed. Signed-off-by: Thomas Pedersen Link: https://lore.kernel.org/r/20200908190323.15814-3-thomas@adapt-ip.com Signed-off-by: Johannes Berg --- include/uapi/linux/nl80211.h | 15 +++++++++++++++ 1 file changed, 15 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index 0584e0d349f0..4e119c6afa31 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -3737,6 +3737,16 @@ enum nl80211_wmm_rule { * @NL80211_FREQUENCY_ATTR_NO_HE: HE operation is not allowed on this channel * in current regulatory domain. * @NL80211_FREQUENCY_ATTR_OFFSET: frequency offset in KHz + * @NL80211_FREQUENCY_ATTR_1MHZ: 1 MHz operation is allowed + * on this channel in current regulatory domain. + * @NL80211_FREQUENCY_ATTR_2MHZ: 2 MHz operation is allowed + * on this channel in current regulatory domain. + * @NL80211_FREQUENCY_ATTR_4MHZ: 4 MHz operation is allowed + * on this channel in current regulatory domain. + * @NL80211_FREQUENCY_ATTR_8MHZ: 8 MHz operation is allowed + * on this channel in current regulatory domain. + * @NL80211_FREQUENCY_ATTR_16MHZ: 16 MHz operation is allowed + * on this channel in current regulatory domain. * @NL80211_FREQUENCY_ATTR_MAX: highest frequency attribute number * currently defined * @__NL80211_FREQUENCY_ATTR_AFTER_LAST: internal use @@ -3768,6 +3778,11 @@ enum nl80211_frequency_attr { NL80211_FREQUENCY_ATTR_WMM, NL80211_FREQUENCY_ATTR_NO_HE, NL80211_FREQUENCY_ATTR_OFFSET, + NL80211_FREQUENCY_ATTR_1MHZ, + NL80211_FREQUENCY_ATTR_2MHZ, + NL80211_FREQUENCY_ATTR_4MHZ, + NL80211_FREQUENCY_ATTR_8MHZ, + NL80211_FREQUENCY_ATTR_16MHZ, /* keep last */ __NL80211_FREQUENCY_ATTR_AFTER_LAST, -- cgit v1.2.3 From 291c49ded2fda1fd0d7bd6056de99fe47d2332e6 Mon Sep 17 00:00:00 2001 From: Aloka Dixit Date: Fri, 11 Sep 2020 00:05:29 +0000 Subject: nl80211: Add FILS discovery support FILS discovery attribute, NL80211_ATTR_FILS_DISCOVERY, is nested which supports following parameters as given in IEEE Std 802.11ai-2016, Annex C.3 MIB detail: (1) NL80211_FILS_DISCOVERY_ATTR_INT_MIN - Minimum packet interval (2) NL80211_FILS_DISCOVERY_ATTR_INT_MAX - Maximum packet interval (3) NL80211_FILS_DISCOVERY_ATTR_TMPL - Template data Signed-off-by: Aloka Dixit Link: https://lore.kernel.org/r/20200805011838.28166-2-alokad@codeaurora.org [fix attribute and other names, use NLA_RANGE(), use policy only once] Link: https://lore.kernel.org/r/010101747a7b38a8-306f06b2-9061-4baf-81c1-054a42a18e22-000000@us-west-2.amazonses.com Signed-off-by: Johannes Berg --- include/uapi/linux/nl80211.h | 44 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 44 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index 4e119c6afa31..ad2bea3b07e3 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -2513,6 +2513,10 @@ enum nl80211_commands { * @NL80211_ATTR_HE_6GHZ_CAPABILITY: HE 6 GHz Band Capability element (from * association request when used with NL80211_CMD_NEW_STATION). * + * @NL80211_ATTR_FILS_DISCOVERY: Optional parameter to configure FILS + * discovery. It is a nested attribute, see + * &enum nl80211_fils_discovery_attributes. + * * @NUM_NL80211_ATTR: total number of nl80211_attrs available * @NL80211_ATTR_MAX: highest attribute number currently defined * @__NL80211_ATTR_AFTER_LAST: internal use @@ -2995,6 +2999,8 @@ enum nl80211_attrs { NL80211_ATTR_HE_6GHZ_CAPABILITY, + NL80211_ATTR_FILS_DISCOVERY, + /* add attributes here, update the policy in nl80211.c */ __NL80211_ATTR_AFTER_LAST, @@ -5867,6 +5873,9 @@ enum nl80211_feature_flags { * @NL80211_EXT_FEATURE_SAE_OFFLOAD_AP: Device wants to do SAE authentication * in AP mode (SAE password is passed as part of the start AP command). * + * @NL80211_EXT_FEATURE_FILS_DISCOVERY: Driver/device supports FILS discovery + * frames transmission + * * @NUM_NL80211_EXT_FEATURES: number of extended features. * @MAX_NL80211_EXT_FEATURES: highest extended feature index. */ @@ -5925,6 +5934,7 @@ enum nl80211_ext_feature_index { NL80211_EXT_FEATURE_OPERATING_CHANNEL_VALIDATION, NL80211_EXT_FEATURE_4WAY_HANDSHAKE_AP_PSK, NL80211_EXT_FEATURE_SAE_OFFLOAD_AP, + NL80211_EXT_FEATURE_FILS_DISCOVERY, /* add new features before the definition below */ NUM_NL80211_EXT_FEATURES, @@ -7019,4 +7029,38 @@ enum nl80211_iftype_akm_attributes { NL80211_IFTYPE_AKM_ATTR_MAX = __NL80211_IFTYPE_AKM_ATTR_LAST - 1, }; +/** + * enum nl80211_fils_discovery_attributes - FILS discovery configuration + * from IEEE Std 802.11ai-2016, Annex C.3 MIB detail. + * + * @__NL80211_FILS_DISCOVERY_ATTR_INVALID: Invalid + * + * @NL80211_FILS_DISCOVERY_ATTR_INT_MIN: Minimum packet interval (u32, TU). + * Allowed range: 0..10000 (TU = Time Unit) + * @NL80211_FILS_DISCOVERY_ATTR_INT_MAX: Maximum packet interval (u32, TU). + * Allowed range: 0..10000 (TU = Time Unit) + * @NL80211_FILS_DISCOVERY_ATTR_TMPL: Template data for FILS discovery action + * frame including the headers. + * + * @__NL80211_FILS_DISCOVERY_ATTR_LAST: Internal + * @NL80211_FILS_DISCOVERY_ATTR_MAX: highest attribute + */ +enum nl80211_fils_discovery_attributes { + __NL80211_FILS_DISCOVERY_ATTR_INVALID, + + NL80211_FILS_DISCOVERY_ATTR_INT_MIN, + NL80211_FILS_DISCOVERY_ATTR_INT_MAX, + NL80211_FILS_DISCOVERY_ATTR_TMPL, + + /* keep last */ + __NL80211_FILS_DISCOVERY_ATTR_LAST, + NL80211_FILS_DISCOVERY_ATTR_MAX = __NL80211_FILS_DISCOVERY_ATTR_LAST - 1 +}; + +/* + * FILS discovery template minimum length with action frame headers and + * mandatory fields. + */ +#define NL80211_FILS_DISCOVERY_TMPL_MIN_LEN 42 + #endif /* __LINUX_NL80211_H */ -- cgit v1.2.3 From 7443dcd1f1718a355e9c4ebeb7e95c3f9f27bb5f Mon Sep 17 00:00:00 2001 From: Aloka Dixit Date: Fri, 11 Sep 2020 00:33:00 +0000 Subject: nl80211: Unsolicited broadcast probe response support This patch adds new attributes to support unsolicited broadcast probe response transmission used for in-band discovery in 6GHz band (IEEE P802.11ax/D6.0 26.17.2.3.2, AP behavior for fast passive scanning). The new attribute, NL80211_ATTR_UNSOL_BCAST_PROBE_RESP, is nested which supports following parameters: (1) NL80211_UNSOL_BCAST_PROBE_RESP_ATTR_INT - Packet interval (2) NL80211_UNSOL_BCAST_PROBE_RESP_ATTR_TMPL - Template data Signed-off-by: Aloka Dixit Link: https://lore.kernel.org/r/010101747a946698-aac263ae-2ed3-4dab-9590-0bc7131214e1-000000@us-west-2.amazonses.com Signed-off-by: Johannes Berg --- include/uapi/linux/nl80211.h | 36 ++++++++++++++++++++++++++++++++++++ 1 file changed, 36 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index ad2bea3b07e3..bdc90b8dfd24 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -2517,6 +2517,10 @@ enum nl80211_commands { * discovery. It is a nested attribute, see * &enum nl80211_fils_discovery_attributes. * + * @NL80211_ATTR_UNSOL_BCAST_PROBE_RESP: Optional parameter to configure + * unsolicited broadcast probe response. It is a nested attribute, see + * &enum nl80211_unsol_bcast_probe_resp_attributes. + * * @NUM_NL80211_ATTR: total number of nl80211_attrs available * @NL80211_ATTR_MAX: highest attribute number currently defined * @__NL80211_ATTR_AFTER_LAST: internal use @@ -3001,6 +3005,8 @@ enum nl80211_attrs { NL80211_ATTR_FILS_DISCOVERY, + NL80211_ATTR_UNSOL_BCAST_PROBE_RESP, + /* add attributes here, update the policy in nl80211.c */ __NL80211_ATTR_AFTER_LAST, @@ -5876,6 +5882,9 @@ enum nl80211_feature_flags { * @NL80211_EXT_FEATURE_FILS_DISCOVERY: Driver/device supports FILS discovery * frames transmission * + * @NL80211_EXT_FEATURE_UNSOL_BCAST_PROBE_RESP: Driver/device supports + * unsolicited broadcast probe response transmission + * * @NUM_NL80211_EXT_FEATURES: number of extended features. * @MAX_NL80211_EXT_FEATURES: highest extended feature index. */ @@ -5935,6 +5944,7 @@ enum nl80211_ext_feature_index { NL80211_EXT_FEATURE_4WAY_HANDSHAKE_AP_PSK, NL80211_EXT_FEATURE_SAE_OFFLOAD_AP, NL80211_EXT_FEATURE_FILS_DISCOVERY, + NL80211_EXT_FEATURE_UNSOL_BCAST_PROBE_RESP, /* add new features before the definition below */ NUM_NL80211_EXT_FEATURES, @@ -7063,4 +7073,30 @@ enum nl80211_fils_discovery_attributes { */ #define NL80211_FILS_DISCOVERY_TMPL_MIN_LEN 42 +/** + * enum nl80211_unsol_bcast_probe_resp_attributes - Unsolicited broadcast probe + * response configuration. Applicable only in 6GHz. + * + * @__NL80211_UNSOL_BCAST_PROBE_RESP_ATTR_INVALID: Invalid + * + * @NL80211_UNSOL_BCAST_PROBE_RESP_ATTR_INT: Maximum packet interval (u32, TU). + * Allowed range: 0..20 (TU = Time Unit). IEEE P802.11ax/D6.0 + * 26.17.2.3.2 (AP behavior for fast passive scanning). + * @NL80211_UNSOL_BCAST_PROBE_RESP_ATTR_TMPL: Unsolicited broadcast probe response + * frame template (binary). + * + * @__NL80211_UNSOL_BCAST_PROBE_RESP_ATTR_LAST: Internal + * @NL80211_UNSOL_BCAST_PROBE_RESP_ATTR_MAX: highest attribute + */ +enum nl80211_unsol_bcast_probe_resp_attributes { + __NL80211_UNSOL_BCAST_PROBE_RESP_ATTR_INVALID, + + NL80211_UNSOL_BCAST_PROBE_RESP_ATTR_INT, + NL80211_UNSOL_BCAST_PROBE_RESP_ATTR_TMPL, + + /* keep last */ + __NL80211_UNSOL_BCAST_PROBE_RESP_ATTR_LAST, + NL80211_UNSOL_BCAST_PROBE_RESP_ATTR_MAX = + __NL80211_UNSOL_BCAST_PROBE_RESP_ATTR_LAST - 1 +}; #endif /* __LINUX_NL80211_H */ -- cgit v1.2.3 From c6ff213fe5b8696c9539a1b34ff03de9306dfff9 Mon Sep 17 00:00:00 2001 From: Max Reitz Date: Tue, 8 Sep 2020 18:01:48 +0200 Subject: fuse: add submount support to - Add fuse_attr.flags - Add FUSE_ATTR_SUBMOUNT This is a flag for fuse_attr.flags that indicates that the given entry resides on a different filesystem than the parent, and as such should have a different st_dev. - Add FUSE_SUBMOUNTS The client sets this flag if it supports automounting directories. Signed-off-by: Max Reitz Signed-off-by: Miklos Szeredi --- include/uapi/linux/fuse.h | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/fuse.h b/include/uapi/linux/fuse.h index 8899e4862309..7233502ea991 100644 --- a/include/uapi/linux/fuse.h +++ b/include/uapi/linux/fuse.h @@ -172,6 +172,9 @@ * - add FUSE_WRITE_KILL_PRIV flag * - add FUSE_SETUPMAPPING and FUSE_REMOVEMAPPING * - add map_alignment to fuse_init_out, add FUSE_MAP_ALIGNMENT flag + * + * 7.32 + * - add flags to fuse_attr, add FUSE_ATTR_SUBMOUNT, add FUSE_SUBMOUNTS */ #ifndef _LINUX_FUSE_H @@ -207,7 +210,7 @@ #define FUSE_KERNEL_VERSION 7 /** Minor version number of this interface */ -#define FUSE_KERNEL_MINOR_VERSION 31 +#define FUSE_KERNEL_MINOR_VERSION 32 /** The node ID of the root inode */ #define FUSE_ROOT_ID 1 @@ -231,7 +234,7 @@ struct fuse_attr { uint32_t gid; uint32_t rdev; uint32_t blksize; - uint32_t padding; + uint32_t flags; }; struct fuse_kstatfs { @@ -316,6 +319,7 @@ struct fuse_file_lock { * FUSE_MAP_ALIGNMENT: init_out.map_alignment contains log2(byte alignment) for * foffset and moffset fields in struct * fuse_setupmapping_out and fuse_removemapping_one. + * FUSE_SUBMOUNTS: kernel supports auto-mounting directory submounts */ #define FUSE_ASYNC_READ (1 << 0) #define FUSE_POSIX_LOCKS (1 << 1) @@ -344,6 +348,7 @@ struct fuse_file_lock { #define FUSE_NO_OPENDIR_SUPPORT (1 << 24) #define FUSE_EXPLICIT_INVAL_DATA (1 << 25) #define FUSE_MAP_ALIGNMENT (1 << 26) +#define FUSE_SUBMOUNTS (1 << 27) /** * CUSE INIT request/reply flags @@ -419,6 +424,13 @@ struct fuse_file_lock { */ #define FUSE_FSYNC_FDATASYNC (1 << 0) +/** + * fuse_attr flags + * + * FUSE_ATTR_SUBMOUNT: Object is a submount root + */ +#define FUSE_ATTR_SUBMOUNT (1 << 0) + enum fuse_opcode { FUSE_LOOKUP = 1, FUSE_FORGET = 2, /* no reply */ -- cgit v1.2.3 From f92970c694b36a4dbac2b650b173c78c0f0954cc Mon Sep 17 00:00:00 2001 From: Shannon Nelson Date: Thu, 17 Sep 2020 18:13:23 -0700 Subject: devlink: add timeout information to status_notify Add a timeout element to the DEVLINK_CMD_FLASH_UPDATE_STATUS netlink message for use by a userland utility to show that a particular firmware flash activity may take a long but bounded time to finish. Also add a handy helper for drivers to make use of the new timeout value. UI usage hints: - if non-zero, add timeout display to the end of the status line [component] status_msg ( Xm Ys : Am Bs ) using the timeout value for Am Bs and updating the Xm Ys every second - if the timeout expires while awaiting the next update, display something like [component] status_msg ( timeout reached : Am Bs ) - if new status notify messages are received, remove the timeout and start over Signed-off-by: Shannon Nelson Reviewed-by: Jakub Kicinski Reviewed-by: Jacob Keller Signed-off-by: David S. Miller --- include/uapi/linux/devlink.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/devlink.h b/include/uapi/linux/devlink.h index 631f5bdf1707..a2ecc8b00611 100644 --- a/include/uapi/linux/devlink.h +++ b/include/uapi/linux/devlink.h @@ -462,6 +462,9 @@ enum devlink_attr { DEVLINK_ATTR_PORT_EXTERNAL, /* u8 */ DEVLINK_ATTR_PORT_CONTROLLER_NUMBER, /* u32 */ + + DEVLINK_ATTR_FLASH_UPDATE_STATUS_TIMEOUT, /* u64 */ + /* add new attributes above here, update the policy in devlink.c */ __DEVLINK_ATTR_MAX, -- cgit v1.2.3 From daef1ee3798b25e8464b8eb618eaa74b8f423ac7 Mon Sep 17 00:00:00 2001 From: Tuong Lien Date: Fri, 18 Sep 2020 08:17:27 +0700 Subject: tipc: introduce encryption master key In addition to the supported cluster & per-node encryption keys for the en/decryption of TIPC messages, we now introduce one option for user to set a cluster key as 'master key', which is simply a symmetric key like the former but has a longer life cycle. It has two purposes: - Authentication of new member nodes in the cluster. New nodes, having no knowledge of current session keys in the cluster will still be able to join the cluster as long as they know the master key. This is because all neighbor discovery (LINK_CONFIG) messages must be encrypted with this key. - Encryption of session encryption keys during automatic exchange and update of those.This is a feature we will introduce in a later commit in this series. We insert the new key into the currently unused slot 0 in the key array and start using it immediately once the user has set it. After joining, a node only knowing the master key should be fully communicable to existing nodes in the cluster, although those nodes may have their own session keys activated (i.e. not the master one). To support this, we define a 'grace period', starting from the time a node itself reports having no RX keys, so the existing nodes will use the master key for encryption instead. The grace period can be extended but will automatically stop after e.g. 5 seconds without a new report. This is also the basis for later key exchanging feature as the new node will be impossible to decrypt anything without the support from master key. For user to set a master key, we define a new netlink flag - 'TIPC_NLA_NODE_KEY_MASTER', so it can be added to the current 'set key' netlink command to specify the setting key to be a master key. Above all, the traditional cluster/per-node key mechanism is guaranteed to work when user comes not to use this master key option. This is also compatible to legacy nodes without the feature supported. Even this master key can be updated without any interruption of cluster connectivity but is so is needed, this has to be coordinated and set by the user. Acked-by: Jon Maloy Signed-off-by: Tuong Lien Signed-off-by: David S. Miller --- include/uapi/linux/tipc_netlink.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/tipc_netlink.h b/include/uapi/linux/tipc_netlink.h index dc0d23a50e69..d484baa9d365 100644 --- a/include/uapi/linux/tipc_netlink.h +++ b/include/uapi/linux/tipc_netlink.h @@ -165,6 +165,7 @@ enum { TIPC_NLA_NODE_UP, /* flag */ TIPC_NLA_NODE_ID, /* data */ TIPC_NLA_NODE_KEY, /* data */ + TIPC_NLA_NODE_KEY_MASTER, /* flag */ __TIPC_NLA_NODE_MAX, TIPC_NLA_NODE_MAX = __TIPC_NLA_NODE_MAX - 1 -- cgit v1.2.3 From 23700da29b83e859a8c3727fddd33ba74c4f3a39 Mon Sep 17 00:00:00 2001 From: Tuong Lien Date: Fri, 18 Sep 2020 08:17:29 +0700 Subject: tipc: add automatic rekeying for encryption key Rekeying is required for security since a key is less secure when using for a long time. Also, key will be detached when its nonce value (or seqno ...) is exhausted. We now make the rekeying process automatic and configurable by user. Basically, TIPC will at a specific interval generate a new key by using the kernel 'Random Number Generator' cipher, then attach it as the node TX key and securely distribute to others in the cluster as RX keys (- the key exchange). The automatic key switching will then take over, and make the new key active shortly. Afterwards, the traffic from this node will be encrypted with the new session key. The same can happen in peer nodes but not necessarily at the same time. For simplicity, the automatically generated key will be initiated as a per node key. It is not too hard to also support a cluster key rekeying (e.g. a given node will generate a unique cluster key and update to the others in the cluster...), but that doesn't bring much benefit, while a per-node key is even more secure. We also enable user to force a rekeying or change the rekeying interval via netlink, the new 'set key' command option: 'TIPC_NLA_NODE_REKEYING' is added for these purposes as follows: - A value >= 1 will be set as the rekeying interval (in minutes); - A value of 0 will disable the rekeying; - A value of 'TIPC_REKEYING_NOW' (~0) will force an immediate rekeying; The default rekeying interval is (60 * 24) minutes i.e. done every day. There isn't any restriction for the value but user shouldn't set it too small or too large which results in an "ineffective" rekeying (thats ok for testing though). Acked-by: Jon Maloy Signed-off-by: Tuong Lien Signed-off-by: David S. Miller --- include/uapi/linux/tipc.h | 2 ++ include/uapi/linux/tipc_netlink.h | 1 + 2 files changed, 3 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/tipc.h b/include/uapi/linux/tipc.h index add01db1daef..80ea15e12113 100644 --- a/include/uapi/linux/tipc.h +++ b/include/uapi/linux/tipc.h @@ -254,6 +254,8 @@ static inline int tipc_aead_key_size(struct tipc_aead_key *key) return sizeof(*key) + key->keylen; } +#define TIPC_REKEYING_NOW (~0U) + /* The macros and functions below are deprecated: */ diff --git a/include/uapi/linux/tipc_netlink.h b/include/uapi/linux/tipc_netlink.h index d484baa9d365..d847dd671d79 100644 --- a/include/uapi/linux/tipc_netlink.h +++ b/include/uapi/linux/tipc_netlink.h @@ -166,6 +166,7 @@ enum { TIPC_NLA_NODE_ID, /* data */ TIPC_NLA_NODE_KEY, /* data */ TIPC_NLA_NODE_KEY_MASTER, /* flag */ + TIPC_NLA_NODE_REKEYING, /* u32 */ __TIPC_NLA_NODE_MAX, TIPC_NLA_NODE_MAX = __TIPC_NLA_NODE_MAX - 1 -- cgit v1.2.3 From 55f13311785cebd60b9bab9ca7fd64205436c462 Mon Sep 17 00:00:00 2001 From: Dan Murphy Date: Fri, 18 Sep 2020 14:14:51 -0500 Subject: ethtool: Add 100base-FX link mode entries Add entries for the 100base-FX full and half duplex supported modes. $ ethtool eth0 Supported ports: [ FIBRE ] Supported link modes: 100baseFX/Half 100baseFX/Full Supported pause frame use: Symmetric Receive-only Supports auto-negotiation: No Supported FEC modes: Not reported Advertised link modes: 100baseFX/Half 100baseFX/Full Advertised pause frame use: No Advertised auto-negotiation: No Advertised FEC modes: Not reported Speed: 100Mb/s Duplex: Full Auto-negotiation: off Port: MII PHYAD: 1 Transceiver: external Supports Wake-on: gs Wake-on: d SecureOn password: 00:00:00:00:00:00 Current message level: 0x00000000 (0) Link detected: yes Signed-off-by: Dan Murphy Reviewed-by: Andrew Lunn Reviewed-by: Florian Fainelli Signed-off-by: David S. Miller --- include/uapi/linux/ethtool.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/ethtool.h b/include/uapi/linux/ethtool.h index b4f2d134e713..9ca87bc73c44 100644 --- a/include/uapi/linux/ethtool.h +++ b/include/uapi/linux/ethtool.h @@ -1617,6 +1617,8 @@ enum ethtool_link_mode_bit_indices { ETHTOOL_LINK_MODE_400000baseLR4_ER4_FR4_Full_BIT = 87, ETHTOOL_LINK_MODE_400000baseDR4_Full_BIT = 88, ETHTOOL_LINK_MODE_400000baseCR4_Full_BIT = 89, + ETHTOOL_LINK_MODE_100baseFX_Half_BIT = 90, + ETHTOOL_LINK_MODE_100baseFX_Full_BIT = 91, /* must be last entry */ __ETHTOOL_LINK_MODE_MASK_NBITS }; -- cgit v1.2.3 From c12fa88c6d16ed3865072d91154cff6fd1cd9cd4 Mon Sep 17 00:00:00 2001 From: Zenghui Yu Date: Thu, 10 Sep 2020 20:25:08 +0800 Subject: vfio: Fix typo of the device_state A typo fix ("_RUNNNG" => "_RUNNING") in comment block of the uapi header. Signed-off-by: Zenghui Yu Reviewed-by: Cornelia Huck Reviewed-by: Kirti Wankhede Signed-off-by: Alex Williamson --- include/uapi/linux/vfio.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/vfio.h b/include/uapi/linux/vfio.h index 920470502329..d4bd39e124bf 100644 --- a/include/uapi/linux/vfio.h +++ b/include/uapi/linux/vfio.h @@ -462,7 +462,7 @@ struct vfio_region_gfx_edid { * 5. Resumed * |--------->| * - * 0. Default state of VFIO device is _RUNNNG when the user application starts. + * 0. Default state of VFIO device is _RUNNING when the user application starts. * 1. During normal shutdown of the user application, the user application may * optionally change the VFIO device state from _RUNNING to _STOP. This * transition is optional. The vendor driver must support this transition but -- cgit v1.2.3 From 7d6e1329652ed971d1b6e0e7bea66fba5044e271 Mon Sep 17 00:00:00 2001 From: Matthew Rosato Date: Tue, 15 Sep 2020 15:05:18 -0400 Subject: vfio iommu: Add dma available capability Commit 492855939bdb ("vfio/type1: Limit DMA mappings per container") added the ability to limit the number of memory backed DMA mappings. However on s390x, when lazy mapping is in use, we use a very large number of concurrent mappings. Let's provide the current allowable number of DMA mappings to userspace via the IOMMU info chain so that userspace can take appropriate mitigation. Signed-off-by: Matthew Rosato Reviewed-by: Cornelia Huck Signed-off-by: Alex Williamson --- include/uapi/linux/vfio.h | 15 +++++++++++++++ 1 file changed, 15 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/vfio.h b/include/uapi/linux/vfio.h index 920470502329..3891e03d3af0 100644 --- a/include/uapi/linux/vfio.h +++ b/include/uapi/linux/vfio.h @@ -1039,6 +1039,21 @@ struct vfio_iommu_type1_info_cap_migration { __u64 max_dirty_bitmap_size; /* in bytes */ }; +/* + * The DMA available capability allows to report the current number of + * simultaneously outstanding DMA mappings that are allowed. + * + * The structure below defines version 1 of this capability. + * + * avail: specifies the current number of outstanding DMA mappings allowed. + */ +#define VFIO_IOMMU_TYPE1_INFO_DMA_AVAIL 3 + +struct vfio_iommu_type1_info_dma_avail { + struct vfio_info_cap_header header; + __u32 avail; +}; + #define VFIO_IOMMU_GET_INFO _IO(VFIO_TYPE, VFIO_BASE + 12) /** -- cgit v1.2.3 From 15b760c37ad3c3f2b922506eaca4ca8b4292e621 Mon Sep 17 00:00:00 2001 From: Andra Paraschiv Date: Mon, 21 Sep 2020 15:17:15 +0300 Subject: nitro_enclaves: Add ioctl interface definition The Nitro Enclaves driver handles the enclave lifetime management. This includes enclave creation, termination and setting up its resources such as memory and CPU. An enclave runs alongside the VM that spawned it. It is abstracted as a process running in the VM that launched it. The process interacts with the NE driver, that exposes an ioctl interface for creating an enclave and setting up its resources. Changelog v9 -> v10 * Update commit message to include the changelog before the SoB tag(s). v8 -> v9 * No changes. v7 -> v8 * Add NE custom error codes for user space memory regions not backed by pages multiple of 2 MiB, invalid flags and enclave CID. * Add max flag value for enclave image load info. v6 -> v7 * Clarify in the ioctls documentation that the return value is -1 and errno is set on failure. * Update the error code value for NE_ERR_INVALID_MEM_REGION_SIZE as it gets in user space as value 25 (ENOTTY) instead of 515. Update the NE custom error codes values range to not be the same as the ones defined in include/linux/errno.h, although these are not propagated to user space. v5 -> v6 * Fix typo in the description about the NE CPU pool. * Update documentation to kernel-doc format. * Remove the ioctl to query API version. v4 -> v5 * Add more details about the ioctl calls usage e.g. error codes, file descriptors used. * Update the ioctl to set an enclave vCPU to not return a file descriptor. * Add specific NE error codes. v3 -> v4 * Decouple NE ioctl interface from KVM API. * Add NE API version and the corresponding ioctl call. * Add enclave / image load flags options. v2 -> v3 * Remove the GPL additional wording as SPDX-License-Identifier is already in place. v1 -> v2 * Add ioctl for getting enclave image load metadata. * Update NE_ENCLAVE_START ioctl name to NE_START_ENCLAVE. * Add entry in Documentation/userspace-api/ioctl/ioctl-number.rst for NE ioctls. * Update NE ioctls definition based on the updated ioctl range for major and minor. Reviewed-by: Alexander Graf Reviewed-by: Stefan Hajnoczi Signed-off-by: Alexandru Vasile Signed-off-by: Andra Paraschiv Link: https://lore.kernel.org/r/20200921121732.44291-2-andraprs@amazon.com Signed-off-by: Greg Kroah-Hartman --- include/uapi/linux/nitro_enclaves.h | 359 ++++++++++++++++++++++++++++++++++++ 1 file changed, 359 insertions(+) create mode 100644 include/uapi/linux/nitro_enclaves.h (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/nitro_enclaves.h b/include/uapi/linux/nitro_enclaves.h new file mode 100644 index 000000000000..b945073fe544 --- /dev/null +++ b/include/uapi/linux/nitro_enclaves.h @@ -0,0 +1,359 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +/* + * Copyright 2020 Amazon.com, Inc. or its affiliates. All Rights Reserved. + */ + +#ifndef _UAPI_LINUX_NITRO_ENCLAVES_H_ +#define _UAPI_LINUX_NITRO_ENCLAVES_H_ + +#include + +/** + * DOC: Nitro Enclaves (NE) Kernel Driver Interface + */ + +/** + * NE_CREATE_VM - The command is used to create a slot that is associated with + * an enclave VM. + * The generated unique slot id is an output parameter. + * The ioctl can be invoked on the /dev/nitro_enclaves fd, before + * setting any resources, such as memory and vCPUs, for an + * enclave. Memory and vCPUs are set for the slot mapped to an enclave. + * A NE CPU pool has to be set before calling this function. The + * pool can be set after the NE driver load, using + * /sys/module/nitro_enclaves/parameters/ne_cpus. + * Its format is the detailed in the cpu-lists section: + * https://www.kernel.org/doc/html/latest/admin-guide/kernel-parameters.html + * CPU 0 and its siblings have to remain available for the + * primary / parent VM, so they cannot be set for enclaves. Full + * CPU core(s), from the same NUMA node, need(s) to be included + * in the CPU pool. + * + * Context: Process context. + * Return: + * * Enclave file descriptor - Enclave file descriptor used with + * ioctl calls to set vCPUs and memory + * regions, then start the enclave. + * * -1 - There was a failure in the ioctl logic. + * On failure, errno is set to: + * * EFAULT - copy_to_user() failure. + * * ENOMEM - Memory allocation failure for internal + * bookkeeping variables. + * * NE_ERR_NO_CPUS_AVAIL_IN_POOL - No NE CPU pool set / no CPUs available + * in the pool. + * * Error codes from get_unused_fd_flags() and anon_inode_getfile(). + * * Error codes from the NE PCI device request. + */ +#define NE_CREATE_VM _IOR(0xAE, 0x20, __u64) + +/** + * NE_ADD_VCPU - The command is used to set a vCPU for an enclave. The vCPU can + * be auto-chosen from the NE CPU pool or it can be set by the + * caller, with the note that it needs to be available in the NE + * CPU pool. Full CPU core(s), from the same NUMA node, need(s) to + * be associated with an enclave. + * The vCPU id is an input / output parameter. If its value is 0, + * then a CPU is chosen from the enclave CPU pool and returned via + * this parameter. + * The ioctl can be invoked on the enclave fd, before an enclave + * is started. + * + * Context: Process context. + * Return: + * * 0 - Logic succesfully completed. + * * -1 - There was a failure in the ioctl logic. + * On failure, errno is set to: + * * EFAULT - copy_from_user() / copy_to_user() failure. + * * ENOMEM - Memory allocation failure for internal + * bookkeeping variables. + * * EIO - Current task mm is not the same as the one + * that created the enclave. + * * NE_ERR_NO_CPUS_AVAIL_IN_POOL - No CPUs available in the NE CPU pool. + * * NE_ERR_VCPU_ALREADY_USED - The provided vCPU is already used. + * * NE_ERR_VCPU_NOT_IN_CPU_POOL - The provided vCPU is not available in the + * NE CPU pool. + * * NE_ERR_VCPU_INVALID_CPU_CORE - The core id of the provided vCPU is invalid + * or out of range. + * * NE_ERR_NOT_IN_INIT_STATE - The enclave is not in init state + * (init = before being started). + * * NE_ERR_INVALID_VCPU - The provided vCPU is not in the available + * CPUs range. + * * Error codes from the NE PCI device request. + */ +#define NE_ADD_VCPU _IOWR(0xAE, 0x21, __u32) + +/** + * NE_GET_IMAGE_LOAD_INFO - The command is used to get information needed for + * in-memory enclave image loading e.g. offset in + * enclave memory to start placing the enclave image. + * The image load info is an input / output parameter. + * It includes info provided by the caller - flags - + * and returns the offset in enclave memory where to + * start placing the enclave image. + * The ioctl can be invoked on the enclave fd, before + * an enclave is started. + * + * Context: Process context. + * Return: + * * 0 - Logic succesfully completed. + * * -1 - There was a failure in the ioctl logic. + * On failure, errno is set to: + * * EFAULT - copy_from_user() / copy_to_user() failure. + * * NE_ERR_NOT_IN_INIT_STATE - The enclave is not in init state (init = + * before being started). + * * NE_ERR_INVALID_FLAG_VALUE - The value of the provided flag is invalid. + */ +#define NE_GET_IMAGE_LOAD_INFO _IOWR(0xAE, 0x22, struct ne_image_load_info) + +/** + * NE_SET_USER_MEMORY_REGION - The command is used to set a memory region for an + * enclave, given the allocated memory from the + * userspace. Enclave memory needs to be from the + * same NUMA node as the enclave CPUs. + * The user memory region is an input parameter. It + * includes info provided by the caller - flags, + * memory size and userspace address. + * The ioctl can be invoked on the enclave fd, + * before an enclave is started. + * + * Context: Process context. + * Return: + * * 0 - Logic succesfully completed. + * * -1 - There was a failure in the ioctl logic. + * On failure, errno is set to: + * * EFAULT - copy_from_user() failure. + * * EINVAL - Invalid physical memory region(s) e.g. + * unaligned address. + * * EIO - Current task mm is not the same as + * the one that created the enclave. + * * ENOMEM - Memory allocation failure for internal + * bookkeeping variables. + * * NE_ERR_NOT_IN_INIT_STATE - The enclave is not in init state + * (init = before being started). + * * NE_ERR_INVALID_MEM_REGION_SIZE - The memory size of the region is not + * multiple of 2 MiB. + * * NE_ERR_INVALID_MEM_REGION_ADDR - Invalid user space address given. + * * NE_ERR_UNALIGNED_MEM_REGION_ADDR - Unaligned user space address given. + * * NE_ERR_MEM_REGION_ALREADY_USED - The memory region is already used. + * * NE_ERR_MEM_NOT_HUGE_PAGE - The memory region is not backed by + * huge pages. + * * NE_ERR_MEM_DIFFERENT_NUMA_NODE - The memory region is not from the same + * NUMA node as the CPUs. + * * NE_ERR_MEM_MAX_REGIONS - The number of memory regions set for + * the enclave reached maximum. + * * NE_ERR_INVALID_PAGE_SIZE - The memory region is not backed by + * pages multiple of 2 MiB. + * * NE_ERR_INVALID_FLAG_VALUE - The value of the provided flag is invalid. + * * Error codes from get_user_pages(). + * * Error codes from the NE PCI device request. + */ +#define NE_SET_USER_MEMORY_REGION _IOW(0xAE, 0x23, struct ne_user_memory_region) + +/** + * NE_START_ENCLAVE - The command is used to trigger enclave start after the + * enclave resources, such as memory and CPU, have been set. + * The enclave start info is an input / output parameter. It + * includes info provided by the caller - enclave cid and + * flags - and returns the cid (if input cid is 0). + * The ioctl can be invoked on the enclave fd, after an + * enclave slot is created and resources, such as memory and + * vCPUs are set for an enclave. + * + * Context: Process context. + * Return: + * * 0 - Logic succesfully completed. + * * -1 - There was a failure in the ioctl logic. + * On failure, errno is set to: + * * EFAULT - copy_from_user() / copy_to_user() failure. + * * NE_ERR_NOT_IN_INIT_STATE - The enclave is not in init state + * (init = before being started). + * * NE_ERR_NO_MEM_REGIONS_ADDED - No memory regions are set. + * * NE_ERR_NO_VCPUS_ADDED - No vCPUs are set. + * * NE_ERR_FULL_CORES_NOT_USED - Full core(s) not set for the enclave. + * * NE_ERR_ENCLAVE_MEM_MIN_SIZE - Enclave memory is less than minimum + * memory size (64 MiB). + * * NE_ERR_INVALID_FLAG_VALUE - The value of the provided flag is invalid. + * * NE_ERR_INVALID_ENCLAVE_CID - The provided enclave CID is invalid. + * * Error codes from the NE PCI device request. + */ +#define NE_START_ENCLAVE _IOWR(0xAE, 0x24, struct ne_enclave_start_info) + +/** + * DOC: NE specific error codes + */ + +/** + * NE_ERR_VCPU_ALREADY_USED - The provided vCPU is already used. + */ +#define NE_ERR_VCPU_ALREADY_USED (256) +/** + * NE_ERR_VCPU_NOT_IN_CPU_POOL - The provided vCPU is not available in the + * NE CPU pool. + */ +#define NE_ERR_VCPU_NOT_IN_CPU_POOL (257) +/** + * NE_ERR_VCPU_INVALID_CPU_CORE - The core id of the provided vCPU is invalid + * or out of range of the NE CPU pool. + */ +#define NE_ERR_VCPU_INVALID_CPU_CORE (258) +/** + * NE_ERR_INVALID_MEM_REGION_SIZE - The user space memory region size is not + * multiple of 2 MiB. + */ +#define NE_ERR_INVALID_MEM_REGION_SIZE (259) +/** + * NE_ERR_INVALID_MEM_REGION_ADDR - The user space memory region address range + * is invalid. + */ +#define NE_ERR_INVALID_MEM_REGION_ADDR (260) +/** + * NE_ERR_UNALIGNED_MEM_REGION_ADDR - The user space memory region address is + * not aligned. + */ +#define NE_ERR_UNALIGNED_MEM_REGION_ADDR (261) +/** + * NE_ERR_MEM_REGION_ALREADY_USED - The user space memory region is already used. + */ +#define NE_ERR_MEM_REGION_ALREADY_USED (262) +/** + * NE_ERR_MEM_NOT_HUGE_PAGE - The user space memory region is not backed by + * contiguous physical huge page(s). + */ +#define NE_ERR_MEM_NOT_HUGE_PAGE (263) +/** + * NE_ERR_MEM_DIFFERENT_NUMA_NODE - The user space memory region is backed by + * pages from different NUMA nodes than the CPUs. + */ +#define NE_ERR_MEM_DIFFERENT_NUMA_NODE (264) +/** + * NE_ERR_MEM_MAX_REGIONS - The supported max memory regions per enclaves has + * been reached. + */ +#define NE_ERR_MEM_MAX_REGIONS (265) +/** + * NE_ERR_NO_MEM_REGIONS_ADDED - The command to start an enclave is triggered + * and no memory regions are added. + */ +#define NE_ERR_NO_MEM_REGIONS_ADDED (266) +/** + * NE_ERR_NO_VCPUS_ADDED - The command to start an enclave is triggered and no + * vCPUs are added. + */ +#define NE_ERR_NO_VCPUS_ADDED (267) +/** + * NE_ERR_ENCLAVE_MEM_MIN_SIZE - The enclave memory size is lower than the + * minimum supported. + */ +#define NE_ERR_ENCLAVE_MEM_MIN_SIZE (268) +/** + * NE_ERR_FULL_CORES_NOT_USED - The command to start an enclave is triggered and + * full CPU cores are not set. + */ +#define NE_ERR_FULL_CORES_NOT_USED (269) +/** + * NE_ERR_NOT_IN_INIT_STATE - The enclave is not in init state when setting + * resources or triggering start. + */ +#define NE_ERR_NOT_IN_INIT_STATE (270) +/** + * NE_ERR_INVALID_VCPU - The provided vCPU is out of range of the available CPUs. + */ +#define NE_ERR_INVALID_VCPU (271) +/** + * NE_ERR_NO_CPUS_AVAIL_IN_POOL - The command to create an enclave is triggered + * and no CPUs are available in the pool. + */ +#define NE_ERR_NO_CPUS_AVAIL_IN_POOL (272) +/** + * NE_ERR_INVALID_PAGE_SIZE - The user space memory region is not backed by pages + * multiple of 2 MiB. + */ +#define NE_ERR_INVALID_PAGE_SIZE (273) +/** + * NE_ERR_INVALID_FLAG_VALUE - The provided flag value is invalid. + */ +#define NE_ERR_INVALID_FLAG_VALUE (274) +/** + * NE_ERR_INVALID_ENCLAVE_CID - The provided enclave CID is invalid, either + * being a well-known value or the CID of the + * parent / primary VM. + */ +#define NE_ERR_INVALID_ENCLAVE_CID (275) + +/** + * DOC: Image load info flags + */ + +/** + * NE_EIF_IMAGE - Enclave Image Format (EIF) + */ +#define NE_EIF_IMAGE (0x01) + +#define NE_IMAGE_LOAD_MAX_FLAG_VAL (0x02) + +/** + * struct ne_image_load_info - Info necessary for in-memory enclave image + * loading (in / out). + * @flags: Flags to determine the enclave image type + * (e.g. Enclave Image Format - EIF) (in). + * @memory_offset: Offset in enclave memory where to start placing the + * enclave image (out). + */ +struct ne_image_load_info { + __u64 flags; + __u64 memory_offset; +}; + +/** + * DOC: User memory region flags + */ + +/** + * NE_DEFAULT_MEMORY_REGION - Memory region for enclave general usage. + */ +#define NE_DEFAULT_MEMORY_REGION (0x00) + +#define NE_MEMORY_REGION_MAX_FLAG_VAL (0x01) + +/** + * struct ne_user_memory_region - Memory region to be set for an enclave (in). + * @flags: Flags to determine the usage for the memory region (in). + * @memory_size: The size, in bytes, of the memory region to be set for + * an enclave (in). + * @userspace_addr: The start address of the userspace allocated memory of + * the memory region to set for an enclave (in). + */ +struct ne_user_memory_region { + __u64 flags; + __u64 memory_size; + __u64 userspace_addr; +}; + +/** + * DOC: Enclave start info flags + */ + +/** + * NE_ENCLAVE_PRODUCTION_MODE - Start enclave in production mode. + */ +#define NE_ENCLAVE_PRODUCTION_MODE (0x00) +/** + * NE_ENCLAVE_DEBUG_MODE - Start enclave in debug mode. + */ +#define NE_ENCLAVE_DEBUG_MODE (0x01) + +#define NE_ENCLAVE_START_MAX_FLAG_VAL (0x02) + +/** + * struct ne_enclave_start_info - Setup info necessary for enclave start (in / out). + * @flags: Flags for the enclave to start with (e.g. debug mode) (in). + * @enclave_cid: Context ID (CID) for the enclave vsock device. If 0 as + * input, the CID is autogenerated by the hypervisor and + * returned back as output by the driver (in / out). + */ +struct ne_enclave_start_info { + __u64 flags; + __u64 enclave_cid; +}; + +#endif /* _UAPI_LINUX_NITRO_ENCLAVES_H_ */ -- cgit v1.2.3 From c7f0207b613033c56b1217032d2f6326d0c69217 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Wed, 16 Sep 2020 21:11:33 -0700 Subject: fscrypt: make "#define fscrypt_policy" user-only The fscrypt UAPI header defines fscrypt_policy to fscrypt_policy_v1, for source compatibility with old userspace programs. Internally, the kernel doesn't want that compatibility definition. Instead, fscrypt_private.h #undefs it and re-defines it to a union. That works for now. However, in order to add fscrypt_operations::get_dummy_policy(), we'll need to forward declare 'union fscrypt_policy' in include/linux/fscrypt.h. That would cause build errors because "fscrypt_policy" is used in ioctl numbers. To avoid this, modify the UAPI header to make the fscrypt_policy compatibility definition conditional on !__KERNEL__, and make the ioctls use fscrypt_policy_v1 instead of fscrypt_policy. Note that this doesn't change the actual ioctl numbers. Acked-by: Jeff Layton Link: https://lore.kernel.org/r/20200917041136.178600-11-ebiggers@kernel.org Signed-off-by: Eric Biggers --- include/uapi/linux/fscrypt.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/fscrypt.h b/include/uapi/linux/fscrypt.h index 7875709ccfeb..e5de60336938 100644 --- a/include/uapi/linux/fscrypt.h +++ b/include/uapi/linux/fscrypt.h @@ -45,7 +45,6 @@ struct fscrypt_policy_v1 { __u8 flags; __u8 master_key_descriptor[FSCRYPT_KEY_DESCRIPTOR_SIZE]; }; -#define fscrypt_policy fscrypt_policy_v1 /* * Process-subscribed "logon" key description prefix and payload format. @@ -156,9 +155,9 @@ struct fscrypt_get_key_status_arg { __u32 __out_reserved[13]; }; -#define FS_IOC_SET_ENCRYPTION_POLICY _IOR('f', 19, struct fscrypt_policy) +#define FS_IOC_SET_ENCRYPTION_POLICY _IOR('f', 19, struct fscrypt_policy_v1) #define FS_IOC_GET_ENCRYPTION_PWSALT _IOW('f', 20, __u8[16]) -#define FS_IOC_GET_ENCRYPTION_POLICY _IOW('f', 21, struct fscrypt_policy) +#define FS_IOC_GET_ENCRYPTION_POLICY _IOW('f', 21, struct fscrypt_policy_v1) #define FS_IOC_GET_ENCRYPTION_POLICY_EX _IOWR('f', 22, __u8[9]) /* size + version */ #define FS_IOC_ADD_ENCRYPTION_KEY _IOWR('f', 23, struct fscrypt_add_key_arg) #define FS_IOC_REMOVE_ENCRYPTION_KEY _IOWR('f', 24, struct fscrypt_remove_key_arg) @@ -170,6 +169,7 @@ struct fscrypt_get_key_status_arg { /* old names; don't add anything new here! */ #ifndef __KERNEL__ +#define fscrypt_policy fscrypt_policy_v1 #define FS_KEY_DESCRIPTOR_SIZE FSCRYPT_KEY_DESCRIPTOR_SIZE #define FS_POLICY_FLAGS_PAD_4 FSCRYPT_POLICY_FLAGS_PAD_4 #define FS_POLICY_FLAGS_PAD_8 FSCRYPT_POLICY_FLAGS_PAD_8 -- cgit v1.2.3 From 9c4258c78a2a7624c79b797f40ae2dbfd2555e26 Mon Sep 17 00:00:00 2001 From: Nikolay Aleksandrov Date: Tue, 22 Sep 2020 10:30:18 +0300 Subject: net: bridge: mdb: add support to extend add/del commands Since the MDB add/del code expects an exact struct br_mdb_entry we can't really add any extensions, thus add a new nested attribute at the level of MDBA_SET_ENTRY called MDBA_SET_ENTRY_ATTRS which will be used to pass all new options via netlink attributes. This patch doesn't change anything functionally since the new attribute is not used yet, only parsed. Signed-off-by: Nikolay Aleksandrov Signed-off-by: David S. Miller --- include/uapi/linux/if_bridge.h | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/if_bridge.h b/include/uapi/linux/if_bridge.h index 75a2ac479247..dc52f8cffa0d 100644 --- a/include/uapi/linux/if_bridge.h +++ b/include/uapi/linux/if_bridge.h @@ -530,10 +530,22 @@ struct br_mdb_entry { enum { MDBA_SET_ENTRY_UNSPEC, MDBA_SET_ENTRY, + MDBA_SET_ENTRY_ATTRS, __MDBA_SET_ENTRY_MAX, }; #define MDBA_SET_ENTRY_MAX (__MDBA_SET_ENTRY_MAX - 1) +/* [MDBA_SET_ENTRY_ATTRS] = { + * [MDBE_ATTR_xxx] + * ... + * } + */ +enum { + MDBE_ATTR_UNSPEC, + __MDBE_ATTR_MAX, +}; +#define MDBE_ATTR_MAX (__MDBE_ATTR_MAX - 1) + /* Embedded inside LINK_XSTATS_TYPE_BRIDGE */ enum { BRIDGE_XSTATS_UNSPEC, -- cgit v1.2.3 From 88d4bd180419a7cde3947f191dc4e26fbb19f80b Mon Sep 17 00:00:00 2001 From: Nikolay Aleksandrov Date: Tue, 22 Sep 2020 10:30:19 +0300 Subject: net: bridge: mdb: add support for add/del/dump of entries with source Add new mdb attributes (MDBE_ATTR_SOURCE for setting, MDBA_MDB_EATTR_SOURCE for dumping) to allow add/del and dump of mdb entries with a source address (S,G). New S,G entries are created with filter mode of MCAST_INCLUDE. The same attributes are used for IPv4 and IPv6, they're validated and parsed based on their protocol. S,G host joined entries which are added by user are not allowed yet. Signed-off-by: Nikolay Aleksandrov Signed-off-by: David S. Miller --- include/uapi/linux/if_bridge.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/if_bridge.h b/include/uapi/linux/if_bridge.h index dc52f8cffa0d..3e6377c865eb 100644 --- a/include/uapi/linux/if_bridge.h +++ b/include/uapi/linux/if_bridge.h @@ -457,6 +457,7 @@ enum { MDBA_MDB_EATTR_TIMER, MDBA_MDB_EATTR_SRC_LIST, MDBA_MDB_EATTR_GROUP_MODE, + MDBA_MDB_EATTR_SOURCE, __MDBA_MDB_EATTR_MAX }; #define MDBA_MDB_EATTR_MAX (__MDBA_MDB_EATTR_MAX - 1) @@ -542,6 +543,7 @@ enum { */ enum { MDBE_ATTR_UNSPEC, + MDBE_ATTR_SOURCE, __MDBE_ATTR_MAX, }; #define MDBE_ATTR_MAX (__MDBE_ATTR_MAX - 1) -- cgit v1.2.3 From 8f8cb77e0b22d9044d8d57ab3bb18ea8d0474752 Mon Sep 17 00:00:00 2001 From: Nikolay Aleksandrov Date: Tue, 22 Sep 2020 10:30:21 +0300 Subject: net: bridge: mcast: add rt_protocol field to the port group struct We need to be able to differentiate between pg entries created by user-space and the kernel when we start generating S,G entries for IGMPv3/MLDv2's fast path. User-space entries are created by default as RTPROT_STATIC and the kernel entries are RTPROT_KERNEL. Later we can allow user-space to provide the entry rt_protocol so we can differentiate between who added the entries specifically (e.g. clag, admin, frr etc). Signed-off-by: Nikolay Aleksandrov Signed-off-by: David S. Miller --- include/uapi/linux/if_bridge.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/if_bridge.h b/include/uapi/linux/if_bridge.h index 3e6377c865eb..1054f151078d 100644 --- a/include/uapi/linux/if_bridge.h +++ b/include/uapi/linux/if_bridge.h @@ -458,6 +458,7 @@ enum { MDBA_MDB_EATTR_SRC_LIST, MDBA_MDB_EATTR_GROUP_MODE, MDBA_MDB_EATTR_SOURCE, + MDBA_MDB_EATTR_RTPROT, __MDBA_MDB_EATTR_MAX }; #define MDBA_MDB_EATTR_MAX (__MDBA_MDB_EATTR_MAX - 1) -- cgit v1.2.3 From 8266a0491e92d39dc9af739e8380a0daa9b8836b Mon Sep 17 00:00:00 2001 From: Nikolay Aleksandrov Date: Tue, 22 Sep 2020 10:30:24 +0300 Subject: net: bridge: mcast: handle port group filter modes We need to handle group filter mode transitions and initial state. To change a port group's INCLUDE -> EXCLUDE mode (or when we have added a new port group in EXCLUDE mode) we need to add that port to all of *,G ports' S,G entries for proper replication. When the EXCLUDE state is changed from IGMPv3 report, br_multicast_fwd_filter_exclude() must be called after the source list processing because the assumption is that all of the group's S,G entries will be created before transitioning to EXCLUDE mode, i.e. most importantly its blocked entries will already be added so it will not get automatically added to them. The transition EXCLUDE -> INCLUDE happens only when a port group timer expires, it requires us to remove that port from all of *,G ports' S,G entries where it was automatically added previously. Finally when we are adding a new S,G entry we must add all of *,G's EXCLUDE ports to it. In order to distinguish automatically added *,G EXCLUDE ports we have a new port group flag - MDB_PG_FLAGS_STAR_EXCL. Signed-off-by: Nikolay Aleksandrov Signed-off-by: David S. Miller --- include/uapi/linux/if_bridge.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/if_bridge.h b/include/uapi/linux/if_bridge.h index 1054f151078d..e4bd30a25f6b 100644 --- a/include/uapi/linux/if_bridge.h +++ b/include/uapi/linux/if_bridge.h @@ -518,6 +518,7 @@ struct br_mdb_entry { __u8 state; #define MDB_FLAGS_OFFLOAD (1 << 0) #define MDB_FLAGS_FAST_LEAVE (1 << 1) +#define MDB_FLAGS_STAR_EXCL (1 << 2) __u8 flags; __u16 vid; struct { -- cgit v1.2.3 From 9116ffbf1dd71f953ffda4198d01f82d3ca16df8 Mon Sep 17 00:00:00 2001 From: Nikolay Aleksandrov Date: Tue, 22 Sep 2020 10:30:25 +0300 Subject: net: bridge: mcast: add support for blocked port groups When excluding S,G entries we need a way to block a particular S,G,port. The new port group flag is managed based on the source's timer as per RFCs 3376 and 3810. When a source expires and its port group is in EXCLUDE mode, it will be blocked. Signed-off-by: Nikolay Aleksandrov Signed-off-by: David S. Miller --- include/uapi/linux/if_bridge.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/if_bridge.h b/include/uapi/linux/if_bridge.h index e4bd30a25f6b..4c687686aa8f 100644 --- a/include/uapi/linux/if_bridge.h +++ b/include/uapi/linux/if_bridge.h @@ -519,6 +519,7 @@ struct br_mdb_entry { #define MDB_FLAGS_OFFLOAD (1 << 0) #define MDB_FLAGS_FAST_LEAVE (1 << 1) #define MDB_FLAGS_STAR_EXCL (1 << 2) +#define MDB_FLAGS_BLOCKED (1 << 3) __u8 flags; __u16 vid; struct { -- cgit v1.2.3 From 77ebdabe8de7c02f43c6de3357f79ff96f9f0579 Mon Sep 17 00:00:00 2001 From: Elena Petrova Date: Fri, 18 Sep 2020 16:42:16 +0100 Subject: crypto: af_alg - add extra parameters for DRBG interface MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Extend the user-space RNG interface: 1. Add entropy input via ALG_SET_DRBG_ENTROPY setsockopt option; 2. Add additional data input via sendmsg syscall. This allows DRBG to be tested with test vectors, for example for the purpose of CAVP testing, which otherwise isn't possible. To prevent erroneous use of entropy input, it is hidden under CRYPTO_USER_API_RNG_CAVP config option and requires CAP_SYS_ADMIN to succeed. Signed-off-by: Elena Petrova Acked-by: Stephan Müller Reviewed-by: Eric Biggers Signed-off-by: Herbert Xu --- include/uapi/linux/if_alg.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/if_alg.h b/include/uapi/linux/if_alg.h index bc2bcdec377b..60b7c2efd921 100644 --- a/include/uapi/linux/if_alg.h +++ b/include/uapi/linux/if_alg.h @@ -35,6 +35,7 @@ struct af_alg_iv { #define ALG_SET_OP 3 #define ALG_SET_AEAD_ASSOCLEN 4 #define ALG_SET_AEAD_AUTHSIZE 5 +#define ALG_SET_DRBG_ENTROPY 6 /* Operations */ #define ALG_OP_DECRYPT 0 -- cgit v1.2.3 From 2a36ab717e8fe678d98f81c14a0b124712719840 Mon Sep 17 00:00:00 2001 From: Peter Oskolkov Date: Wed, 23 Sep 2020 16:36:16 -0700 Subject: rseq/membarrier: Add MEMBARRIER_CMD_PRIVATE_EXPEDITED_RSEQ This patchset is based on Google-internal RSEQ work done by Paul Turner and Andrew Hunter. When working with per-CPU RSEQ-based memory allocations, it is sometimes important to make sure that a global memory location is no longer accessed from RSEQ critical sections. For example, there can be two per-CPU lists, one is "active" and accessed per-CPU, while another one is inactive and worked on asynchronously "off CPU" (e.g. garbage collection is performed). Then at some point the two lists are swapped, and a fast RCU-like mechanism is required to make sure that the previously active list is no longer accessed. This patch introduces such a mechanism: in short, membarrier() syscall issues an IPI to a CPU, restarting a potentially active RSEQ critical section on the CPU. Signed-off-by: Peter Oskolkov Signed-off-by: Peter Zijlstra (Intel) Acked-by: Mathieu Desnoyers Link: https://lkml.kernel.org/r/20200923233618.2572849-1-posk@google.com --- include/uapi/linux/membarrier.h | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/membarrier.h b/include/uapi/linux/membarrier.h index 5891d7614c8c..737605897f36 100644 --- a/include/uapi/linux/membarrier.h +++ b/include/uapi/linux/membarrier.h @@ -114,6 +114,26 @@ * If this command is not implemented by an * architecture, -EINVAL is returned. * Returns 0 on success. + * @MEMBARRIER_CMD_PRIVATE_EXPEDITED_RSEQ: + * Ensure the caller thread, upon return from + * system call, that all its running thread + * siblings have any currently running rseq + * critical sections restarted if @flags + * parameter is 0; if @flags parameter is + * MEMBARRIER_CMD_FLAG_CPU, + * then this operation is performed only + * on CPU indicated by @cpu_id. If this command is + * not implemented by an architecture, -EINVAL + * is returned. A process needs to register its + * intent to use the private expedited rseq + * command prior to using it, otherwise + * this command returns -EPERM. + * @MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED_RSEQ: + * Register the process intent to use + * MEMBARRIER_CMD_PRIVATE_EXPEDITED_RSEQ. + * If this command is not implemented by an + * architecture, -EINVAL is returned. + * Returns 0 on success. * @MEMBARRIER_CMD_SHARED: * Alias to MEMBARRIER_CMD_GLOBAL. Provided for * header backward compatibility. @@ -131,9 +151,15 @@ enum membarrier_cmd { MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED = (1 << 4), MEMBARRIER_CMD_PRIVATE_EXPEDITED_SYNC_CORE = (1 << 5), MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED_SYNC_CORE = (1 << 6), + MEMBARRIER_CMD_PRIVATE_EXPEDITED_RSEQ = (1 << 7), + MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED_RSEQ = (1 << 8), /* Alias for header backward compatibility. */ MEMBARRIER_CMD_SHARED = MEMBARRIER_CMD_GLOBAL, }; +enum membarrier_cmd_flag { + MEMBARRIER_CMD_FLAG_CPU = (1 << 0), +}; + #endif /* _UAPI_LINUX_MEMBARRIER_H */ -- cgit v1.2.3 From a5fa25adf03d4b063aece74ba70ccbb3a71af122 Mon Sep 17 00:00:00 2001 From: Martin KaFai Lau Date: Thu, 24 Sep 2020 17:03:56 -0700 Subject: bpf: Change bpf_sk_release and bpf_sk_*cgroup_id to accept ARG_PTR_TO_BTF_ID_SOCK_COMMON The previous patch allows the networking bpf prog to use the bpf_skc_to_*() helpers to get a PTR_TO_BTF_ID socket pointer, e.g. "struct tcp_sock *". It allows the bpf prog to read all the fields of the tcp_sock. This patch changes the bpf_sk_release() and bpf_sk_*cgroup_id() to take ARG_PTR_TO_BTF_ID_SOCK_COMMON such that they will work with the pointer returned by the bpf_skc_to_*() helpers also. For example, the following will work: sk = bpf_skc_lookup_tcp(skb, tuple, tuplen, BPF_F_CURRENT_NETNS, 0); if (!sk) return; tp = bpf_skc_to_tcp_sock(sk); if (!tp) { bpf_sk_release(sk); return; } lsndtime = tp->lsndtime; /* Pass tp to bpf_sk_release() will also work */ bpf_sk_release(tp); Since PTR_TO_BTF_ID could be NULL, the helper taking ARG_PTR_TO_BTF_ID_SOCK_COMMON has to check for NULL at runtime. A btf_id of "struct sock" may not always mean a fullsock. Regardless the helper's running context may get a non-fullsock or not, considering fullsock check/handling is pretty cheap, it is better to keep the same verifier expectation on helper that takes ARG_PTR_TO_BTF_ID* will be able to handle the minisock situation. In the bpf_sk_*cgroup_id() case, it will try to get a fullsock by using sk_to_full_sk() as its skb variant bpf_sk"b"_*cgroup_id() has already been doing. bpf_sk_release can already handle minisock, so nothing special has to be done. Signed-off-by: Martin KaFai Lau Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20200925000356.3856047-1-kafai@fb.com --- include/uapi/linux/bpf.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index a22812561064..c96a56d9c3be 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -2512,7 +2512,7 @@ union bpf_attr { * result is from *reuse*\ **->socks**\ [] using the hash of the * tuple. * - * long bpf_sk_release(struct bpf_sock *sock) + * long bpf_sk_release(void *sock) * Description * Release the reference held by *sock*. *sock* must be a * non-**NULL** pointer that was returned from @@ -3234,11 +3234,11 @@ union bpf_attr { * * **-EOVERFLOW** if an overflow happened: The same object will be tried again. * - * u64 bpf_sk_cgroup_id(struct bpf_sock *sk) + * u64 bpf_sk_cgroup_id(void *sk) * Description * Return the cgroup v2 id of the socket *sk*. * - * *sk* must be a non-**NULL** pointer to a full socket, e.g. one + * *sk* must be a non-**NULL** pointer to a socket, e.g. one * returned from **bpf_sk_lookup_xxx**\ (), * **bpf_sk_fullsock**\ (), etc. The format of returned id is * same as in **bpf_skb_cgroup_id**\ (). @@ -3248,7 +3248,7 @@ union bpf_attr { * Return * The id is returned or 0 in case the id could not be retrieved. * - * u64 bpf_sk_ancestor_cgroup_id(struct bpf_sock *sk, int ancestor_level) + * u64 bpf_sk_ancestor_cgroup_id(void *sk, int ancestor_level) * Description * Return id of cgroup v2 that is ancestor of cgroup associated * with the *sk* at the *ancestor_level*. The root cgroup is at -- cgit v1.2.3 From 592a3498648af000e93dff2d36229ab11cd8c7f6 Mon Sep 17 00:00:00 2001 From: Martin KaFai Lau Date: Thu, 24 Sep 2020 17:04:02 -0700 Subject: bpf: Change bpf_sk_storage_*() to accept ARG_PTR_TO_BTF_ID_SOCK_COMMON This patch changes the bpf_sk_storage_*() to take ARG_PTR_TO_BTF_ID_SOCK_COMMON such that they will work with the pointer returned by the bpf_skc_to_*() helpers also. A micro benchmark has been done on a "cgroup_skb/egress" bpf program which does a bpf_sk_storage_get(). It was driven by netperf doing a 4096 connected UDP_STREAM test with 64bytes packet. The stats from "kernel.bpf_stats_enabled" shows no meaningful difference. The sk_storage_get_btf_proto, sk_storage_delete_btf_proto, btf_sk_storage_get_proto, and btf_sk_storage_delete_proto are no longer needed, so they are removed. Signed-off-by: Martin KaFai Lau Signed-off-by: Alexei Starovoitov Acked-by: Lorenz Bauer Link: https://lore.kernel.org/bpf/20200925000402.3856307-1-kafai@fb.com --- include/uapi/linux/bpf.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index c96a56d9c3be..0ec6dbeb17a5 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -2861,6 +2861,7 @@ union bpf_attr { * 0 on success. * * **-ENOENT** if the bpf-local-storage cannot be found. + * **-EINVAL** if sk is not a fullsock (e.g. a request_sock). * * long bpf_send_signal(u32 sig) * Description -- cgit v1.2.3 From c0df236e1394970f3503a8fb103de95d000014ca Mon Sep 17 00:00:00 2001 From: Martin KaFai Lau Date: Thu, 24 Sep 2020 17:04:09 -0700 Subject: bpf: Change bpf_tcp_*_syncookie to accept ARG_PTR_TO_BTF_ID_SOCK_COMMON This patch changes the bpf_tcp_*_syncookie() to take ARG_PTR_TO_BTF_ID_SOCK_COMMON such that they will work with the pointer returned by the bpf_skc_to_*() helpers also. Signed-off-by: Martin KaFai Lau Signed-off-by: Alexei Starovoitov Acked-by: Lorenz Bauer Link: https://lore.kernel.org/bpf/20200925000409.3856725-1-kafai@fb.com --- include/uapi/linux/bpf.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 0ec6dbeb17a5..69b9e30375bc 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -2692,7 +2692,7 @@ union bpf_attr { * result is from *reuse*\ **->socks**\ [] using the hash of the * tuple. * - * long bpf_tcp_check_syncookie(struct bpf_sock *sk, void *iph, u32 iph_len, struct tcphdr *th, u32 th_len) + * long bpf_tcp_check_syncookie(void *sk, void *iph, u32 iph_len, struct tcphdr *th, u32 th_len) * Description * Check whether *iph* and *th* contain a valid SYN cookie ACK for * the listening socket in *sk*. @@ -2878,7 +2878,7 @@ union bpf_attr { * * **-EAGAIN** if bpf program can try again. * - * s64 bpf_tcp_gen_syncookie(struct bpf_sock *sk, void *iph, u32 iph_len, struct tcphdr *th, u32 th_len) + * s64 bpf_tcp_gen_syncookie(void *sk, void *iph, u32 iph_len, struct tcphdr *th, u32 th_len) * Description * Try to issue a SYN cookie for the packet with corresponding * IP/TCP headers, *iph* and *th*, on the listening socket in *sk*. -- cgit v1.2.3 From 27e5203bd9c5cc6d54dcac48c3027f3f04522b8b Mon Sep 17 00:00:00 2001 From: Martin KaFai Lau Date: Thu, 24 Sep 2020 17:04:15 -0700 Subject: bpf: Change bpf_sk_assign to accept ARG_PTR_TO_BTF_ID_SOCK_COMMON This patch changes the bpf_sk_assign() to take ARG_PTR_TO_BTF_ID_SOCK_COMMON such that they will work with the pointer returned by the bpf_skc_to_*() helpers also. The bpf_sk_lookup_assign() is taking ARG_PTR_TO_SOCKET_"OR_NULL". Meaning it specifically takes a literal NULL. ARG_PTR_TO_BTF_ID_SOCK_COMMON does not allow a literal NULL, so another ARG type is required for this purpose and another follow-up patch can be used if there is such need. Signed-off-by: Martin KaFai Lau Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20200925000415.3857374-1-kafai@fb.com --- include/uapi/linux/bpf.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 69b9e30375bc..2d6519a2ed77 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -3107,7 +3107,7 @@ union bpf_attr { * Return * The id is returned or 0 in case the id could not be retrieved. * - * long bpf_sk_assign(struct sk_buff *skb, struct bpf_sock *sk, u64 flags) + * long bpf_sk_assign(struct sk_buff *skb, void *sk, u64 flags) * Description * Helper is overloaded depending on BPF program type. This * description applies to **BPF_PROG_TYPE_SCHED_CLS** and -- cgit v1.2.3 From 5d5b4128c4caae34ddcd9b2dc30ac4d6155617a3 Mon Sep 17 00:00:00 2001 From: Jacob Keller Date: Fri, 25 Sep 2020 13:46:07 -0700 Subject: devlink: introduce flash update overwrite mask Sections of device flash may contain settings or device identifying information. When performing a flash update, it is generally expected that these settings and identifiers are not overwritten. However, it may sometimes be useful to allow overwriting these fields when performing a flash update. Some examples include, 1) customizing the initial device config on first programming, such as overwriting default device identifying information, or 2) reverting a device configuration to known good state provided in the new firmware image, or 3) in case it is suspected that current firmware logic for managing the preservation of fields during an update is broken. Although some devices are able to completely separate these types of settings and fields into separate components, this is not true for all hardware. To support controlling this behavior, a new DEVLINK_ATTR_FLASH_UPDATE_OVERWRITE_MASK is defined. This is an nla_bitfield32 which will define what subset of fields in a component should be overwritten during an update. If no bits are specified, or of the overwrite mask is not provided, then an update should not overwrite anything, and should maintain the settings and identifiers as they are in the previous image. If the overwrite mask has the DEVLINK_FLASH_OVERWRITE_SETTINGS bit set, then the device should be configured to overwrite any of the settings in the requested component with settings found in the provided image. Similarly, if the DEVLINK_FLASH_OVERWRITE_IDENTIFIERS bit is set, the device should be configured to overwrite any device identifiers in the requested component with the identifiers from the image. Multiple overwrite modes may be combined to indicate that a combination of the set of fields that should be overwritten. Drivers which support the new overwrite mask must set the DEVLINK_SUPPORT_FLASH_UPDATE_OVERWRITE_MASK in the supported_flash_update_params field of their devlink_ops. Signed-off-by: Jacob Keller Reviewed-by: Jakub Kicinski Signed-off-by: David S. Miller --- include/uapi/linux/devlink.h | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/devlink.h b/include/uapi/linux/devlink.h index a2ecc8b00611..7b0face1bad5 100644 --- a/include/uapi/linux/devlink.h +++ b/include/uapi/linux/devlink.h @@ -230,6 +230,28 @@ enum { DEVLINK_ATTR_STATS_MAX = __DEVLINK_ATTR_STATS_MAX - 1 }; +/* Specify what sections of a flash component can be overwritten when + * performing an update. Overwriting of firmware binary sections is always + * implicitly assumed to be allowed. + * + * Each section must be documented in + * Documentation/networking/devlink/devlink-flash.rst + * + */ +enum { + DEVLINK_FLASH_OVERWRITE_SETTINGS_BIT, + DEVLINK_FLASH_OVERWRITE_IDENTIFIERS_BIT, + + __DEVLINK_FLASH_OVERWRITE_MAX_BIT, + DEVLINK_FLASH_OVERWRITE_MAX_BIT = __DEVLINK_FLASH_OVERWRITE_MAX_BIT - 1 +}; + +#define DEVLINK_FLASH_OVERWRITE_SETTINGS _BITUL(DEVLINK_FLASH_OVERWRITE_SETTINGS_BIT) +#define DEVLINK_FLASH_OVERWRITE_IDENTIFIERS _BITUL(DEVLINK_FLASH_OVERWRITE_IDENTIFIERS_BIT) + +#define DEVLINK_SUPPORTED_FLASH_OVERWRITE_SECTIONS \ + (_BITUL(__DEVLINK_FLASH_OVERWRITE_MAX_BIT) - 1) + /** * enum devlink_trap_action - Packet trap action. * @DEVLINK_TRAP_ACTION_DROP: Packet is dropped by the device and a copy is not @@ -464,6 +486,7 @@ enum devlink_attr { DEVLINK_ATTR_PORT_CONTROLLER_NUMBER, /* u32 */ DEVLINK_ATTR_FLASH_UPDATE_STATUS_TIMEOUT, /* u64 */ + DEVLINK_ATTR_FLASH_UPDATE_OVERWRITE_MASK, /* bitfield32 */ /* add new attributes above here, update the policy in devlink.c */ -- cgit v1.2.3 From b38c73ca1c213bbf8a872b334a6bb835becfaba5 Mon Sep 17 00:00:00 2001 From: Dafna Hirschfeld Date: Thu, 27 Aug 2020 21:46:08 +0200 Subject: media: v4l2: add support for colorspace conversion API (CSC) for video capture For video capture it is the driver that reports the colorspace, transfer function, Y'CbCr/HSV encoding and quantization range used by the video, and there is no way to request something different, even though many HDTV receivers have some sort of colorspace conversion capabilities. For output video this feature already exists since the application specifies this information for the video format it will send out, and the transmitter will enable any available CSC if a format conversion has to be performed in order to match the capabilities of the sink. For video capture we propose adding new v4l2_pix_format flag: V4L2_PIX_FMT_FLAG_SET_CSC. The flag is set by the application, the driver will interpret the colorspace, xfer_func, ycbcr_enc/hsv_enc and quantization fields as the requested colorspace information and will attempt to do the conversion it supports. Drivers set the flags V4L2_FMT_FLAG_CSC_COLORSPACE, V4L2_FMT_FLAG_CSC_XFER_FUNC, V4L2_FMT_FLAG_CSC_YCBCR_ENC/V4L2_FMT_FLAG_CSC_HSV_ENC, V4L2_FMT_FLAG_CSC_QUANTIZATION, in the flags field of the struct v4l2_fmtdesc during enumeration to indicate that they support colorspace conversion for the respective field. Drivers do not have to actually look at the flags. If the flags are not set, then the fields 'colorspace', 'xfer_func', 'ycbcr_enc/hsv_enc', and 'quantization' are set to the default values by the core, i.e. just pass on the received format without conversion. Signed-off-by: Hans Verkuil Signed-off-by: Philipp Zabel Signed-off-by: Dafna Hirschfeld Signed-off-by: Mauro Carvalho Chehab --- include/uapi/linux/videodev2.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/videodev2.h b/include/uapi/linux/videodev2.h index 4769628790da..b2bc83f37024 100644 --- a/include/uapi/linux/videodev2.h +++ b/include/uapi/linux/videodev2.h @@ -777,6 +777,7 @@ struct v4l2_pix_format { /* Flags */ #define V4L2_PIX_FMT_FLAG_PREMUL_ALPHA 0x00000001 +#define V4L2_PIX_FMT_FLAG_SET_CSC 0x00000002 /* * F O R M A T E N U M E R A T I O N @@ -796,6 +797,11 @@ struct v4l2_fmtdesc { #define V4L2_FMT_FLAG_CONTINUOUS_BYTESTREAM 0x0004 #define V4L2_FMT_FLAG_DYN_RESOLUTION 0x0008 #define V4L2_FMT_FLAG_ENC_CAP_FRAME_INTERVAL 0x0010 +#define V4L2_FMT_FLAG_CSC_COLORSPACE 0x0020 +#define V4L2_FMT_FLAG_CSC_XFER_FUNC 0x0040 +#define V4L2_FMT_FLAG_CSC_YCBCR_ENC 0x0080 +#define V4L2_FMT_FLAG_CSC_HSV_ENC V4L2_FMT_FLAG_CSC_YCBCR_ENC +#define V4L2_FMT_FLAG_CSC_QUANTIZATION 0x0100 /* Frame Size and frame rate enumeration */ /* -- cgit v1.2.3 From 62aacfa9bf93f94f6949338e0c7a2ed4c4bd2c2a Mon Sep 17 00:00:00 2001 From: Dafna Hirschfeld Date: Thu, 27 Aug 2020 21:46:10 +0200 Subject: media: v4l2: extend the CSC API to subdevice. This patch extends the CSC API in video devices to be supported also on sub-devices. The flag V4L2_MBUS_FRAMEFMT_SET_CSC set by the application when calling VIDIOC_SUBDEV_S_FMT ioctl. The flags: V4L2_SUBDEV_MBUS_CODE_CSC_COLORSPACE, V4L2_SUBDEV_MBUS_CODE_CSC_XFER_FUNC, V4L2_SUBDEV_MBUS_CODE_CSC_YCBCR_ENC/V4L2_SUBDEV_MBUS_CODE_CSC_HSV_ENC V4L2_SUBDEV_MBUS_CODE_CSC_QUANTIZATION are set by the driver in the VIDIOC_SUBDEV_ENUM_MBUS_CODE ioctl. New 'flags' fields were added to the structs v4l2_subdev_mbus_code_enum, v4l2_mbus_framefmt which are borrowed from the 'reserved' field The patch also replaces the 'ycbcr_enc' field in 'struct v4l2_mbus_framefmt' with a union that includes 'hsv_enc' Signed-off-by: Dafna Hirschfeld Signed-off-by: Hans Verkuil Signed-off-by: Mauro Carvalho Chehab --- include/uapi/linux/v4l2-mediabus.h | 15 +++++++++++++-- include/uapi/linux/v4l2-subdev.h | 10 +++++++++- 2 files changed, 22 insertions(+), 3 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/v4l2-mediabus.h b/include/uapi/linux/v4l2-mediabus.h index 123a231001a8..903e67b16711 100644 --- a/include/uapi/linux/v4l2-mediabus.h +++ b/include/uapi/linux/v4l2-mediabus.h @@ -16,6 +16,8 @@ #include #include +#define V4L2_MBUS_FRAMEFMT_SET_CSC 0x0001 + /** * struct v4l2_mbus_framefmt - frame format on the media bus * @width: image width @@ -24,8 +26,11 @@ * @field: used interlacing type (from enum v4l2_field) * @colorspace: colorspace of the data (from enum v4l2_colorspace) * @ycbcr_enc: YCbCr encoding of the data (from enum v4l2_ycbcr_encoding) + * @hsv_enc: HSV encoding of the data (from enum v4l2_hsv_encoding) * @quantization: quantization of the data (from enum v4l2_quantization) * @xfer_func: transfer function of the data (from enum v4l2_xfer_func) + * @flags: flags (V4L2_MBUS_FRAMEFMT_*) + * @reserved: reserved bytes that can be later used */ struct v4l2_mbus_framefmt { __u32 width; @@ -33,10 +38,16 @@ struct v4l2_mbus_framefmt { __u32 code; __u32 field; __u32 colorspace; - __u16 ycbcr_enc; + union { + /* enum v4l2_ycbcr_encoding */ + __u16 ycbcr_enc; + /* enum v4l2_hsv_encoding */ + __u16 hsv_enc; + }; __u16 quantization; __u16 xfer_func; - __u16 reserved[11]; + __u16 flags; + __u16 reserved[10]; }; #ifndef __KERNEL__ diff --git a/include/uapi/linux/v4l2-subdev.h b/include/uapi/linux/v4l2-subdev.h index 5d2a1dab7911..00850b98078a 100644 --- a/include/uapi/linux/v4l2-subdev.h +++ b/include/uapi/linux/v4l2-subdev.h @@ -65,19 +65,27 @@ struct v4l2_subdev_crop { __u32 reserved[8]; }; +#define V4L2_SUBDEV_MBUS_CODE_CSC_COLORSPACE 0x00000001 +#define V4L2_SUBDEV_MBUS_CODE_CSC_XFER_FUNC 0x00000002 +#define V4L2_SUBDEV_MBUS_CODE_CSC_YCBCR_ENC 0x00000004 +#define V4L2_SUBDEV_MBUS_CODE_CSC_HSV_ENC V4L2_SUBDEV_MBUS_CODE_CSC_YCBCR_ENC +#define V4L2_SUBDEV_MBUS_CODE_CSC_QUANTIZATION 0x00000008 + /** * struct v4l2_subdev_mbus_code_enum - Media bus format enumeration * @pad: pad number, as reported by the media API * @index: format index during enumeration * @code: format code (MEDIA_BUS_FMT_ definitions) * @which: format type (from enum v4l2_subdev_format_whence) + * @flags: flags set by the driver, (V4L2_SUBDEV_MBUS_CODE_*) */ struct v4l2_subdev_mbus_code_enum { __u32 pad; __u32 index; __u32 code; __u32 which; - __u32 reserved[8]; + __u32 flags; + __u32 reserved[7]; }; /** -- cgit v1.2.3 From c8cb5b854b40f2ce52ccd032fa19750f4181d5fc Mon Sep 17 00:00:00 2001 From: Tova Mussai Date: Fri, 18 Sep 2020 11:33:13 +0200 Subject: nl80211/cfg80211: support 6 GHz scanning Support 6 GHz scanning, by * a new scan flag to scan for colocated BSSes advertised by (and found) APs on 2.4 & 5 GHz * doing the necessary reduced neighbor report parsing for this, to find them * adding the ability to split the scan request in case the device by itself cannot support this. Also add some necessary bits in mac80211 to not break with these changes. Signed-off-by: Tova Mussai Signed-off-by: Johannes Berg Link: https://lore.kernel.org/r/20200918113313.232917c93af9.Ida22f0212f9122f47094d81659e879a50434a6a2@changeid Signed-off-by: Johannes Berg --- include/uapi/linux/nl80211.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index bdc90b8dfd24..c74ceaddb909 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -6059,6 +6059,8 @@ enum nl80211_timeout_reason { * @NL80211_SCAN_FLAG_FREQ_KHZ: report scan results with * %NL80211_ATTR_SCAN_FREQ_KHZ. This also means * %NL80211_ATTR_SCAN_FREQUENCIES will not be included. + * @NL80211_SCAN_FLAG_COLOCATED_6GHZ: scan for colocated APs reported by + * 2.4/5 GHz APs */ enum nl80211_scan_flags { NL80211_SCAN_FLAG_LOW_PRIORITY = 1<<0, @@ -6075,6 +6077,7 @@ enum nl80211_scan_flags { NL80211_SCAN_FLAG_RANDOM_SN = 1<<11, NL80211_SCAN_FLAG_MIN_PREQ_CONTENT = 1<<12, NL80211_SCAN_FLAG_FREQ_KHZ = 1<<13, + NL80211_SCAN_FLAG_COLOCATED_6GHZ = 1<<14, }; /** -- cgit v1.2.3 From d2b7588a47de8322891de38ec14d15105d66cb1e Mon Sep 17 00:00:00 2001 From: Thomas Pedersen Date: Mon, 21 Sep 2020 19:28:04 -0700 Subject: nl80211: support S1G capability overrides in assoc NL80211_ATTR_S1G_CAPABILITY can be passed along with NL80211_ATTR_S1G_CAPABILITY_MASK to NL80211_CMD_ASSOCIATE to indicate S1G capabilities which should override the hardware capabilities in eg. the association request. Signed-off-by: Thomas Pedersen Link: https://lore.kernel.org/r/20200922022818.15855-4-thomas@adapt-ip.com [johannes: always require both attributes together, commit message] Signed-off-by: Johannes Berg --- include/uapi/linux/nl80211.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index c74ceaddb909..05db40b4c56f 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -2521,6 +2521,12 @@ enum nl80211_commands { * unsolicited broadcast probe response. It is a nested attribute, see * &enum nl80211_unsol_bcast_probe_resp_attributes. * + * @NL80211_ATTR_S1G_CAPABILITY: S1G Capability information element (from + * association request when used with NL80211_CMD_NEW_STATION) + * @NL80211_ATTR_S1G_CAPABILITY_MASK: S1G Capability Information element + * override mask. Used with NL80211_ATTR_S1G_CAPABILITY in + * NL80211_CMD_ASSOCIATE or NL80211_CMD_CONNECT. + * * @NUM_NL80211_ATTR: total number of nl80211_attrs available * @NL80211_ATTR_MAX: highest attribute number currently defined * @__NL80211_ATTR_AFTER_LAST: internal use @@ -3007,6 +3013,9 @@ enum nl80211_attrs { NL80211_ATTR_UNSOL_BCAST_PROBE_RESP, + NL80211_ATTR_S1G_CAPABILITY, + NL80211_ATTR_S1G_CAPABILITY_MASK, + /* add attributes here, update the policy in nl80211.c */ __NL80211_ATTR_AFTER_LAST, -- cgit v1.2.3 From 1ae099540e8c7f1ee066b3ad45cc91f582bb1ce8 Mon Sep 17 00:00:00 2001 From: Alexander Graf Date: Fri, 25 Sep 2020 16:34:16 +0200 Subject: KVM: x86: Allow deflecting unknown MSR accesses to user space MSRs are weird. Some of them are normal control registers, such as EFER. Some however are registers that really are model specific, not very interesting to virtualization workloads, and not performance critical. Others again are really just windows into package configuration. Out of these MSRs, only the first category is necessary to implement in kernel space. Rarely accessed MSRs, MSRs that should be fine tunes against certain CPU models and MSRs that contain information on the package level are much better suited for user space to process. However, over time we have accumulated a lot of MSRs that are not the first category, but still handled by in-kernel KVM code. This patch adds a generic interface to handle WRMSR and RDMSR from user space. With this, any future MSR that is part of the latter categories can be handled in user space. Furthermore, it allows us to replace the existing "ignore_msrs" logic with something that applies per-VM rather than on the full system. That way you can run productive VMs in parallel to experimental ones where you don't care about proper MSR handling. Signed-off-by: Alexander Graf Reviewed-by: Jim Mattson Message-Id: <20200925143422.21718-3-graf@amazon.com> Signed-off-by: Paolo Bonzini --- include/uapi/linux/kvm.h | 13 +++++++++++++ 1 file changed, 13 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index 7d8eced6f459..31292a3cdfc2 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -248,6 +248,8 @@ struct kvm_hyperv_exit { #define KVM_EXIT_IOAPIC_EOI 26 #define KVM_EXIT_HYPERV 27 #define KVM_EXIT_ARM_NISV 28 +#define KVM_EXIT_X86_RDMSR 29 +#define KVM_EXIT_X86_WRMSR 30 /* For KVM_EXIT_INTERNAL_ERROR */ /* Emulate instruction failed. */ @@ -413,6 +415,16 @@ struct kvm_run { __u64 esr_iss; __u64 fault_ipa; } arm_nisv; + /* KVM_EXIT_X86_RDMSR / KVM_EXIT_X86_WRMSR */ + struct { + __u8 error; /* user -> kernel */ + __u8 pad[7]; +#define KVM_MSR_EXIT_REASON_INVAL (1 << 0) +#define KVM_MSR_EXIT_REASON_UNKNOWN (1 << 1) + __u32 reason; /* kernel -> user */ + __u32 index; /* kernel -> user */ + __u64 data; /* kernel <-> user */ + } msr; /* Fix the size of the union. */ char padding[256]; }; @@ -1037,6 +1049,7 @@ struct kvm_ppc_resize_hpt { #define KVM_CAP_SMALLER_MAXPHYADDR 185 #define KVM_CAP_S390_DIAG318 186 #define KVM_CAP_STEAL_TIME 187 +#define KVM_CAP_X86_USER_SPACE_MSR 188 #ifdef KVM_CAP_IRQ_ROUTING -- cgit v1.2.3 From 1a155254ff937ac92cf9940d273ea597b2c667a2 Mon Sep 17 00:00:00 2001 From: Alexander Graf Date: Fri, 25 Sep 2020 16:34:21 +0200 Subject: KVM: x86: Introduce MSR filtering It's not desireable to have all MSRs always handled by KVM kernel space. Some MSRs would be useful to handle in user space to either emulate behavior (like uCode updates) or differentiate whether they are valid based on the CPU model. To allow user space to specify which MSRs it wants to see handled by KVM, this patch introduces a new ioctl to push filter rules with bitmaps into KVM. Based on these bitmaps, KVM can then decide whether to reject MSR access. With the addition of KVM_CAP_X86_USER_SPACE_MSR it can also deflect the denied MSR events to user space to operate on. If no filter is populated, MSR handling stays identical to before. Signed-off-by: Alexander Graf Message-Id: <20200925143422.21718-8-graf@amazon.com> Signed-off-by: Paolo Bonzini --- include/uapi/linux/kvm.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index 31292a3cdfc2..58f43aa1fc21 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -421,6 +421,7 @@ struct kvm_run { __u8 pad[7]; #define KVM_MSR_EXIT_REASON_INVAL (1 << 0) #define KVM_MSR_EXIT_REASON_UNKNOWN (1 << 1) +#define KVM_MSR_EXIT_REASON_FILTER (1 << 2) __u32 reason; /* kernel -> user */ __u32 index; /* kernel -> user */ __u64 data; /* kernel <-> user */ @@ -1050,6 +1051,7 @@ struct kvm_ppc_resize_hpt { #define KVM_CAP_S390_DIAG318 186 #define KVM_CAP_STEAL_TIME 187 #define KVM_CAP_X86_USER_SPACE_MSR 188 +#define KVM_CAP_X86_MSR_FILTER 189 #ifdef KVM_CAP_IRQ_ROUTING @@ -1551,6 +1553,9 @@ struct kvm_pv_cmd { /* Available with KVM_CAP_S390_PROTECTED */ #define KVM_S390_PV_COMMAND _IOWR(KVMIO, 0xc5, struct kvm_pv_cmd) +/* Available with KVM_CAP_X86_MSR_FILTER */ +#define KVM_X86_SET_MSR_FILTER _IOW(KVMIO, 0xc6, struct kvm_msr_filter) + /* Secure Encrypted Virtualization command */ enum sev_cmd_id { /* Guest initialization commands */ -- cgit v1.2.3 From 58ef7c1b555e0e605da24b76cb2821dd3fcd6bc6 Mon Sep 17 00:00:00 2001 From: Thomas Pedersen Date: Mon, 21 Sep 2020 19:28:16 -0700 Subject: nl80211: include frequency offset in survey info Recently channels gained a potential frequency offset, so include this in the per-channel survey info. Signed-off-by: Thomas Pedersen Link: https://lore.kernel.org/r/20200922022818.15855-16-thomas@adapt-ip.com [add the offset only if non-zero] Signed-off-by: Johannes Berg --- include/uapi/linux/nl80211.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index 05db40b4c56f..1e51445f81cd 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -4097,6 +4097,7 @@ enum nl80211_user_reg_hint_type { * receiving frames destined to the local BSS * @NL80211_SURVEY_INFO_MAX: highest survey info attribute number * currently defined + * @NL80211_SURVEY_INFO_FREQUENCY_OFFSET: center frequency offset in KHz * @__NL80211_SURVEY_INFO_AFTER_LAST: internal use */ enum nl80211_survey_info { @@ -4112,6 +4113,7 @@ enum nl80211_survey_info { NL80211_SURVEY_INFO_TIME_SCAN, NL80211_SURVEY_INFO_PAD, NL80211_SURVEY_INFO_TIME_BSS_RX, + NL80211_SURVEY_INFO_FREQUENCY_OFFSET, /* keep last */ __NL80211_SURVEY_INFO_AFTER_LAST, -- cgit v1.2.3 From f5bec330e3010450daeb5cb6a94a4a7c54afa306 Mon Sep 17 00:00:00 2001 From: Rajkumar Manoharan Date: Mon, 28 Sep 2020 00:28:11 -0700 Subject: nl80211: extend support to config spatial reuse parameter set Allow the user to configure below Spatial Reuse Parameter Set element. * Non-SRG OBSS PD Max Offset * SRG BSS Color Bitmap * SRG Partial BSSID Bitmap Signed-off-by: Rajkumar Manoharan Link: https://lore.kernel.org/r/1601278091-20313-2-git-send-email-rmanohar@codeaurora.org Signed-off-by: Johannes Berg --- include/uapi/linux/nl80211.h | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index 1e51445f81cd..47700a2b9af9 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -6991,6 +6991,13 @@ enum nl80211_peer_measurement_ftm_resp { * * @NL80211_HE_OBSS_PD_ATTR_MIN_OFFSET: the OBSS PD minimum tx power offset. * @NL80211_HE_OBSS_PD_ATTR_MAX_OFFSET: the OBSS PD maximum tx power offset. + * @NL80211_HE_OBSS_PD_ATTR_NON_SRG_MAX_OFFSET: the non-SRG OBSS PD maximum + * tx power offset. + * @NL80211_HE_OBSS_PD_ATTR_BSS_COLOR_BITMAP: bitmap that indicates the BSS color + * values used by members of the SRG. + * @NL80211_HE_OBSS_PD_ATTR_PARTIAL_BSSID_BITMAP: bitmap that indicates the partial + * BSSID values used by members of the SRG. + * @NL80211_HE_OBSS_PD_ATTR_SR_CTRL: The SR Control field of SRP element. * * @__NL80211_HE_OBSS_PD_ATTR_LAST: Internal * @NL80211_HE_OBSS_PD_ATTR_MAX: highest OBSS PD attribute. @@ -7000,6 +7007,10 @@ enum nl80211_obss_pd_attributes { NL80211_HE_OBSS_PD_ATTR_MIN_OFFSET, NL80211_HE_OBSS_PD_ATTR_MAX_OFFSET, + NL80211_HE_OBSS_PD_ATTR_NON_SRG_MAX_OFFSET, + NL80211_HE_OBSS_PD_ATTR_BSS_COLOR_BITMAP, + NL80211_HE_OBSS_PD_ATTR_PARTIAL_BSSID_BITMAP, + NL80211_HE_OBSS_PD_ATTR_SR_CTRL, /* keep last */ __NL80211_HE_OBSS_PD_ATTR_LAST, -- cgit v1.2.3 From 1b4d60ec162f82ea29a2e7a907b5c6cc9f926321 Mon Sep 17 00:00:00 2001 From: Song Liu Date: Fri, 25 Sep 2020 13:54:29 -0700 Subject: bpf: Enable BPF_PROG_TEST_RUN for raw_tracepoint Add .test_run for raw_tracepoint. Also, introduce a new feature that runs the target program on a specific CPU. This is achieved by a new flag in bpf_attr.test, BPF_F_TEST_RUN_ON_CPU. When this flag is set, the program is triggered on cpu with id bpf_attr.test.cpu. This feature is needed for BPF programs that handle perf_event and other percpu resources, as the program can access these resource locally. Signed-off-by: Song Liu Signed-off-by: Daniel Borkmann Acked-by: John Fastabend Acked-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20200925205432.1777-2-songliubraving@fb.com --- include/uapi/linux/bpf.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 2d6519a2ed77..82522f05c021 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -424,6 +424,11 @@ enum { */ #define BPF_F_QUERY_EFFECTIVE (1U << 0) +/* Flags for BPF_PROG_TEST_RUN */ + +/* If set, run the test on the cpu specified by bpf_attr.test.cpu */ +#define BPF_F_TEST_RUN_ON_CPU (1U << 0) + /* type for BPF_ENABLE_STATS */ enum bpf_stats_type { /* enabled run_time_ns and run_cnt */ @@ -566,6 +571,8 @@ union bpf_attr { */ __aligned_u64 ctx_in; __aligned_u64 ctx_out; + __u32 flags; + __u32 cpu; } test; struct { /* anonymous struct used by BPF_*_GET_*_ID */ -- cgit v1.2.3 From c4d0bfb45068d853a478b9067a95969b1886a30f Mon Sep 17 00:00:00 2001 From: Alan Maguire Date: Mon, 28 Sep 2020 12:31:05 +0100 Subject: bpf: Add bpf_snprintf_btf helper A helper is added to support tracing kernel type information in BPF using the BPF Type Format (BTF). Its signature is long bpf_snprintf_btf(char *str, u32 str_size, struct btf_ptr *ptr, u32 btf_ptr_size, u64 flags); struct btf_ptr * specifies - a pointer to the data to be traced - the BTF id of the type of data pointed to - a flags field is provided for future use; these flags are not to be confused with the BTF_F_* flags below that control how the btf_ptr is displayed; the flags member of the struct btf_ptr may be used to disambiguate types in kernel versus module BTF, etc; the main distinction is the flags relate to the type and information needed in identifying it; not how it is displayed. For example a BPF program with a struct sk_buff *skb could do the following: static struct btf_ptr b = { }; b.ptr = skb; b.type_id = __builtin_btf_type_id(struct sk_buff, 1); bpf_snprintf_btf(str, sizeof(str), &b, sizeof(b), 0, 0); Default output looks like this: (struct sk_buff){ .transport_header = (__u16)65535, .mac_header = (__u16)65535, .end = (sk_buff_data_t)192, .head = (unsigned char *)0x000000007524fd8b, .data = (unsigned char *)0x000000007524fd8b, .truesize = (unsigned int)768, .users = (refcount_t){ .refs = (atomic_t){ .counter = (int)1, }, }, } Flags modifying display are as follows: - BTF_F_COMPACT: no formatting around type information - BTF_F_NONAME: no struct/union member names/types - BTF_F_PTR_RAW: show raw (unobfuscated) pointer values; equivalent to %px. - BTF_F_ZERO: show zero-valued struct/union members; they are not displayed by default Signed-off-by: Alan Maguire Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/1601292670-1616-4-git-send-email-alan.maguire@oracle.com --- include/uapi/linux/bpf.h | 67 ++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 67 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 82522f05c021..cca9eb1b13e5 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -3594,6 +3594,42 @@ union bpf_attr { * the data in *dst*. This is a wrapper of **copy_from_user**\ (). * Return * 0 on success, or a negative error in case of failure. + * + * long bpf_snprintf_btf(char *str, u32 str_size, struct btf_ptr *ptr, u32 btf_ptr_size, u64 flags) + * Description + * Use BTF to store a string representation of *ptr*->ptr in *str*, + * using *ptr*->type_id. This value should specify the type + * that *ptr*->ptr points to. LLVM __builtin_btf_type_id(type, 1) + * can be used to look up vmlinux BTF type ids. Traversing the + * data structure using BTF, the type information and values are + * stored in the first *str_size* - 1 bytes of *str*. Safe copy of + * the pointer data is carried out to avoid kernel crashes during + * operation. Smaller types can use string space on the stack; + * larger programs can use map data to store the string + * representation. + * + * The string can be subsequently shared with userspace via + * bpf_perf_event_output() or ring buffer interfaces. + * bpf_trace_printk() is to be avoided as it places too small + * a limit on string size to be useful. + * + * *flags* is a combination of + * + * **BTF_F_COMPACT** + * no formatting around type information + * **BTF_F_NONAME** + * no struct/union member names/types + * **BTF_F_PTR_RAW** + * show raw (unobfuscated) pointer values; + * equivalent to printk specifier %px. + * **BTF_F_ZERO** + * show zero-valued struct/union members; they + * are not displayed by default + * + * Return + * The number of bytes that were written (or would have been + * written if output had to be truncated due to string size), + * or a negative error in cases of failure. */ #define __BPF_FUNC_MAPPER(FN) \ FN(unspec), \ @@ -3745,6 +3781,7 @@ union bpf_attr { FN(inode_storage_delete), \ FN(d_path), \ FN(copy_from_user), \ + FN(snprintf_btf), \ /* */ /* integer value in 'imm' field of BPF_CALL instruction selects which helper @@ -4853,4 +4890,34 @@ struct bpf_sk_lookup { __u32 local_port; /* Host byte order */ }; +/* + * struct btf_ptr is used for typed pointer representation; the + * type id is used to render the pointer data as the appropriate type + * via the bpf_snprintf_btf() helper described above. A flags field - + * potentially to specify additional details about the BTF pointer + * (rather than its mode of display) - is included for future use. + * Display flags - BTF_F_* - are passed to bpf_snprintf_btf separately. + */ +struct btf_ptr { + void *ptr; + __u32 type_id; + __u32 flags; /* BTF ptr flags; unused at present. */ +}; + +/* + * Flags to control bpf_snprintf_btf() behaviour. + * - BTF_F_COMPACT: no formatting around type information + * - BTF_F_NONAME: no struct/union member names/types + * - BTF_F_PTR_RAW: show raw (unobfuscated) pointer values; + * equivalent to %px. + * - BTF_F_ZERO: show zero-valued struct/union members; they + * are not displayed by default + */ +enum { + BTF_F_COMPACT = (1ULL << 0), + BTF_F_NONAME = (1ULL << 1), + BTF_F_PTR_RAW = (1ULL << 2), + BTF_F_ZERO = (1ULL << 3), +}; + #endif /* _UAPI__LINUX_BPF_H__ */ -- cgit v1.2.3 From eb411377aed9e27835e77ee0710ee8f4649958f3 Mon Sep 17 00:00:00 2001 From: Alan Maguire Date: Mon, 28 Sep 2020 12:31:09 +0100 Subject: bpf: Add bpf_seq_printf_btf helper A helper is added to allow seq file writing of kernel data structures using vmlinux BTF. Its signature is long bpf_seq_printf_btf(struct seq_file *m, struct btf_ptr *ptr, u32 btf_ptr_size, u64 flags); Flags and struct btf_ptr definitions/use are identical to the bpf_snprintf_btf helper, and the helper returns 0 on success or a negative error value. Suggested-by: Alexei Starovoitov Signed-off-by: Alan Maguire Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/1601292670-1616-8-git-send-email-alan.maguire@oracle.com --- include/uapi/linux/bpf.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index cca9eb1b13e5..96ddb00b91dc 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -3630,6 +3630,14 @@ union bpf_attr { * The number of bytes that were written (or would have been * written if output had to be truncated due to string size), * or a negative error in cases of failure. + * + * long bpf_seq_printf_btf(struct seq_file *m, struct btf_ptr *ptr, u32 ptr_size, u64 flags) + * Description + * Use BTF to write to seq_write a string representation of + * *ptr*->ptr, using *ptr*->type_id as per bpf_snprintf_btf(). + * *flags* are identical to those used for bpf_snprintf_btf. + * Return + * 0 on success or a negative error in case of failure. */ #define __BPF_FUNC_MAPPER(FN) \ FN(unspec), \ @@ -3782,6 +3790,7 @@ union bpf_attr { FN(d_path), \ FN(copy_from_user), \ FN(snprintf_btf), \ + FN(seq_printf_btf), \ /* */ /* integer value in 'imm' field of BPF_CALL instruction selects which helper -- cgit v1.2.3 From 3789af9a13e5561738c0f2114e3a5e22c843ca3e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Krzysztof=20Wilczy=C5=84ski?= Date: Thu, 30 Jul 2020 21:08:48 +0000 Subject: PCI/PM: Rename pci_dev.d3_delay to d3hot_delay MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit PCI devices support two variants of the D3 power state: D3hot (main power present) D3cold (main power removed). Previously struct pci_dev contained: unsigned int d3_delay; /* D3->D0 transition time in ms */ unsigned int d3cold_delay; /* D3cold->D0 transition time in ms */ "d3_delay" refers specifically to the D3hot state. Rename it to "d3hot_delay" to avoid ambiguity and align with the ACPI "_DSM for Specifying Device Readiness Durations" in the PCI Firmware spec r3.2, sec 4.6.9. There is no change to the functionality. Link: https://lore.kernel.org/r/20200730210848.1578826-1-kw@linux.com Signed-off-by: Krzysztof Wilczyński Signed-off-by: Bjorn Helgaas --- include/uapi/linux/pci_regs.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/pci_regs.h b/include/uapi/linux/pci_regs.h index f9701410d3b5..49f15c37e771 100644 --- a/include/uapi/linux/pci_regs.h +++ b/include/uapi/linux/pci_regs.h @@ -246,7 +246,7 @@ #define PCI_PM_CAP_PME_D0 0x0800 /* PME# from D0 */ #define PCI_PM_CAP_PME_D1 0x1000 /* PME# from D1 */ #define PCI_PM_CAP_PME_D2 0x2000 /* PME# from D2 */ -#define PCI_PM_CAP_PME_D3 0x4000 /* PME# from D3 (hot) */ +#define PCI_PM_CAP_PME_D3hot 0x4000 /* PME# from D3 (hot) */ #define PCI_PM_CAP_PME_D3cold 0x8000 /* PME# from D3 (cold) */ #define PCI_PM_CAP_PME_SHIFT 11 /* Start of the PME Mask in PMC */ #define PCI_PM_CTRL 4 /* PM control and status register */ -- cgit v1.2.3 From 4a1e7c0c63e02daad751842b7880f9bbcdfb6e89 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Toke=20H=C3=B8iland-J=C3=B8rgensen?= Date: Tue, 29 Sep 2020 14:45:51 +0200 Subject: bpf: Support attaching freplace programs to multiple attach points MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This enables support for attaching freplace programs to multiple attach points. It does this by amending the UAPI for bpf_link_Create with a target btf ID that can be used to supply the new attachment point along with the target program fd. The target must be compatible with the target that was supplied at program load time. The implementation reuses the checks that were factored out of check_attach_btf_id() to ensure compatibility between the BTF types of the old and new attachment. If these match, a new bpf_tracing_link will be created for the new attach target, allowing multiple attachments to co-exist simultaneously. The code could theoretically support multiple-attach of other types of tracing programs as well, but since I don't have a use case for any of those, there is no API support for doing so. Signed-off-by: Toke Høiland-Jørgensen Signed-off-by: Alexei Starovoitov Acked-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/160138355169.48470.17165680973640685368.stgit@toke.dk --- include/uapi/linux/bpf.h | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 96ddb00b91dc..2b1d3f16cbd1 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -639,8 +639,13 @@ union bpf_attr { }; __u32 attach_type; /* attach type */ __u32 flags; /* extra flags */ - __aligned_u64 iter_info; /* extra bpf_iter_link_info */ - __u32 iter_info_len; /* iter_info length */ + union { + __u32 target_btf_id; /* btf_id of target to attach to */ + struct { + __aligned_u64 iter_info; /* extra bpf_iter_link_info */ + __u32 iter_info_len; /* iter_info length */ + }; + }; } link_create; struct { /* struct used by BPF_LINK_UPDATE command */ -- cgit v1.2.3 From 3f47cb4c1cf3bceb2438ea962bfffc6665ee4a9f Mon Sep 17 00:00:00 2001 From: Tom Parkin Date: Tue, 29 Sep 2020 13:35:41 +0100 Subject: l2tp: report rx cookie discards in netlink get When an L2TPv3 session receives a data frame with an incorrect cookie l2tp_core logs a warning message and bumps a stats counter to reflect the fact that the packet has been dropped. However, the stats counter in question is missing from the l2tp_netlink get message for tunnel and session instances. Include the statistic in the netlink get response. Signed-off-by: Tom Parkin Signed-off-by: David S. Miller --- include/uapi/linux/l2tp.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/l2tp.h b/include/uapi/linux/l2tp.h index 88a0d32b8c07..30c80d5ba4bf 100644 --- a/include/uapi/linux/l2tp.h +++ b/include/uapi/linux/l2tp.h @@ -144,6 +144,7 @@ enum { L2TP_ATTR_RX_OOS_PACKETS, /* u64 */ L2TP_ATTR_RX_ERRORS, /* u64 */ L2TP_ATTR_STATS_PAD, + L2TP_ATTR_RX_COOKIE_DISCARDS, /* u64 */ __L2TP_ATTR_STATS_MAX, }; -- cgit v1.2.3 From 2ec13cbcfadbbeac499f3b63de0f7db490d45a7e Mon Sep 17 00:00:00 2001 From: Jacob Keller Date: Tue, 29 Sep 2020 11:08:59 -0700 Subject: devlink: include for _BITUL Commit 5d5b4128c4ca ("devlink: introduce flash update overwrite mask") added a usage of _BITUL to the UAPI header, but failed to include the header file where it was defined. It happens that this does not break any existing kernel include chains because it gets included through other sources. However, when including the UAPI headers in a userspace application (such as devlink in iproute2), _BITUL is not defined. Fixes: 5d5b4128c4ca ("devlink: introduce flash update overwrite mask") Signed-off-by: Jacob Keller Signed-off-by: David S. Miller --- include/uapi/linux/devlink.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/devlink.h b/include/uapi/linux/devlink.h index 7b0face1bad5..ba467dc07852 100644 --- a/include/uapi/linux/devlink.h +++ b/include/uapi/linux/devlink.h @@ -13,6 +13,8 @@ #ifndef _UAPI_LINUX_DEVLINK_H_ #define _UAPI_LINUX_DEVLINK_H_ +#include + #define DEVLINK_GENL_NAME "devlink" #define DEVLINK_GENL_VERSION 0x1 #define DEVLINK_GENL_MCGRP_CONFIG_NAME "config" -- cgit v1.2.3 From 539430fbbcc4a8d02451c77fff1ecd1f3b5f8abf Mon Sep 17 00:00:00 2001 From: Kent Gibson Date: Mon, 28 Sep 2020 08:27:50 +0800 Subject: gpio: uapi: define GPIO_MAX_NAME_SIZE for array sizes Replace constant array sizes with a macro constant to clarify the source of array sizes, provide a place to document any constraints on the size, and to simplify array sizing in userspace if constructing structs from their composite fields. Signed-off-by: Kent Gibson Reviewed-by: Andy Shevchenko Signed-off-by: Bartosz Golaszewski --- include/uapi/linux/gpio.h | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/gpio.h b/include/uapi/linux/gpio.h index 9c27cecf406f..285cc10355b2 100644 --- a/include/uapi/linux/gpio.h +++ b/include/uapi/linux/gpio.h @@ -14,6 +14,11 @@ #include #include +/* + * The maximum size of name and label arrays. + */ +#define GPIO_MAX_NAME_SIZE 32 + /** * struct gpiochip_info - Information about a certain GPIO chip * @name: the Linux kernel name of this GPIO chip @@ -22,8 +27,8 @@ * @lines: number of GPIO lines on this chip */ struct gpiochip_info { - char name[32]; - char label[32]; + char name[GPIO_MAX_NAME_SIZE]; + char label[GPIO_MAX_NAME_SIZE]; __u32 lines; }; @@ -52,8 +57,8 @@ struct gpiochip_info { struct gpioline_info { __u32 line_offset; __u32 flags; - char name[32]; - char consumer[32]; + char name[GPIO_MAX_NAME_SIZE]; + char consumer[GPIO_MAX_NAME_SIZE]; }; /* Maximum number of requested handles */ @@ -123,7 +128,7 @@ struct gpiohandle_request { __u32 lineoffsets[GPIOHANDLES_MAX]; __u32 flags; __u8 default_values[GPIOHANDLES_MAX]; - char consumer_label[32]; + char consumer_label[GPIO_MAX_NAME_SIZE]; __u32 lines; int fd; }; @@ -182,7 +187,7 @@ struct gpioevent_request { __u32 lineoffset; __u32 handleflags; __u32 eventflags; - char consumer_label[32]; + char consumer_label[GPIO_MAX_NAME_SIZE]; int fd; }; -- cgit v1.2.3 From b53911aa872db462be2e5f1dd611b25c4c2e663b Mon Sep 17 00:00:00 2001 From: Kent Gibson Date: Mon, 28 Sep 2020 08:27:51 +0800 Subject: gpio: uapi: define uAPI v2 Add a new version of the uAPI to address existing 32/64-bit alignment issues, add support for debounce and event sequence numbers, allow requested lines with different configurations, and provide some future proofing by adding padding reserved for future use. The alignment issue relates to the gpioevent_data, which packs to different sizes on 32-bit and 64-bit platforms. That creates problems for 32-bit apps running on 64-bit kernels. uAPI v2 addresses that particular issue, and the problem more generally, by adding pad fields that explicitly pad structs out to 64-bit boundaries, so they will pack to the same size now, and even if some of the reserved padding is used for __u64 fields in the future. The new structs have been analysed with pahole to ensure that they are sized as expected and contain no implicit padding. The lack of future proofing in v1 makes it impossible to, for example, add the debounce feature that is included in v2. The future proofing is addressed by providing configurable attributes in line config and reserved padding in all structs for future features. Specifically, the line request, config, info, info_changed and event structs receive updated versions and new ioctls. As the majority of the structs and ioctls were being replaced, it is opportune to rework some of the other aspects of the uAPI: v1 has three different flags fields, each with their own separate bit definitions. In v2 that is collapsed to one - gpio_v2_line_flag. The handle and event requests are merged into a single request, the line request, as the two requests were mostly the same other than the edge detection provided by event requests. As a byproduct, the v2 uAPI allows for multiple lines producing edge events on the same line handle. This is a new capability as v1 only supports a single line in an event request. As a consequence, there are now only two types of file handle to be concerned with, the chip and the line, and it is clearer which ioctls apply to which type of handle. There is also some minor renaming of fields for consistency compared to their v1 counterparts, e.g. offset rather than lineoffset or line_offset, and consumer rather than consumer_label. Additionally, v1 GPIOHANDLES_MAX becomes GPIO_V2_LINES_MAX in v2 for clarity, and the gpiohandle_data __u8 array becomes a bitmap in gpio_v2_line_values. The v2 uAPI is mostly a reorganisation and extension of v1, so userspace code, particularly libgpiod, should readily port to it. Signed-off-by: Kent Gibson Reviewed-by: Andy Shevchenko Signed-off-by: Bartosz Golaszewski --- include/uapi/linux/gpio.h | 291 ++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 284 insertions(+), 7 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/gpio.h b/include/uapi/linux/gpio.h index 285cc10355b2..5904f49399de 100644 --- a/include/uapi/linux/gpio.h +++ b/include/uapi/linux/gpio.h @@ -11,11 +11,14 @@ #ifndef _UAPI_GPIO_H_ #define _UAPI_GPIO_H_ +#include #include #include /* * The maximum size of name and label arrays. + * + * Must be a multiple of 8 to ensure 32/64-bit alignment of structs. */ #define GPIO_MAX_NAME_SIZE 32 @@ -32,6 +35,265 @@ struct gpiochip_info { __u32 lines; }; +/* + * Maximum number of requested lines. + * + * Must be no greater than 64, as bitmaps are restricted here to 64-bits + * for simplicity, and a multiple of 2 to ensure 32/64-bit alignment of + * structs. + */ +#define GPIO_V2_LINES_MAX 64 + +/* + * The maximum number of configuration attributes associated with a line + * request. + */ +#define GPIO_V2_LINE_NUM_ATTRS_MAX 10 + +/** + * enum gpio_v2_line_flag - &struct gpio_v2_line_attribute.flags values + * @GPIO_V2_LINE_FLAG_USED: line is not available for request + * @GPIO_V2_LINE_FLAG_ACTIVE_LOW: line active state is physical low + * @GPIO_V2_LINE_FLAG_INPUT: line is an input + * @GPIO_V2_LINE_FLAG_OUTPUT: line is an output + * @GPIO_V2_LINE_FLAG_EDGE_RISING: line detects rising (inactive to active) + * edges + * @GPIO_V2_LINE_FLAG_EDGE_FALLING: line detects falling (active to + * inactive) edges + * @GPIO_V2_LINE_FLAG_OPEN_DRAIN: line is an open drain output + * @GPIO_V2_LINE_FLAG_OPEN_SOURCE: line is an open source output + * @GPIO_V2_LINE_FLAG_BIAS_PULL_UP: line has pull-up bias enabled + * @GPIO_V2_LINE_FLAG_BIAS_PULL_DOWN: line has pull-down bias enabled + * @GPIO_V2_LINE_FLAG_BIAS_DISABLED: line has bias disabled + */ +enum gpio_v2_line_flag { + GPIO_V2_LINE_FLAG_USED = _BITULL(0), + GPIO_V2_LINE_FLAG_ACTIVE_LOW = _BITULL(1), + GPIO_V2_LINE_FLAG_INPUT = _BITULL(2), + GPIO_V2_LINE_FLAG_OUTPUT = _BITULL(3), + GPIO_V2_LINE_FLAG_EDGE_RISING = _BITULL(4), + GPIO_V2_LINE_FLAG_EDGE_FALLING = _BITULL(5), + GPIO_V2_LINE_FLAG_OPEN_DRAIN = _BITULL(6), + GPIO_V2_LINE_FLAG_OPEN_SOURCE = _BITULL(7), + GPIO_V2_LINE_FLAG_BIAS_PULL_UP = _BITULL(8), + GPIO_V2_LINE_FLAG_BIAS_PULL_DOWN = _BITULL(9), + GPIO_V2_LINE_FLAG_BIAS_DISABLED = _BITULL(10), +}; + +/** + * struct gpio_v2_line_values - Values of GPIO lines + * @bits: a bitmap containing the value of the lines, set to 1 for active + * and 0 for inactive. + * @mask: a bitmap identifying the lines to get or set, with each bit + * number corresponding to the index into &struct + * gpio_v2_line_request.offsets. + */ +struct gpio_v2_line_values { + __aligned_u64 bits; + __aligned_u64 mask; +}; + +/** + * enum gpio_v2_line_attr_id - &struct gpio_v2_line_attribute.id values + * identifying which field of the attribute union is in use. + * @GPIO_V2_LINE_ATTR_ID_FLAGS: flags field is in use + * @GPIO_V2_LINE_ATTR_ID_OUTPUT_VALUES: values field is in use + * @GPIO_V2_LINE_ATTR_ID_DEBOUNCE: debounce_period_us is in use + */ +enum gpio_v2_line_attr_id { + GPIO_V2_LINE_ATTR_ID_FLAGS = 1, + GPIO_V2_LINE_ATTR_ID_OUTPUT_VALUES = 2, + GPIO_V2_LINE_ATTR_ID_DEBOUNCE = 3, +}; + +/** + * struct gpio_v2_line_attribute - a configurable attribute of a line + * @id: attribute identifier with value from &enum gpio_v2_line_attr_id + * @padding: reserved for future use and must be zero filled + * @flags: if id is GPIO_V2_LINE_ATTR_ID_FLAGS, the flags for the GPIO + * line, with values from enum gpio_v2_line_flag, such as + * GPIO_V2_LINE_FLAG_ACTIVE_LOW, GPIO_V2_LINE_FLAG_OUTPUT etc, OR:ed + * together. This overrides the default flags contained in the &struct + * gpio_v2_line_config for the associated line. + * @values: if id is GPIO_V2_LINE_ATTR_ID_OUTPUT_VALUES, a bitmap + * containing the values to which the lines will be set, with each bit + * number corresponding to the index into &struct + * gpio_v2_line_request.offsets. + * @debounce_period_us: if id is GPIO_V2_LINE_ATTR_ID_DEBOUNCE, the desired + * debounce period, in microseconds + */ +struct gpio_v2_line_attribute { + __u32 id; + __u32 padding; + union { + __aligned_u64 flags; + __aligned_u64 values; + __u32 debounce_period_us; + }; +}; + +/** + * struct gpio_v2_line_config_attribute - a configuration attribute + * associated with one or more of the requested lines. + * @attr: the configurable attribute + * @mask: a bitmap identifying the lines to which the attribute applies, + * with each bit number corresponding to the index into &struct + * gpio_v2_line_request.offsets. + */ +struct gpio_v2_line_config_attribute { + struct gpio_v2_line_attribute attr; + __aligned_u64 mask; +}; + +/** + * struct gpio_v2_line_config - Configuration for GPIO lines + * @flags: flags for the GPIO lines, with values from enum + * gpio_v2_line_flag, such as GPIO_V2_LINE_FLAG_ACTIVE_LOW, + * GPIO_V2_LINE_FLAG_OUTPUT etc, OR:ed together. This is the default for + * all requested lines but may be overridden for particular lines using + * attrs. + * @num_attrs: the number of attributes in attrs + * @padding: reserved for future use and must be zero filled + * @attrs: the configuration attributes associated with the requested + * lines. Any attribute should only be associated with a particular line + * once. If an attribute is associated with a line multiple times then the + * first occurrence (i.e. lowest index) has precedence. + */ +struct gpio_v2_line_config { + __aligned_u64 flags; + __u32 num_attrs; + /* Pad to fill implicit padding and reserve space for future use. */ + __u32 padding[5]; + struct gpio_v2_line_config_attribute attrs[GPIO_V2_LINE_NUM_ATTRS_MAX]; +}; + +/** + * struct gpio_v2_line_request - Information about a request for GPIO lines + * @offsets: an array of desired lines, specified by offset index for the + * associated GPIO chip + * @consumer: a desired consumer label for the selected GPIO lines such as + * "my-bitbanged-relay" + * @config: requested configuration for the lines. + * @num_lines: number of lines requested in this request, i.e. the number + * of valid fields in the GPIO_V2_LINES_MAX sized arrays, set to 1 to + * request a single line + * @event_buffer_size: a suggested minimum number of line events that the + * kernel should buffer. This is only relevant if edge detection is + * enabled in the configuration. Note that this is only a suggested value + * and the kernel may allocate a larger buffer or cap the size of the + * buffer. If this field is zero then the buffer size defaults to a minimum + * of num_lines*16. + * @padding: reserved for future use and must be zero filled + * @fd: if successful this field will contain a valid anonymous file handle + * after a GPIO_GET_LINE_IOCTL operation, zero or negative value means + * error + */ +struct gpio_v2_line_request { + __u32 offsets[GPIO_V2_LINES_MAX]; + char consumer[GPIO_MAX_NAME_SIZE]; + struct gpio_v2_line_config config; + __u32 num_lines; + __u32 event_buffer_size; + /* Pad to fill implicit padding and reserve space for future use. */ + __u32 padding[5]; + __s32 fd; +}; + +/** + * struct gpio_v2_line_info - Information about a certain GPIO line + * @name: the name of this GPIO line, such as the output pin of the line on + * the chip, a rail or a pin header name on a board, as specified by the + * GPIO chip, may be empty + * @consumer: a functional name for the consumer of this GPIO line as set + * by whatever is using it, will be empty if there is no current user but + * may also be empty if the consumer doesn't set this up + * @flags: flags for the GPIO line, such as GPIO_V2_LINE_FLAG_ACTIVE_LOW, + * GPIO_V2_LINE_FLAG_OUTPUT etc, OR:ed together + * @offset: the local offset on this GPIO chip, fill this in when + * requesting the line information from the kernel + * @num_attrs: the number of attributes in attrs + * @attrs: the configuration attributes associated with the line + * @padding: reserved for future use + */ +struct gpio_v2_line_info { + char name[GPIO_MAX_NAME_SIZE]; + char consumer[GPIO_MAX_NAME_SIZE]; + __u32 offset; + __u32 num_attrs; + __aligned_u64 flags; + struct gpio_v2_line_attribute attrs[GPIO_V2_LINE_NUM_ATTRS_MAX]; + /* Space reserved for future use. */ + __u32 padding[4]; +}; + +/** + * enum gpio_v2_line_changed_type - &struct gpio_v2_line_changed.event_type + * values + * @GPIO_V2_LINE_CHANGED_REQUESTED: line has been requested + * @GPIO_V2_LINE_CHANGED_RELEASED: line has been released + * @GPIO_V2_LINE_CHANGED_CONFIG: line has been reconfigured + */ +enum gpio_v2_line_changed_type { + GPIO_V2_LINE_CHANGED_REQUESTED = 1, + GPIO_V2_LINE_CHANGED_RELEASED = 2, + GPIO_V2_LINE_CHANGED_CONFIG = 3, +}; + +/** + * struct gpio_v2_line_info_changed - Information about a change in status + * of a GPIO line + * @info: updated line information + * @timestamp_ns: estimate of time of status change occurrence, in nanoseconds + * @event_type: the type of change with a value from enum + * gpio_v2_line_changed_type + * @padding: reserved for future use + */ +struct gpio_v2_line_info_changed { + struct gpio_v2_line_info info; + __aligned_u64 timestamp_ns; + __u32 event_type; + /* Pad struct to 64-bit boundary and reserve space for future use. */ + __u32 padding[5]; +}; + +/** + * enum gpio_v2_line_event_id - &struct gpio_v2_line_event.id values + * @GPIO_V2_LINE_EVENT_RISING_EDGE: event triggered by a rising edge + * @GPIO_V2_LINE_EVENT_FALLING_EDGE: event triggered by a falling edge + */ +enum gpio_v2_line_event_id { + GPIO_V2_LINE_EVENT_RISING_EDGE = 1, + GPIO_V2_LINE_EVENT_FALLING_EDGE = 2, +}; + +/** + * struct gpio_v2_line_event - The actual event being pushed to userspace + * @timestamp_ns: best estimate of time of event occurrence, in nanoseconds. + * The timestamp_ns is read from CLOCK_MONOTONIC and is intended to allow the + * accurate measurement of the time between events. It does not provide + * the wall-clock time. + * @id: event identifier with value from enum gpio_v2_line_event_id + * @offset: the offset of the line that triggered the event + * @seqno: the sequence number for this event in the sequence of events for + * all the lines in this line request + * @line_seqno: the sequence number for this event in the sequence of + * events on this particular line + * @padding: reserved for future use + */ +struct gpio_v2_line_event { + __aligned_u64 timestamp_ns; + __u32 id; + __u32 offset; + __u32 seqno; + __u32 line_seqno; + /* Space reserved for future use. */ + __u32 padding[6]; +}; + +/* + * ABI v1 + */ + /* Informational flags */ #define GPIOLINE_FLAG_KERNEL (1UL << 0) /* Line used by the kernel */ #define GPIOLINE_FLAG_IS_OUT (1UL << 1) @@ -149,8 +411,6 @@ struct gpiohandle_config { __u32 padding[4]; /* padding for future use */ }; -#define GPIOHANDLE_SET_CONFIG_IOCTL _IOWR(0xB4, 0x0a, struct gpiohandle_config) - /** * struct gpiohandle_data - Information of values on a GPIO handle * @values: when getting the state of lines this contains the current @@ -161,9 +421,6 @@ struct gpiohandle_data { __u8 values[GPIOHANDLES_MAX]; }; -#define GPIOHANDLE_GET_LINE_VALUES_IOCTL _IOWR(0xB4, 0x08, struct gpiohandle_data) -#define GPIOHANDLE_SET_LINE_VALUES_IOCTL _IOWR(0xB4, 0x09, struct gpiohandle_data) - /* Eventrequest flags */ #define GPIOEVENT_REQUEST_RISING_EDGE (1UL << 0) #define GPIOEVENT_REQUEST_FALLING_EDGE (1UL << 1) @@ -207,11 +464,31 @@ struct gpioevent_data { __u32 id; }; +/* + * v1 and v2 ioctl()s + */ #define GPIO_GET_CHIPINFO_IOCTL _IOR(0xB4, 0x01, struct gpiochip_info) +#define GPIO_GET_LINEINFO_UNWATCH_IOCTL _IOWR(0xB4, 0x0C, __u32) + +/* + * v2 ioctl()s + */ +#define GPIO_V2_GET_LINEINFO_IOCTL _IOWR(0xB4, 0x05, struct gpio_v2_line_info) +#define GPIO_V2_GET_LINEINFO_WATCH_IOCTL _IOWR(0xB4, 0x06, struct gpio_v2_line_info) +#define GPIO_V2_GET_LINE_IOCTL _IOWR(0xB4, 0x07, struct gpio_v2_line_request) +#define GPIO_V2_LINE_SET_CONFIG_IOCTL _IOWR(0xB4, 0x0D, struct gpio_v2_line_config) +#define GPIO_V2_LINE_GET_VALUES_IOCTL _IOWR(0xB4, 0x0E, struct gpio_v2_line_values) +#define GPIO_V2_LINE_SET_VALUES_IOCTL _IOWR(0xB4, 0x0F, struct gpio_v2_line_values) + +/* + * v1 ioctl()s + */ #define GPIO_GET_LINEINFO_IOCTL _IOWR(0xB4, 0x02, struct gpioline_info) -#define GPIO_GET_LINEINFO_WATCH_IOCTL _IOWR(0xB4, 0x0b, struct gpioline_info) -#define GPIO_GET_LINEINFO_UNWATCH_IOCTL _IOWR(0xB4, 0x0c, __u32) #define GPIO_GET_LINEHANDLE_IOCTL _IOWR(0xB4, 0x03, struct gpiohandle_request) #define GPIO_GET_LINEEVENT_IOCTL _IOWR(0xB4, 0x04, struct gpioevent_request) +#define GPIOHANDLE_GET_LINE_VALUES_IOCTL _IOWR(0xB4, 0x08, struct gpiohandle_data) +#define GPIOHANDLE_SET_LINE_VALUES_IOCTL _IOWR(0xB4, 0x09, struct gpiohandle_data) +#define GPIOHANDLE_SET_CONFIG_IOCTL _IOWR(0xB4, 0x0A, struct gpiohandle_config) +#define GPIO_GET_LINEINFO_WATCH_IOCTL _IOWR(0xB4, 0x0B, struct gpioline_info) #endif /* _UAPI_GPIO_H_ */ -- cgit v1.2.3 From b234d233fe30c63c4e461b03e2884a6765c8e5b0 Mon Sep 17 00:00:00 2001 From: Kent Gibson Date: Mon, 28 Sep 2020 08:28:00 +0800 Subject: gpio: uapi: document uAPI v1 as deprecated Update uAPI documentation to deprecate v1 structs and ioctls. Signed-off-by: Kent Gibson Signed-off-by: Bartosz Golaszewski --- include/uapi/linux/gpio.h | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/gpio.h b/include/uapi/linux/gpio.h index 5904f49399de..07865c601099 100644 --- a/include/uapi/linux/gpio.h +++ b/include/uapi/linux/gpio.h @@ -292,6 +292,9 @@ struct gpio_v2_line_event { /* * ABI v1 + * + * This version of the ABI is deprecated. + * Use the latest version of the ABI, defined above, instead. */ /* Informational flags */ @@ -315,6 +318,9 @@ struct gpio_v2_line_event { * @consumer: a functional name for the consumer of this GPIO line as set by * whatever is using it, will be empty if there is no current user but may * also be empty if the consumer doesn't set this up + * + * This struct is part of ABI v1 and is deprecated. + * Use struct gpio_v2_line_info instead. */ struct gpioline_info { __u32 line_offset; @@ -346,6 +352,9 @@ enum { * guarantee there are no implicit holes between it and subsequent members. * The 20-byte padding at the end makes sure we don't add any implicit padding * at the end of the structure on 64-bit architectures. + * + * This struct is part of ABI v1 and is deprecated. + * Use struct gpio_v2_line_info_changed instead. */ struct gpioline_info_changed { struct gpioline_info info; @@ -385,6 +394,9 @@ struct gpioline_info_changed { * @fd: if successful this field will contain a valid anonymous file handle * after a GPIO_GET_LINEHANDLE_IOCTL operation, zero or negative value * means error + * + * This struct is part of ABI v1 and is deprecated. + * Use struct gpio_v2_line_request instead. */ struct gpiohandle_request { __u32 lineoffsets[GPIOHANDLES_MAX]; @@ -404,6 +416,9 @@ struct gpiohandle_request { * this specifies the default output value, should be 0 (low) or * 1 (high), anything else than 0 or 1 will be interpreted as 1 (high) * @padding: reserved for future use and should be zero filled + * + * This struct is part of ABI v1 and is deprecated. + * Use struct gpio_v2_line_config instead. */ struct gpiohandle_config { __u32 flags; @@ -416,6 +431,9 @@ struct gpiohandle_config { * @values: when getting the state of lines this contains the current * state of a line, when setting the state of lines these should contain * the desired target state + * + * This struct is part of ABI v1 and is deprecated. + * Use struct gpio_v2_line_values instead. */ struct gpiohandle_data { __u8 values[GPIOHANDLES_MAX]; @@ -439,6 +457,9 @@ struct gpiohandle_data { * @fd: if successful this field will contain a valid anonymous file handle * after a GPIO_GET_LINEEVENT_IOCTL operation, zero or negative value * means error + * + * This struct is part of ABI v1 and is deprecated. + * Use struct gpio_v2_line_request instead. */ struct gpioevent_request { __u32 lineoffset; @@ -458,6 +479,9 @@ struct gpioevent_request { * struct gpioevent_data - The actual event being pushed to userspace * @timestamp: best estimate of time of event occurrence, in nanoseconds * @id: event identifier + * + * This struct is part of ABI v1 and is deprecated. + * Use struct gpio_v2_line_event instead. */ struct gpioevent_data { __u64 timestamp; @@ -482,6 +506,8 @@ struct gpioevent_data { /* * v1 ioctl()s + * + * These ioctl()s are deprecated. Use the v2 equivalent instead. */ #define GPIO_GET_LINEINFO_IOCTL _IOWR(0xB4, 0x02, struct gpioline_info) #define GPIO_GET_LINEHANDLE_IOCTL _IOWR(0xB4, 0x03, struct gpiohandle_request) -- cgit v1.2.3 From 002f2176532093753cb6ced61e5ea7b8904c6cae Mon Sep 17 00:00:00 2001 From: "Jose M. Guisado Gomez" Date: Mon, 28 Sep 2020 14:27:10 +0200 Subject: netfilter: nf_tables: add userdata attributes to nft_chain Enables storing userdata for nft_chain. Field udata points to user data and udlen stores its length. Adds new attribute flag NFTA_CHAIN_USERDATA. Signed-off-by: Jose M. Guisado Gomez Signed-off-by: Pablo Neira Ayuso --- include/uapi/linux/netfilter/nf_tables.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/netfilter/nf_tables.h b/include/uapi/linux/netfilter/nf_tables.h index 3c2469b43742..352ee51707a1 100644 --- a/include/uapi/linux/netfilter/nf_tables.h +++ b/include/uapi/linux/netfilter/nf_tables.h @@ -208,6 +208,7 @@ enum nft_chain_flags { * @NFTA_CHAIN_COUNTERS: counter specification of the chain (NLA_NESTED: nft_counter_attributes) * @NFTA_CHAIN_FLAGS: chain flags * @NFTA_CHAIN_ID: uniquely identifies a chain in a transaction (NLA_U32) + * @NFTA_CHAIN_USERDATA: user data (NLA_BINARY) */ enum nft_chain_attributes { NFTA_CHAIN_UNSPEC, @@ -222,6 +223,7 @@ enum nft_chain_attributes { NFTA_CHAIN_PAD, NFTA_CHAIN_FLAGS, NFTA_CHAIN_ID, + NFTA_CHAIN_USERDATA, __NFTA_CHAIN_MAX }; #define NFTA_CHAIN_MAX (__NFTA_CHAIN_MAX - 1) -- cgit v1.2.3 From b426ce83baa7dff947fb354118d3133f2953aac8 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Wed, 30 Sep 2020 17:18:15 +0200 Subject: bpf: Add classid helper only based on skb->sk Similarly to 5a52ae4e32a6 ("bpf: Allow to retrieve cgroup v1 classid from v2 hooks"), add a helper to retrieve cgroup v1 classid solely based on the skb->sk, so it can be used as key as part of BPF map lookups out of tc from host ns, in particular given the skb->sk is retained these days when crossing net ns thanks to 9c4c325252c5 ("skbuff: preserve sock reference when scrubbing the skb."). This is similar to bpf_skb_cgroup_id() which implements the same for v2. Kubernetes ecosystem is still operating on v1 however, hence net_cls needs to be used there until this can be dropped in with the v2 helper of bpf_skb_cgroup_id(). Signed-off-by: Daniel Borkmann Signed-off-by: Alexei Starovoitov Acked-by: Martin KaFai Lau Link: https://lore.kernel.org/bpf/ed633cf27a1c620e901c5aa99ebdefb028dce600.1601477936.git.daniel@iogearbox.net --- include/uapi/linux/bpf.h | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 2b1d3f16cbd1..6116a7f54c8f 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -3643,6 +3643,15 @@ union bpf_attr { * *flags* are identical to those used for bpf_snprintf_btf. * Return * 0 on success or a negative error in case of failure. + * + * u64 bpf_skb_cgroup_classid(struct sk_buff *skb) + * Description + * See **bpf_get_cgroup_classid**\ () for the main description. + * This helper differs from **bpf_get_cgroup_classid**\ () in that + * the cgroup v1 net_cls class is retrieved only from the *skb*'s + * associated socket instead of the current process. + * Return + * The id is returned or 0 in case the id could not be retrieved. */ #define __BPF_FUNC_MAPPER(FN) \ FN(unspec), \ @@ -3796,6 +3805,7 @@ union bpf_attr { FN(copy_from_user), \ FN(snprintf_btf), \ FN(seq_printf_btf), \ + FN(skb_cgroup_classid), \ /* */ /* integer value in 'imm' field of BPF_CALL instruction selects which helper -- cgit v1.2.3 From b4ab31414970a7a03a5d55d75083f2c101a30592 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Wed, 30 Sep 2020 17:18:17 +0200 Subject: bpf: Add redirect_neigh helper as redirect drop-in Add a redirect_neigh() helper as redirect() drop-in replacement for the xmit side. Main idea for the helper is to be very similar in semantics to the latter just that the skb gets injected into the neighboring subsystem in order to let the stack do the work it knows best anyway to populate the L2 addresses of the packet and then hand over to dev_queue_xmit() as redirect() does. This solves two bigger items: i) skbs don't need to go up to the stack on the host facing veth ingress side for traffic egressing the container to achieve the same for populating L2 which also has the huge advantage that ii) the skb->sk won't get orphaned in ip_rcv_core() when entering the IP routing layer on the host stack. Given that skb->sk neither gets orphaned when crossing the netns as per 9c4c325252c5 ("skbuff: preserve sock reference when scrubbing the skb.") the helper can then push the skbs directly to the phys device where FQ scheduler can do its work and TCP stack gets proper backpressure given we hold on to skb->sk as long as skb is still residing in queues. With the helper used in BPF data path to then push the skb to the phys device, I observed a stable/consistent TCP_STREAM improvement on veth devices for traffic going container -> host -> host -> container from ~10Gbps to ~15Gbps for a single stream in my test environment. Signed-off-by: Daniel Borkmann Signed-off-by: Alexei Starovoitov Reviewed-by: David Ahern Acked-by: Martin KaFai Lau Cc: David Ahern Link: https://lore.kernel.org/bpf/f207de81629e1724899b73b8112e0013be782d35.1601477936.git.daniel@iogearbox.net --- include/uapi/linux/bpf.h | 14 ++++++++++++++ 1 file changed, 14 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 6116a7f54c8f..1f17c6752deb 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -3652,6 +3652,19 @@ union bpf_attr { * associated socket instead of the current process. * Return * The id is returned or 0 in case the id could not be retrieved. + * + * long bpf_redirect_neigh(u32 ifindex, u64 flags) + * Description + * Redirect the packet to another net device of index *ifindex* + * and fill in L2 addresses from neighboring subsystem. This helper + * is somewhat similar to **bpf_redirect**\ (), except that it + * fills in e.g. MAC addresses based on the L3 information from + * the packet. This helper is supported for IPv4 and IPv6 protocols. + * The *flags* argument is reserved and must be 0. The helper is + * currently only supported for tc BPF program types. + * Return + * The helper returns **TC_ACT_REDIRECT** on success or + * **TC_ACT_SHOT** on error. */ #define __BPF_FUNC_MAPPER(FN) \ FN(unspec), \ @@ -3806,6 +3819,7 @@ union bpf_attr { FN(snprintf_btf), \ FN(seq_printf_btf), \ FN(skb_cgroup_classid), \ + FN(redirect_neigh), \ /* */ /* integer value in 'imm' field of BPF_CALL instruction selects which helper -- cgit v1.2.3 From 9d4a75efa200a31deabe9ba1c941aef697e6bb30 Mon Sep 17 00:00:00 2001 From: Stefano Garzarella Date: Thu, 27 Aug 2020 16:58:29 +0200 Subject: io_uring: use an enumeration for io_uring_register(2) opcodes The enumeration allows us to keep track of the last io_uring_register(2) opcode available. Behaviour and opcodes names don't change. Signed-off-by: Stefano Garzarella Reviewed-by: Kees Cook Signed-off-by: Jens Axboe --- include/uapi/linux/io_uring.h | 27 ++++++++++++++++----------- 1 file changed, 16 insertions(+), 11 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h index d65fde732518..5f12ae6a415c 100644 --- a/include/uapi/linux/io_uring.h +++ b/include/uapi/linux/io_uring.h @@ -255,17 +255,22 @@ struct io_uring_params { /* * io_uring_register(2) opcodes and arguments */ -#define IORING_REGISTER_BUFFERS 0 -#define IORING_UNREGISTER_BUFFERS 1 -#define IORING_REGISTER_FILES 2 -#define IORING_UNREGISTER_FILES 3 -#define IORING_REGISTER_EVENTFD 4 -#define IORING_UNREGISTER_EVENTFD 5 -#define IORING_REGISTER_FILES_UPDATE 6 -#define IORING_REGISTER_EVENTFD_ASYNC 7 -#define IORING_REGISTER_PROBE 8 -#define IORING_REGISTER_PERSONALITY 9 -#define IORING_UNREGISTER_PERSONALITY 10 +enum { + IORING_REGISTER_BUFFERS = 0, + IORING_UNREGISTER_BUFFERS = 1, + IORING_REGISTER_FILES = 2, + IORING_UNREGISTER_FILES = 3, + IORING_REGISTER_EVENTFD = 4, + IORING_UNREGISTER_EVENTFD = 5, + IORING_REGISTER_FILES_UPDATE = 6, + IORING_REGISTER_EVENTFD_ASYNC = 7, + IORING_REGISTER_PROBE = 8, + IORING_REGISTER_PERSONALITY = 9, + IORING_UNREGISTER_PERSONALITY = 10, + + /* this goes last */ + IORING_REGISTER_LAST +}; struct io_uring_files_update { __u32 offset; -- cgit v1.2.3 From 21b55dbc0653018b8cd4513c37cbca303b0f0d50 Mon Sep 17 00:00:00 2001 From: Stefano Garzarella Date: Thu, 27 Aug 2020 16:58:30 +0200 Subject: io_uring: add IOURING_REGISTER_RESTRICTIONS opcode The new io_uring_register(2) IOURING_REGISTER_RESTRICTIONS opcode permanently installs a feature allowlist on an io_ring_ctx. The io_ring_ctx can then be passed to untrusted code with the knowledge that only operations present in the allowlist can be executed. The allowlist approach ensures that new features added to io_uring do not accidentally become available when an existing application is launched on a newer kernel version. Currently is it possible to restrict sqe opcodes, sqe flags, and register opcodes. IOURING_REGISTER_RESTRICTIONS can only be made once. Afterwards it is not possible to change restrictions anymore. This prevents untrusted code from removing restrictions. Suggested-by: Stefan Hajnoczi Signed-off-by: Stefano Garzarella Reviewed-by: Kees Cook Signed-off-by: Jens Axboe --- include/uapi/linux/io_uring.h | 31 +++++++++++++++++++++++++++++++ 1 file changed, 31 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h index 5f12ae6a415c..6e7f2e5e917b 100644 --- a/include/uapi/linux/io_uring.h +++ b/include/uapi/linux/io_uring.h @@ -267,6 +267,7 @@ enum { IORING_REGISTER_PROBE = 8, IORING_REGISTER_PERSONALITY = 9, IORING_UNREGISTER_PERSONALITY = 10, + IORING_REGISTER_RESTRICTIONS = 11, /* this goes last */ IORING_REGISTER_LAST @@ -295,4 +296,34 @@ struct io_uring_probe { struct io_uring_probe_op ops[0]; }; +struct io_uring_restriction { + __u16 opcode; + union { + __u8 register_op; /* IORING_RESTRICTION_REGISTER_OP */ + __u8 sqe_op; /* IORING_RESTRICTION_SQE_OP */ + __u8 sqe_flags; /* IORING_RESTRICTION_SQE_FLAGS_* */ + }; + __u8 resv; + __u32 resv2[3]; +}; + +/* + * io_uring_restriction->opcode values + */ +enum { + /* Allow an io_uring_register(2) opcode */ + IORING_RESTRICTION_REGISTER_OP = 0, + + /* Allow an sqe opcode */ + IORING_RESTRICTION_SQE_OP = 1, + + /* Allow sqe flags */ + IORING_RESTRICTION_SQE_FLAGS_ALLOWED = 2, + + /* Require sqe flags (these flags must be set on each submission) */ + IORING_RESTRICTION_SQE_FLAGS_REQUIRED = 3, + + IORING_RESTRICTION_LAST +}; + #endif -- cgit v1.2.3 From 7e84e1c7566a1df470a9e1f49d3db2ce311261a4 Mon Sep 17 00:00:00 2001 From: Stefano Garzarella Date: Thu, 27 Aug 2020 16:58:31 +0200 Subject: io_uring: allow disabling rings during the creation This patch adds a new IORING_SETUP_R_DISABLED flag to start the rings disabled, allowing the user to register restrictions, buffers, files, before to start processing SQEs. When IORING_SETUP_R_DISABLED is set, SQE are not processed and SQPOLL kthread is not started. The restrictions registration are allowed only when the rings are disable to prevent concurrency issue while processing SQEs. The rings can be enabled using IORING_REGISTER_ENABLE_RINGS opcode with io_uring_register(2). Suggested-by: Jens Axboe Signed-off-by: Stefano Garzarella Reviewed-by: Kees Cook Signed-off-by: Jens Axboe --- include/uapi/linux/io_uring.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h index 6e7f2e5e917b..a0c85e0e9016 100644 --- a/include/uapi/linux/io_uring.h +++ b/include/uapi/linux/io_uring.h @@ -95,6 +95,7 @@ enum { #define IORING_SETUP_CQSIZE (1U << 3) /* app defines CQ size */ #define IORING_SETUP_CLAMP (1U << 4) /* clamp SQ/CQ ring sizes */ #define IORING_SETUP_ATTACH_WQ (1U << 5) /* attach to existing wq */ +#define IORING_SETUP_R_DISABLED (1U << 6) /* start with ring disabled */ enum { IORING_OP_NOP, @@ -268,6 +269,7 @@ enum { IORING_REGISTER_PERSONALITY = 9, IORING_UNREGISTER_PERSONALITY = 10, IORING_REGISTER_RESTRICTIONS = 11, + IORING_REGISTER_ENABLE_RINGS = 12, /* this goes last */ IORING_REGISTER_LAST -- cgit v1.2.3 From 90554200724d5b280439dc361fe7ee92fe459ea7 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Thu, 3 Sep 2020 12:12:41 -0600 Subject: io_uring: provide IORING_ENTER_SQ_WAIT for SQPOLL SQ ring waits When using SQPOLL, applications can run into the issue of running out of SQ ring entries because the thread hasn't consumed them yet. The only option for dealing with that is checking later, or busy checking for the condition. Provide IORING_ENTER_SQ_WAIT if applications want to wait on this condition. Signed-off-by: Jens Axboe --- include/uapi/linux/io_uring.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h index a0c85e0e9016..98d8e06dea22 100644 --- a/include/uapi/linux/io_uring.h +++ b/include/uapi/linux/io_uring.h @@ -225,6 +225,7 @@ struct io_cqring_offsets { */ #define IORING_ENTER_GETEVENTS (1U << 0) #define IORING_ENTER_SQ_WAKEUP (1U << 1) +#define IORING_ENTER_SQ_WAIT (1U << 2) /* * Passed in for io_uring_setup(2). Copied back with updated info on success -- cgit v1.2.3 From 792caccc4526bb489e054f9ab61d7c024b15dea2 Mon Sep 17 00:00:00 2001 From: Song Liu Date: Wed, 30 Sep 2020 15:49:26 -0700 Subject: bpf: Introduce BPF_F_PRESERVE_ELEMS for perf event array Currently, perf event in perf event array is removed from the array when the map fd used to add the event is closed. This behavior makes it difficult to the share perf events with perf event array. Introduce perf event map that keeps the perf event open with a new flag BPF_F_PRESERVE_ELEMS. With this flag set, perf events in the array are not removed when the original map fd is closed. Instead, the perf event will stay in the map until 1) it is explicitly removed from the array; or 2) the array is freed. Signed-off-by: Song Liu Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20200930224927.1936644-2-songliubraving@fb.com --- include/uapi/linux/bpf.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 1f17c6752deb..4f556cfcbfbe 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -414,6 +414,9 @@ enum { /* Enable memory-mapping BPF map */ BPF_F_MMAPABLE = (1U << 10), + +/* Share perf_event among processes */ + BPF_F_PRESERVE_ELEMS = (1U << 11), }; /* Flags for BPF_PROG_QUERY. */ -- cgit v1.2.3 From 1e6aaae93e9ddb9dc664993eb949b1da94cab3a5 Mon Sep 17 00:00:00 2001 From: Jacob Pan Date: Fri, 25 Sep 2020 09:32:43 -0700 Subject: iommu/uapi: Add argsz for user filled data As IOMMU UAPI gets extended, user data size may increase. To support backward compatibiliy, this patch introduces a size field to each UAPI data structures. It is *always* the responsibility for the user to fill in the correct size. Padding fields are adjusted to ensure 8 byte alignment. Specific scenarios for user data handling are documented in: Documentation/userspace-api/iommu.rst As there is no current users of the API, struct version is not incremented. Signed-off-by: Liu Yi L Signed-off-by: Jacob Pan Reviewed-by: Eric Auger Link: https://lore.kernel.org/r/1601051567-54787-3-git-send-email-jacob.jun.pan@linux.intel.com Signed-off-by: Joerg Roedel --- include/uapi/linux/iommu.h | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/iommu.h b/include/uapi/linux/iommu.h index c2b2caf9ed41..b42acc8fe007 100644 --- a/include/uapi/linux/iommu.h +++ b/include/uapi/linux/iommu.h @@ -139,6 +139,7 @@ enum iommu_page_response_code { /** * struct iommu_page_response - Generic page response information + * @argsz: User filled size of this data * @version: API version of this structure * @flags: encodes whether the corresponding fields are valid * (IOMMU_FAULT_PAGE_RESPONSE_* values) @@ -147,6 +148,7 @@ enum iommu_page_response_code { * @code: response code from &enum iommu_page_response_code */ struct iommu_page_response { + __u32 argsz; #define IOMMU_PAGE_RESP_VERSION_1 1 __u32 version; #define IOMMU_PAGE_RESP_PASID_VALID (1 << 0) @@ -222,6 +224,7 @@ struct iommu_inv_pasid_info { /** * struct iommu_cache_invalidate_info - First level/stage invalidation * information + * @argsz: User filled size of this data * @version: API version of this structure * @cache: bitfield that allows to select which caches to invalidate * @granularity: defines the lowest granularity used for the invalidation: @@ -250,6 +253,7 @@ struct iommu_inv_pasid_info { * must support the used granularity. */ struct iommu_cache_invalidate_info { + __u32 argsz; #define IOMMU_CACHE_INVALIDATE_INFO_VERSION_1 1 __u32 version; /* IOMMU paging structure cache */ @@ -259,7 +263,7 @@ struct iommu_cache_invalidate_info { #define IOMMU_CACHE_INV_TYPE_NR (3) __u8 cache; __u8 granularity; - __u8 padding[2]; + __u8 padding[6]; union { struct iommu_inv_pasid_info pasid_info; struct iommu_inv_addr_info addr_info; @@ -296,6 +300,7 @@ struct iommu_gpasid_bind_data_vtd { /** * struct iommu_gpasid_bind_data - Information about device and guest PASID binding + * @argsz: User filled size of this data * @version: Version of this data structure * @format: PASID table entry format * @flags: Additional information on guest bind request @@ -313,17 +318,18 @@ struct iommu_gpasid_bind_data_vtd { * PASID to host PASID based on this bind data. */ struct iommu_gpasid_bind_data { + __u32 argsz; #define IOMMU_GPASID_BIND_VERSION_1 1 __u32 version; #define IOMMU_PASID_FORMAT_INTEL_VTD 1 __u32 format; + __u32 addr_width; #define IOMMU_SVA_GPASID_VAL (1 << 0) /* guest PASID valid */ __u64 flags; __u64 gpgd; __u64 hpasid; __u64 gpasid; - __u32 addr_width; - __u8 padding[12]; + __u8 padding[8]; /* Vendor specific data */ union { struct iommu_gpasid_bind_data_vtd vtd; -- cgit v1.2.3 From 8d3bb3b8cbf2ffb3ef73720a48b3445518dcdb55 Mon Sep 17 00:00:00 2001 From: Jacob Pan Date: Fri, 25 Sep 2020 09:32:44 -0700 Subject: iommu/uapi: Use named union for user data IOMMU UAPI data size is filled by the user space which must be validated by the kernel. To ensure backward compatibility, user data can only be extended by either re-purpose padding bytes or extend the variable sized union at the end. No size change is allowed before the union. Therefore, the minimum size is the offset of the union. To use offsetof() on the union, we must make it named. Signed-off-by: Jacob Pan Reviewed-by: Lu Baolu Reviewed-by: Eric Auger Link: https://lore.kernel.org/linux-iommu/20200611145518.0c2817d6@x1.home/ Link: https://lore.kernel.org/r/1601051567-54787-4-git-send-email-jacob.jun.pan@linux.intel.com Signed-off-by: Joerg Roedel --- include/uapi/linux/iommu.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/iommu.h b/include/uapi/linux/iommu.h index b42acc8fe007..5946779ac1f9 100644 --- a/include/uapi/linux/iommu.h +++ b/include/uapi/linux/iommu.h @@ -267,7 +267,7 @@ struct iommu_cache_invalidate_info { union { struct iommu_inv_pasid_info pasid_info; struct iommu_inv_addr_info addr_info; - }; + } granu; }; /** @@ -333,7 +333,7 @@ struct iommu_gpasid_bind_data { /* Vendor specific data */ union { struct iommu_gpasid_bind_data_vtd vtd; - }; + } vendor; }; #endif /* _UAPI_IOMMU_H */ -- cgit v1.2.3 From d90573812eea63c6bc8ab8a38f661b4c27c3cdc0 Mon Sep 17 00:00:00 2001 From: Jacob Pan Date: Fri, 25 Sep 2020 09:32:46 -0700 Subject: iommu/uapi: Handle data and argsz filled by users IOMMU user APIs are responsible for processing user data. This patch changes the interface such that user pointers can be passed into IOMMU code directly. Separate kernel APIs without user pointers are introduced for in-kernel users of the UAPI functionality. IOMMU UAPI data has a user filled argsz field which indicates the data length of the structure. User data is not trusted, argsz must be validated based on the current kernel data size, mandatory data size, and feature flags. User data may also be extended, resulting in possible argsz increase. Backward compatibility is ensured based on size and flags (or the functional equivalent fields) checking. This patch adds sanity checks in the IOMMU layer. In addition to argsz, reserved/unused fields in padding, flags, and version are also checked. Details are documented in Documentation/userspace-api/iommu.rst Signed-off-by: Liu Yi L Signed-off-by: Jacob Pan Reviewed-by: Jean-Philippe Brucker Reviewed-by: Eric Auger Link: https://lore.kernel.org/r/1601051567-54787-6-git-send-email-jacob.jun.pan@linux.intel.com Signed-off-by: Joerg Roedel --- include/uapi/linux/iommu.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/iommu.h b/include/uapi/linux/iommu.h index 5946779ac1f9..66d4ca40b40f 100644 --- a/include/uapi/linux/iommu.h +++ b/include/uapi/linux/iommu.h @@ -322,6 +322,7 @@ struct iommu_gpasid_bind_data { #define IOMMU_GPASID_BIND_VERSION_1 1 __u32 version; #define IOMMU_PASID_FORMAT_INTEL_VTD 1 +#define IOMMU_PASID_FORMAT_LAST 2 __u32 format; __u32 addr_width; #define IOMMU_SVA_GPASID_VAL (1 << 0) /* guest PASID valid */ -- cgit v1.2.3 From 6278eecba31f3983fe2743fc01b198433aa18247 Mon Sep 17 00:00:00 2001 From: Jacob Pan Date: Fri, 25 Sep 2020 09:32:47 -0700 Subject: iommu/vt-d: Check UAPI data processed by IOMMU core IOMMU generic layer already does sanity checks on UAPI data for version match and argsz range based on generic information. This patch adjusts the following data checking responsibilities: - removes the redundant version check from VT-d driver - removes the check for vendor specific data size - adds check for the use of reserved/undefined flags Signed-off-by: Jacob Pan Reviewed-by: Eric Auger Link: https://lore.kernel.org/r/1601051567-54787-7-git-send-email-jacob.jun.pan@linux.intel.com Signed-off-by: Joerg Roedel --- include/uapi/linux/iommu.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/iommu.h b/include/uapi/linux/iommu.h index 66d4ca40b40f..e1d9e75f2c94 100644 --- a/include/uapi/linux/iommu.h +++ b/include/uapi/linux/iommu.h @@ -288,6 +288,7 @@ struct iommu_gpasid_bind_data_vtd { #define IOMMU_SVA_VTD_GPASID_PWT (1 << 3) /* page-level write through */ #define IOMMU_SVA_VTD_GPASID_EMTE (1 << 4) /* extended mem type enable */ #define IOMMU_SVA_VTD_GPASID_CD (1 << 5) /* PASID-level cache disable */ +#define IOMMU_SVA_VTD_GPASID_LAST (1 << 6) __u64 flags; __u32 pat; __u32 emt; -- cgit v1.2.3 From 61931c0ee9cf5da575996b977a2358b598ef84bb Mon Sep 17 00:00:00 2001 From: Mike Snitzer Date: Thu, 1 Oct 2020 15:00:56 -0400 Subject: dm: export dm_copy_name_and_uuid Allow DM targets to access the configured name and uuid. Also, bump DM ioctl version. Signed-off-by: Mike Snitzer --- include/uapi/linux/dm-ioctl.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/dm-ioctl.h b/include/uapi/linux/dm-ioctl.h index 6622912c2342..4933b6b67b85 100644 --- a/include/uapi/linux/dm-ioctl.h +++ b/include/uapi/linux/dm-ioctl.h @@ -272,9 +272,9 @@ enum { #define DM_DEV_SET_GEOMETRY _IOWR(DM_IOCTL, DM_DEV_SET_GEOMETRY_CMD, struct dm_ioctl) #define DM_VERSION_MAJOR 4 -#define DM_VERSION_MINOR 42 +#define DM_VERSION_MINOR 43 #define DM_VERSION_PATCHLEVEL 0 -#define DM_VERSION_EXTRA "-ioctl (2020-02-27)" +#define DM_VERSION_EXTRA "-ioctl (2020-10-01)" /* Status bits */ #define DM_READONLY_FLAG (1 << 0) /* In/Out */ -- cgit v1.2.3 From ba1df797e5bbba68ddd1a29bd658b1c11f9a60b6 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Thu, 1 Oct 2020 18:59:07 -0400 Subject: NFSACL: Replace PROC() macro with open code Clean up: Follow-up on ten-year-old commit b9081d90f5b9 ("NFS: kill off complicated macro 'PROC'") by performing the same conversion in the NFSACL code. To reduce the chance of error, I copied the original C preprocessor output and then made some minor edits. Signed-off-by: Chuck Lever Signed-off-by: J. Bruce Fields --- include/uapi/linux/nfsacl.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/nfsacl.h b/include/uapi/linux/nfsacl.h index ca9a8501ff30..2c2ad204d3b0 100644 --- a/include/uapi/linux/nfsacl.h +++ b/include/uapi/linux/nfsacl.h @@ -9,11 +9,13 @@ #define NFS_ACL_PROGRAM 100227 +#define ACLPROC2_NULL 0 #define ACLPROC2_GETACL 1 #define ACLPROC2_SETACL 2 #define ACLPROC2_GETATTR 3 #define ACLPROC2_ACCESS 4 +#define ACLPROC3_NULL 0 #define ACLPROC3_GETACL 1 #define ACLPROC3_SETACL 2 -- cgit v1.2.3 From 4976b718c3551faba2c0616ef55ebeb74db1c5ca Mon Sep 17 00:00:00 2001 From: Hao Luo Date: Tue, 29 Sep 2020 16:50:44 -0700 Subject: bpf: Introduce pseudo_btf_id Pseudo_btf_id is a type of ld_imm insn that associates a btf_id to a ksym so that further dereferences on the ksym can use the BTF info to validate accesses. Internally, when seeing a pseudo_btf_id ld insn, the verifier reads the btf_id stored in the insn[0]'s imm field and marks the dst_reg as PTR_TO_BTF_ID. The btf_id points to a VAR_KIND, which is encoded in btf_vminux by pahole. If the VAR is not of a struct type, the dst reg will be marked as PTR_TO_MEM instead of PTR_TO_BTF_ID and the mem_size is resolved to the size of the VAR's type. >From the VAR btf_id, the verifier can also read the address of the ksym's corresponding kernel var from kallsyms and use that to fill dst_reg. Therefore, the proper functionality of pseudo_btf_id depends on (1) kallsyms and (2) the encoding of kernel global VARs in pahole, which should be available since pahole v1.18. Signed-off-by: Hao Luo Signed-off-by: Alexei Starovoitov Acked-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20200929235049.2533242-2-haoluo@google.com --- include/uapi/linux/bpf.h | 36 +++++++++++++++++++++++++++--------- 1 file changed, 27 insertions(+), 9 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 4f556cfcbfbe..2aa156af24d6 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -356,18 +356,36 @@ enum bpf_link_type { #define BPF_F_SLEEPABLE (1U << 4) /* When BPF ldimm64's insn[0].src_reg != 0 then this can have - * two extensions: - * - * insn[0].src_reg: BPF_PSEUDO_MAP_FD BPF_PSEUDO_MAP_VALUE - * insn[0].imm: map fd map fd - * insn[1].imm: 0 offset into value - * insn[0].off: 0 0 - * insn[1].off: 0 0 - * ldimm64 rewrite: address of map address of map[0]+offset - * verifier type: CONST_PTR_TO_MAP PTR_TO_MAP_VALUE + * the following extensions: + * + * insn[0].src_reg: BPF_PSEUDO_MAP_FD + * insn[0].imm: map fd + * insn[1].imm: 0 + * insn[0].off: 0 + * insn[1].off: 0 + * ldimm64 rewrite: address of map + * verifier type: CONST_PTR_TO_MAP */ #define BPF_PSEUDO_MAP_FD 1 +/* insn[0].src_reg: BPF_PSEUDO_MAP_VALUE + * insn[0].imm: map fd + * insn[1].imm: offset into value + * insn[0].off: 0 + * insn[1].off: 0 + * ldimm64 rewrite: address of map[0]+offset + * verifier type: PTR_TO_MAP_VALUE + */ #define BPF_PSEUDO_MAP_VALUE 2 +/* insn[0].src_reg: BPF_PSEUDO_BTF_ID + * insn[0].imm: kernel btd id of VAR + * insn[1].imm: 0 + * insn[0].off: 0 + * insn[1].off: 0 + * ldimm64 rewrite: address of the kernel variable + * verifier type: PTR_TO_BTF_ID or PTR_TO_MEM, depending on whether the var + * is struct/union. + */ +#define BPF_PSEUDO_BTF_ID 3 /* when bpf_call->src_reg == BPF_PSEUDO_CALL, bpf_call->imm == pc-relative * offset to another bpf function -- cgit v1.2.3 From eaa6bcb71ef6ed3dc18fc525ee7e293b06b4882b Mon Sep 17 00:00:00 2001 From: Hao Luo Date: Tue, 29 Sep 2020 16:50:47 -0700 Subject: bpf: Introduce bpf_per_cpu_ptr() Add bpf_per_cpu_ptr() to help bpf programs access percpu vars. bpf_per_cpu_ptr() has the same semantic as per_cpu_ptr() in the kernel except that it may return NULL. This happens when the cpu parameter is out of range. So the caller must check the returned value. Signed-off-by: Hao Luo Signed-off-by: Alexei Starovoitov Acked-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20200929235049.2533242-5-haoluo@google.com --- include/uapi/linux/bpf.h | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 2aa156af24d6..f3c1b637ab39 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -3686,6 +3686,23 @@ union bpf_attr { * Return * The helper returns **TC_ACT_REDIRECT** on success or * **TC_ACT_SHOT** on error. + * + * void *bpf_per_cpu_ptr(const void *percpu_ptr, u32 cpu) + * Description + * Take a pointer to a percpu ksym, *percpu_ptr*, and return a + * pointer to the percpu kernel variable on *cpu*. A ksym is an + * extern variable decorated with '__ksym'. For ksym, there is a + * global var (either static or global) defined of the same name + * in the kernel. The ksym is percpu if the global var is percpu. + * The returned pointer points to the global percpu var on *cpu*. + * + * bpf_per_cpu_ptr() has the same semantic as per_cpu_ptr() in the + * kernel, except that bpf_per_cpu_ptr() may return NULL. This + * happens if *cpu* is larger than nr_cpu_ids. The caller of + * bpf_per_cpu_ptr() must check the returned value. + * Return + * A pointer pointing to the kernel percpu variable on *cpu*, or + * NULL, if *cpu* is invalid. */ #define __BPF_FUNC_MAPPER(FN) \ FN(unspec), \ @@ -3841,6 +3858,7 @@ union bpf_attr { FN(seq_printf_btf), \ FN(skb_cgroup_classid), \ FN(redirect_neigh), \ + FN(bpf_per_cpu_ptr), \ /* */ /* integer value in 'imm' field of BPF_CALL instruction selects which helper -- cgit v1.2.3 From 63d9b80dcf2c67bc5ade61cbbaa09d7af21f43f1 Mon Sep 17 00:00:00 2001 From: Hao Luo Date: Tue, 29 Sep 2020 16:50:48 -0700 Subject: bpf: Introducte bpf_this_cpu_ptr() Add bpf_this_cpu_ptr() to help access percpu var on this cpu. This helper always returns a valid pointer, therefore no need to check returned value for NULL. Also note that all programs run with preemption disabled, which means that the returned pointer is stable during all the execution of the program. Signed-off-by: Hao Luo Signed-off-by: Alexei Starovoitov Acked-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20200929235049.2533242-6-haoluo@google.com --- include/uapi/linux/bpf.h | 13 +++++++++++++ 1 file changed, 13 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index f3c1b637ab39..c446394135be 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -3703,6 +3703,18 @@ union bpf_attr { * Return * A pointer pointing to the kernel percpu variable on *cpu*, or * NULL, if *cpu* is invalid. + * + * void *bpf_this_cpu_ptr(const void *percpu_ptr) + * Description + * Take a pointer to a percpu ksym, *percpu_ptr*, and return a + * pointer to the percpu kernel variable on this cpu. See the + * description of 'ksym' in **bpf_per_cpu_ptr**\ (). + * + * bpf_this_cpu_ptr() has the same semantic as this_cpu_ptr() in + * the kernel. Different from **bpf_per_cpu_ptr**\ (), it would + * never return NULL. + * Return + * A pointer pointing to the kernel percpu variable on this cpu. */ #define __BPF_FUNC_MAPPER(FN) \ FN(unspec), \ @@ -3859,6 +3871,7 @@ union bpf_attr { FN(skb_cgroup_classid), \ FN(redirect_neigh), \ FN(bpf_per_cpu_ptr), \ + FN(bpf_this_cpu_ptr), \ /* */ /* integer value in 'imm' field of BPF_CALL instruction selects which helper -- cgit v1.2.3 From f5ace5ef37b1e1de49882248656f35c45e041585 Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Fri, 2 Oct 2020 18:10:33 -0500 Subject: block: scsi_ioctl: Avoid the use of one-element arrays One-element arrays are being deprecated[1]. Replace the one-element array with a simple object of type compat_caddr_t: 'compat_caddr_t unused'[2], once it seems this field is actually never used. Also, update struct cdrom_generic_command in UAPI by adding an anonimous union to avoid using the one-element array _reserved_. [1] https://www.kernel.org/doc/html/v5.9-rc1/process/deprecated.html#zero-length-and-one-element-arrays [2] https://github.com/KSPP/linux/issues/86 Signed-off-by: Gustavo A. R. Silva Link: https://lore.kernel.org/lkml/5f76f5d0.qJ4t%2FHWuRzSW7bTa%25lkp@intel.com/ Build-tested-by: kernel test robot Signed-off-by: Jens Axboe --- include/uapi/linux/cdrom.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/cdrom.h b/include/uapi/linux/cdrom.h index 2817230148fd..6c34f6e2f1f7 100644 --- a/include/uapi/linux/cdrom.h +++ b/include/uapi/linux/cdrom.h @@ -289,7 +289,10 @@ struct cdrom_generic_command unsigned char data_direction; int quiet; int timeout; - void __user *reserved[1]; /* unused, actually */ + union { + void __user *reserved[1]; /* unused, actually */ + void __user *unused; + }; }; /* -- cgit v1.2.3 From 50a896cf2d6f34e884a00139d6e6012c9833ace3 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Sat, 3 Oct 2020 10:44:45 +0200 Subject: genetlink: properly support per-op policy dumping Add support for per-op policy dumping. The data is pretty much as before, except that now the assumption that the policy with index 0 is "the" policy no longer holds - you now need to look at the new CTRL_ATTR_OP_POLICY attribute which is a nested attr (indexed by op) containing attributes for do and dump policies. When a single op is requested, the CTRL_ATTR_OP_POLICY will be added in the same way, since do and dump policies may differ. v2: - conditionally advertise per-command policies only if there actually is a policy being used for the do/dump and it's present at all Signed-off-by: Johannes Berg Signed-off-by: David S. Miller --- include/uapi/linux/genetlink.h | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/genetlink.h b/include/uapi/linux/genetlink.h index 9c0636ec2286..bc9c98e84828 100644 --- a/include/uapi/linux/genetlink.h +++ b/include/uapi/linux/genetlink.h @@ -64,6 +64,7 @@ enum { CTRL_ATTR_OPS, CTRL_ATTR_MCAST_GROUPS, CTRL_ATTR_POLICY, + CTRL_ATTR_OP_POLICY, __CTRL_ATTR_MAX, }; @@ -85,6 +86,15 @@ enum { __CTRL_ATTR_MCAST_GRP_MAX, }; +enum { + CTRL_ATTR_POLICY_UNSPEC, + CTRL_ATTR_POLICY_DO, + CTRL_ATTR_POLICY_DUMP, + + __CTRL_ATTR_POLICY_DUMP_MAX, + CTRL_ATTR_POLICY_DUMP_MAX = __CTRL_ATTR_POLICY_DUMP_MAX - 1 +}; + #define CTRL_ATTR_MCAST_GRP_MAX (__CTRL_ATTR_MCAST_GRP_MAX - 1) -- cgit v1.2.3 From e992a6eda9a1eeeab73a8d2792464e4a2b1ebc3b Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Sat, 3 Oct 2020 10:44:46 +0200 Subject: genetlink: allow dumping command-specific policy Right now CTRL_CMD_GETPOLICY can only dump the family-wide policy. Support dumping policy of a specific op. v3: - rebase after per-op policy export and handle that v2: - make cmd U32, just in case. v1: - don't echo op in the output in a naive way, this should make it cleaner to extend the output format for dumping policies for all the commands at once in the future. Signed-off-by: Jakub Kicinski Link: https://lore.kernel.org/r/20201001225933.1373426-11-kuba@kernel.org Signed-off-by: Johannes Berg Signed-off-by: David S. Miller --- include/uapi/linux/genetlink.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/genetlink.h b/include/uapi/linux/genetlink.h index bc9c98e84828..d83f214b4134 100644 --- a/include/uapi/linux/genetlink.h +++ b/include/uapi/linux/genetlink.h @@ -65,6 +65,7 @@ enum { CTRL_ATTR_MCAST_GROUPS, CTRL_ATTR_POLICY, CTRL_ATTR_OP_POLICY, + CTRL_ATTR_OP, __CTRL_ATTR_MAX, }; -- cgit v1.2.3 From 19fbcb36a39eefbe8912a13ccc02e937b1c418d6 Mon Sep 17 00:00:00 2001 From: Guillaume Nault Date: Sat, 3 Oct 2020 00:44:28 +0200 Subject: net/sched: act_vlan: Add {POP,PUSH}_ETH actions Implement TCA_VLAN_ACT_POP_ETH and TCA_VLAN_ACT_PUSH_ETH, to respectively pop and push a base Ethernet header at the beginning of a frame. POP_ETH is just a matter of pulling ETH_HLEN bytes. VLAN tags, if any, must be stripped before calling POP_ETH. PUSH_ETH is restricted to skbs with no mac_header, and only the MAC addresses can be configured. The Ethertype is automatically set from skb->protocol. These restrictions ensure that all skb's fields remain consistent, so that this action can't confuse other part of the networking stack (like GSO). Since openvswitch already had these actions, consolidate the code in skbuff.c (like for vlan and mpls push/pop). Signed-off-by: Guillaume Nault Signed-off-by: David S. Miller --- include/uapi/linux/tc_act/tc_vlan.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/tc_act/tc_vlan.h b/include/uapi/linux/tc_act/tc_vlan.h index 168995b54a70..5b306fe815cc 100644 --- a/include/uapi/linux/tc_act/tc_vlan.h +++ b/include/uapi/linux/tc_act/tc_vlan.h @@ -16,6 +16,8 @@ #define TCA_VLAN_ACT_POP 1 #define TCA_VLAN_ACT_PUSH 2 #define TCA_VLAN_ACT_MODIFY 3 +#define TCA_VLAN_ACT_POP_ETH 4 +#define TCA_VLAN_ACT_PUSH_ETH 5 struct tc_vlan { tc_gen; @@ -30,6 +32,8 @@ enum { TCA_VLAN_PUSH_VLAN_PROTOCOL, TCA_VLAN_PAD, TCA_VLAN_PUSH_VLAN_PRIORITY, + TCA_VLAN_PUSH_ETH_DST, + TCA_VLAN_PUSH_ETH_SRC, __TCA_VLAN_MAX, }; #define TCA_VLAN_MAX (__TCA_VLAN_MAX - 1) -- cgit v1.2.3 From a45294af9e96a3e060b6272fa7cd2c4b196de335 Mon Sep 17 00:00:00 2001 From: Guillaume Nault Date: Sat, 3 Oct 2020 00:44:31 +0200 Subject: net/sched: act_mpls: Add action to push MPLS LSE before Ethernet header Define the MAC_PUSH action which pushes an MPLS LSE before the mac header (instead of between the mac and the network headers as the plain PUSH action does). The only special case is when the skb has an offloaded VLAN. In that case, it has to be inlined before pushing the MPLS header. Signed-off-by: Guillaume Nault Signed-off-by: David S. Miller --- include/uapi/linux/tc_act/tc_mpls.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/tc_act/tc_mpls.h b/include/uapi/linux/tc_act/tc_mpls.h index 9360e95273c7..9e4e8f52a779 100644 --- a/include/uapi/linux/tc_act/tc_mpls.h +++ b/include/uapi/linux/tc_act/tc_mpls.h @@ -10,6 +10,7 @@ #define TCA_MPLS_ACT_PUSH 2 #define TCA_MPLS_ACT_MODIFY 3 #define TCA_MPLS_ACT_DEC_TTL 4 +#define TCA_MPLS_ACT_MAC_PUSH 5 struct tc_mpls { tc_gen; /* generic TC action fields. */ -- cgit v1.2.3 From cf1166349c68816f4259d32559f54972b0d5c1a4 Mon Sep 17 00:00:00 2001 From: Andrew Lunn Date: Sun, 4 Oct 2020 18:12:51 +0200 Subject: net: devlink: Add unused port flavour Not all ports of a switch need to be used, particularly in embedded systems. Add a port flavour for ports which physically exist in the switch, but are not connected to the front panel etc, and so are unused. By having unused ports present in devlink, it gives a more accurate representation of the hardware. It also allows regions to be associated to such ports, so allowing, for example, to determine unused ports are correctly powered off, or to compare probable reset defaults of unused ports to used ports experiences issues. Actually registering unused ports and setting the flavour to unused is optional. The DSA core will register all such switch ports, but such ports are expected to be limited in number. Bigger ASICs may decide not to list unused ports. v2: Expand the description about why it is useful Reviewed-by: Vladimir Oltean Tested-by: Vladimir Oltean Signed-off-by: Andrew Lunn Reviewed-by: Florian Fainelli Signed-off-by: David S. Miller --- include/uapi/linux/devlink.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/devlink.h b/include/uapi/linux/devlink.h index ba467dc07852..5f1d6c327670 100644 --- a/include/uapi/linux/devlink.h +++ b/include/uapi/linux/devlink.h @@ -197,6 +197,9 @@ enum devlink_port_flavour { * port that faces the PCI VF. */ DEVLINK_PORT_FLAVOUR_VIRTUAL, /* Any virtual port facing the user. */ + DEVLINK_PORT_FLAVOUR_UNUSED, /* Port which exists in the switch, but + * is not used in any way. + */ }; enum devlink_param_cmode { -- cgit v1.2.3 From bdbb4e29df8b790db50cb73ce25d23543329f05f Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Mon, 5 Oct 2020 15:07:38 -0700 Subject: netlink: add mask validation We don't have good validation policy for existing unsigned int attrs which serve as flags (for new ones we could use NLA_BITFIELD32). With increased use of policy dumping having the validation be expressed as part of the policy is important. Add validation policy in form of a mask of supported/valid bits. Support u64 in the uAPI to be future-proof, but really for now the embedded mask member can only hold 32 bits, so anything with bit 32+ set will always fail validation. Signed-off-by: Jakub Kicinski Signed-off-by: David S. Miller --- include/uapi/linux/netlink.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/netlink.h b/include/uapi/linux/netlink.h index eac8a6a648ea..d02e472ba54c 100644 --- a/include/uapi/linux/netlink.h +++ b/include/uapi/linux/netlink.h @@ -331,6 +331,7 @@ enum netlink_attribute_type { * the index, if limited inside the nesting (U32) * @NL_POLICY_TYPE_ATTR_BITFIELD32_MASK: valid mask for the * bitfield32 type (U32) + * @NL_POLICY_TYPE_ATTR_MASK: mask of valid bits for unsigned integers (U64) * @NL_POLICY_TYPE_ATTR_PAD: pad attribute for 64-bit alignment */ enum netlink_policy_type_attr { @@ -346,6 +347,7 @@ enum netlink_policy_type_attr { NL_POLICY_TYPE_ATTR_POLICY_MAXTYPE, NL_POLICY_TYPE_ATTR_BITFIELD32_MASK, NL_POLICY_TYPE_ATTR_PAD, + NL_POLICY_TYPE_ATTR_MASK, /* keep last */ __NL_POLICY_TYPE_ATTR_MAX, -- cgit v1.2.3 From eb88531bdbfaafb827192d1fc6c5a3fcc4fadd96 Mon Sep 17 00:00:00 2001 From: Vincent Mailhol Date: Sun, 27 Sep 2020 01:24:31 +0900 Subject: can: raw: add missing error queue support Error queue are not yet implemented in CAN-raw sockets. The problem: a userland call to recvmsg(soc, msg, MSG_ERRQUEUE) on a CAN-raw socket would unqueue messages from the normal queue without any kind of error or warning. As such, it prevented CAN drivers from using the functionalities that relies on the error queue such as skb_tx_timestamp(). SCM_CAN_RAW_ERRQUEUE is defined as the type for the CAN raw error queue. SCM stands for "Socket control messages". The name is inspired from SCM_J1939_ERRQUEUE of include/uapi/linux/can/j1939.h. Signed-off-by: Vincent Mailhol Link: https://lore.kernel.org/r/20200926162527.270030-1-mailhol.vincent@wanadoo.fr Acked-by: Oliver Hartkopp Signed-off-by: Marc Kleine-Budde --- include/uapi/linux/can/raw.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/can/raw.h b/include/uapi/linux/can/raw.h index 6a11d308eb5c..3386aa81fdf2 100644 --- a/include/uapi/linux/can/raw.h +++ b/include/uapi/linux/can/raw.h @@ -49,6 +49,9 @@ #include #define SOL_CAN_RAW (SOL_CAN_BASE + CAN_RAW) +enum { + SCM_CAN_RAW_ERRQUEUE = 1, +}; /* for socket options affecting the socket (not the global system) */ -- cgit v1.2.3 From 1465af12e254a68706e110846f59cf0f09683184 Mon Sep 17 00:00:00 2001 From: Qu Wenruo Date: Tue, 22 Sep 2020 10:37:01 +0800 Subject: btrfs: tree-checker: fix false alert caused by legacy btrfs root item Commit 259ee7754b67 ("btrfs: tree-checker: Add ROOT_ITEM check") introduced btrfs root item size check, however btrfs root item has two versions, the legacy one which just ends before generation_v2 member, is smaller than current btrfs root item size. This caused btrfs kernel to reject valid but old tree root leaves. Fix this problem by also allowing legacy root item, since kernel can already handle them pretty well and upgrade to newer root item format when needed. Reported-by: Martin Steigerwald Fixes: 259ee7754b67 ("btrfs: tree-checker: Add ROOT_ITEM check") CC: stable@vger.kernel.org # 5.4+ Tested-By: Martin Steigerwald Reviewed-by: Josef Bacik Signed-off-by: Qu Wenruo Reviewed-by: David Sterba Signed-off-by: David Sterba --- include/uapi/linux/btrfs_tree.h | 14 ++++++++++++++ 1 file changed, 14 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/btrfs_tree.h b/include/uapi/linux/btrfs_tree.h index 9ba64ca6b4ac..6b885982ece6 100644 --- a/include/uapi/linux/btrfs_tree.h +++ b/include/uapi/linux/btrfs_tree.h @@ -4,6 +4,11 @@ #include #include +#ifdef __KERNEL__ +#include +#else +#include +#endif /* * This header contains the structure definitions and constants used @@ -644,6 +649,15 @@ struct btrfs_root_item { __le64 reserved[8]; /* for future */ } __attribute__ ((__packed__)); +/* + * Btrfs root item used to be smaller than current size. The old format ends + * at where member generation_v2 is. + */ +static inline __u32 btrfs_legacy_root_item_size(void) +{ + return offsetof(struct btrfs_root_item, generation_v2); +} + /* * this is used for both forward and backward root refs */ -- cgit v1.2.3 From 49f3d12b0f70ea867b891ad2a97f6e51bb564e18 Mon Sep 17 00:00:00 2001 From: Jakub Wilk Date: Wed, 7 Oct 2020 07:57:17 +0200 Subject: bpf: Fix typo in uapi/linux/bpf.h Reported-by: Samanta Navarro Signed-off-by: Jakub Wilk Signed-off-by: Alexei Starovoitov Acked-by: Yonghong Song Link: https://lore.kernel.org/bpf/20201007055717.7319-1-jwilk@jwilk.net --- include/uapi/linux/bpf.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index c446394135be..d83561e8cd2c 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -2253,7 +2253,7 @@ union bpf_attr { * Description * This helper is used in programs implementing policies at the * skb socket level. If the sk_buff *skb* is allowed to pass (i.e. - * if the verdeict eBPF program returns **SK_PASS**), redirect it + * if the verdict eBPF program returns **SK_PASS**), redirect it * to the socket referenced by *map* (of type * **BPF_MAP_TYPE_SOCKHASH**) using hash *key*. Both ingress and * egress interfaces can be used for redirection. The -- cgit v1.2.3 From fb1ff4c1941573aea59e4cb575dc5a723303cd70 Mon Sep 17 00:00:00 2001 From: Bharat Bhushan Date: Mon, 5 Oct 2020 20:36:45 +0300 Subject: vfio/fsl-mc: Add VFIO framework skeleton for fsl-mc devices DPAA2 (Data Path Acceleration Architecture) consists in mechanisms for processing Ethernet packets, queue management, accelerators, etc. The Management Complex (mc) is a hardware entity that manages the DPAA2 hardware resources. It provides an object-based abstraction for software drivers to use the DPAA2 hardware. The MC mediates operations such as create, discover, destroy of DPAA2 objects. The MC provides memory-mapped I/O command interfaces (MC portals) which DPAA2 software drivers use to operate on DPAA2 objects. A DPRC is a container object that holds other types of DPAA2 objects. Each object in the DPRC is a Linux device and bound to a driver. The MC-bus driver is a platform driver (different from PCI or platform bus). The DPRC driver does runtime management of a bus instance. It performs the initial scan of the DPRC and handles changes in the DPRC configuration (adding/removing objects). All objects inside a container share the same hardware isolation context, meaning that only an entire DPRC can be assigned to a virtual machine. When a container is assigned to a virtual machine, all the objects within that container are assigned to that virtual machine. The DPRC container assigned to the virtual machine is not allowed to change contents (add/remove objects) by the guest. The restriction is set by the host and enforced by the mc hardware. The DPAA2 objects can be directly assigned to the guest. However the MC portals (the memory mapped command interface to the MC) need to be emulated because there are commands that configure the interrupts and the isolation IDs which are virtual in the guest. Example: echo vfio-fsl-mc > /sys/bus/fsl-mc/devices/dprc.2/driver_override echo dprc.2 > /sys/bus/fsl-mc/drivers/vfio-fsl-mc/bind The dprc.2 is bound to the VFIO driver and all the objects within dprc.2 are going to be bound to the VFIO driver. This patch adds the infrastructure for VFIO support for fsl-mc devices. Subsequent patches will add support for binding and secure assigning these devices using VFIO. More details about the DPAA2 objects can be found here: Documentation/networking/device_drivers/freescale/dpaa2/overview.rst Signed-off-by: Bharat Bhushan Signed-off-by: Diana Craciun Reviewed-by: Eric Auger Signed-off-by: Alex Williamson --- include/uapi/linux/vfio.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/vfio.h b/include/uapi/linux/vfio.h index 920470502329..95deac891378 100644 --- a/include/uapi/linux/vfio.h +++ b/include/uapi/linux/vfio.h @@ -201,6 +201,7 @@ struct vfio_device_info { #define VFIO_DEVICE_FLAGS_AMBA (1 << 3) /* vfio-amba device */ #define VFIO_DEVICE_FLAGS_CCW (1 << 4) /* vfio-ccw device */ #define VFIO_DEVICE_FLAGS_AP (1 << 5) /* vfio-ap device */ +#define VFIO_DEVICE_FLAGS_FSL_MC (1 << 6) /* vfio-fsl-mc device */ __u32 num_regions; /* Max region index + 1 */ __u32 num_irqs; /* Max IRQ index + 1 */ }; -- cgit v1.2.3 From 0c633f0be1dc70a6db46d90dba4cdae82073350a Mon Sep 17 00:00:00 2001 From: Matthew Rosato Date: Wed, 7 Oct 2020 14:56:22 -0400 Subject: vfio: Introduce capability definitions for VFIO_DEVICE_GET_INFO Allow the VFIO_DEVICE_GET_INFO ioctl to include a capability chain. Add a flag indicating capability chain support, and introduce the definitions for the first set of capabilities which are specified to s390 zPCI devices. Signed-off-by: Matthew Rosato Reviewed-by: Cornelia Huck Signed-off-by: Alex Williamson --- include/uapi/linux/vfio.h | 11 ++++++ include/uapi/linux/vfio_zdev.h | 78 ++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 89 insertions(+) create mode 100644 include/uapi/linux/vfio_zdev.h (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/vfio.h b/include/uapi/linux/vfio.h index 920470502329..04fbe425ad0c 100644 --- a/include/uapi/linux/vfio.h +++ b/include/uapi/linux/vfio.h @@ -201,8 +201,10 @@ struct vfio_device_info { #define VFIO_DEVICE_FLAGS_AMBA (1 << 3) /* vfio-amba device */ #define VFIO_DEVICE_FLAGS_CCW (1 << 4) /* vfio-ccw device */ #define VFIO_DEVICE_FLAGS_AP (1 << 5) /* vfio-ap device */ +#define VFIO_DEVICE_FLAGS_CAPS (1 << 7) /* Info supports caps */ __u32 num_regions; /* Max region index + 1 */ __u32 num_irqs; /* Max IRQ index + 1 */ + __u32 cap_offset; /* Offset within info struct of first cap */ }; #define VFIO_DEVICE_GET_INFO _IO(VFIO_TYPE, VFIO_BASE + 7) @@ -218,6 +220,15 @@ struct vfio_device_info { #define VFIO_DEVICE_API_CCW_STRING "vfio-ccw" #define VFIO_DEVICE_API_AP_STRING "vfio-ap" +/* + * The following capabilities are unique to s390 zPCI devices. Their contents + * are further-defined in vfio_zdev.h + */ +#define VFIO_DEVICE_INFO_CAP_ZPCI_BASE 1 +#define VFIO_DEVICE_INFO_CAP_ZPCI_GROUP 2 +#define VFIO_DEVICE_INFO_CAP_ZPCI_UTIL 3 +#define VFIO_DEVICE_INFO_CAP_ZPCI_PFIP 4 + /** * VFIO_DEVICE_GET_REGION_INFO - _IOWR(VFIO_TYPE, VFIO_BASE + 8, * struct vfio_region_info) diff --git a/include/uapi/linux/vfio_zdev.h b/include/uapi/linux/vfio_zdev.h new file mode 100644 index 000000000000..b4309397b6b2 --- /dev/null +++ b/include/uapi/linux/vfio_zdev.h @@ -0,0 +1,78 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +/* + * VFIO Region definitions for ZPCI devices + * + * Copyright IBM Corp. 2020 + * + * Author(s): Pierre Morel + * Matthew Rosato + */ + +#ifndef _VFIO_ZDEV_H_ +#define _VFIO_ZDEV_H_ + +#include +#include + +/** + * VFIO_DEVICE_INFO_CAP_ZPCI_BASE - Base PCI Function information + * + * This capability provides a set of descriptive information about the + * associated PCI function. + */ +struct vfio_device_info_cap_zpci_base { + struct vfio_info_cap_header header; + __u64 start_dma; /* Start of available DMA addresses */ + __u64 end_dma; /* End of available DMA addresses */ + __u16 pchid; /* Physical Channel ID */ + __u16 vfn; /* Virtual function number */ + __u16 fmb_length; /* Measurement Block Length (in bytes) */ + __u8 pft; /* PCI Function Type */ + __u8 gid; /* PCI function group ID */ +}; + +/** + * VFIO_DEVICE_INFO_CAP_ZPCI_GROUP - Base PCI Function Group information + * + * This capability provides a set of descriptive information about the group of + * PCI functions that the associated device belongs to. + */ +struct vfio_device_info_cap_zpci_group { + struct vfio_info_cap_header header; + __u64 dasm; /* DMA Address space mask */ + __u64 msi_addr; /* MSI address */ + __u64 flags; +#define VFIO_DEVICE_INFO_ZPCI_FLAG_REFRESH 1 /* Program-specified TLB refresh */ + __u16 mui; /* Measurement Block Update Interval */ + __u16 noi; /* Maximum number of MSIs */ + __u16 maxstbl; /* Maximum Store Block Length */ + __u8 version; /* Supported PCI Version */ +}; + +/** + * VFIO_DEVICE_INFO_CAP_ZPCI_UTIL - Utility String + * + * This capability provides the utility string for the associated device, which + * is a device identifier string made up of EBCDID characters. 'size' specifies + * the length of 'util_str'. + */ +struct vfio_device_info_cap_zpci_util { + struct vfio_info_cap_header header; + __u32 size; + __u8 util_str[]; +}; + +/** + * VFIO_DEVICE_INFO_CAP_ZPCI_PFIP - PCI Function Path + * + * This capability provides the PCI function path string, which is an identifier + * that describes the internal hardware path of the device. 'size' specifies + * the length of 'pfip'. + */ +struct vfio_device_info_cap_zpci_pfip { + struct vfio_info_cap_header header; + __u32 size; + __u8 pfip[]; +}; + +#endif -- cgit v1.2.3 From e057dd3fc20ffb3d7f150af46542a51b59b90127 Mon Sep 17 00:00:00 2001 From: Oliver Hartkopp Date: Mon, 28 Sep 2020 22:04:04 +0200 Subject: can: add ISO 15765-2:2016 transport protocol CAN Transport Protocols offer support for segmented Point-to-Point communication between CAN nodes via two defined CAN Identifiers. As CAN frames can only transport a small amount of data bytes (max. 8 bytes for 'classic' CAN and max. 64 bytes for CAN FD) this segmentation is needed to transport longer PDUs as needed e.g. for vehicle diagnosis (UDS, ISO 14229) or IP-over-CAN traffic. This protocol driver implements data transfers according to ISO 15765-2:2016 for 'classic' CAN and CAN FD frame types. Signed-off-by: Oliver Hartkopp Link: https://lore.kernel.org/r/20200928200404.82229-1-socketcan@hartkopp.net [mkl: Removed "WITH Linux-syscall-note" from isotp.c. Fixed indention, a checkpatch warning and typos. Replaced __u{8,32} by u{8,32}. Removed always false (optlen < 0) check in isotp_setsockopt().] Signed-off-by: Marc Kleine-Budde --- include/uapi/linux/can/isotp.h | 166 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 166 insertions(+) create mode 100644 include/uapi/linux/can/isotp.h (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/can/isotp.h b/include/uapi/linux/can/isotp.h new file mode 100644 index 000000000000..553006509f4e --- /dev/null +++ b/include/uapi/linux/can/isotp.h @@ -0,0 +1,166 @@ +/* SPDX-License-Identifier: ((GPL-2.0-only WITH Linux-syscall-note) OR BSD-3-Clause) */ +/* + * linux/can/isotp.h + * + * Definitions for isotp CAN sockets (ISO 15765-2:2016) + * + * Copyright (c) 2020 Volkswagen Group Electronic Research + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of Volkswagen nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * Alternatively, provided that this notice is retained in full, this + * software may be distributed under the terms of the GNU General + * Public License ("GPL") version 2, in which case the provisions of the + * GPL apply INSTEAD OF those given above. + * + * The provided data structures and external interfaces from this code + * are not restricted to be used by modules with a GPL compatible license. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH + * DAMAGE. + */ + +#ifndef _UAPI_CAN_ISOTP_H +#define _UAPI_CAN_ISOTP_H + +#include +#include + +#define SOL_CAN_ISOTP (SOL_CAN_BASE + CAN_ISOTP) + +/* for socket options affecting the socket (not the global system) */ + +#define CAN_ISOTP_OPTS 1 /* pass struct can_isotp_options */ + +#define CAN_ISOTP_RECV_FC 2 /* pass struct can_isotp_fc_options */ + +/* sockopts to force stmin timer values for protocol regression tests */ + +#define CAN_ISOTP_TX_STMIN 3 /* pass __u32 value in nano secs */ + /* use this time instead of value */ + /* provided in FC from the receiver */ + +#define CAN_ISOTP_RX_STMIN 4 /* pass __u32 value in nano secs */ + /* ignore received CF frames which */ + /* timestamps differ less than val */ + +#define CAN_ISOTP_LL_OPTS 5 /* pass struct can_isotp_ll_options */ + +struct can_isotp_options { + + __u32 flags; /* set flags for isotp behaviour. */ + /* __u32 value : flags see below */ + + __u32 frame_txtime; /* frame transmission time (N_As/N_Ar) */ + /* __u32 value : time in nano secs */ + + __u8 ext_address; /* set address for extended addressing */ + /* __u8 value : extended address */ + + __u8 txpad_content; /* set content of padding byte (tx) */ + /* __u8 value : content on tx path */ + + __u8 rxpad_content; /* set content of padding byte (rx) */ + /* __u8 value : content on rx path */ + + __u8 rx_ext_address; /* set address for extended addressing */ + /* __u8 value : extended address (rx) */ +}; + +struct can_isotp_fc_options { + + __u8 bs; /* blocksize provided in FC frame */ + /* __u8 value : blocksize. 0 = off */ + + __u8 stmin; /* separation time provided in FC frame */ + /* __u8 value : */ + /* 0x00 - 0x7F : 0 - 127 ms */ + /* 0x80 - 0xF0 : reserved */ + /* 0xF1 - 0xF9 : 100 us - 900 us */ + /* 0xFA - 0xFF : reserved */ + + __u8 wftmax; /* max. number of wait frame transmiss. */ + /* __u8 value : 0 = omit FC N_PDU WT */ +}; + +struct can_isotp_ll_options { + + __u8 mtu; /* generated & accepted CAN frame type */ + /* __u8 value : */ + /* CAN_MTU (16) -> standard CAN 2.0 */ + /* CANFD_MTU (72) -> CAN FD frame */ + + __u8 tx_dl; /* tx link layer data length in bytes */ + /* (configured maximum payload length) */ + /* __u8 value : 8,12,16,20,24,32,48,64 */ + /* => rx path supports all LL_DL values */ + + __u8 tx_flags; /* set into struct canfd_frame.flags */ + /* at frame creation: e.g. CANFD_BRS */ + /* Obsolete when the BRS flag is fixed */ + /* by the CAN netdriver configuration */ +}; + +/* flags for isotp behaviour */ + +#define CAN_ISOTP_LISTEN_MODE 0x001 /* listen only (do not send FC) */ +#define CAN_ISOTP_EXTEND_ADDR 0x002 /* enable extended addressing */ +#define CAN_ISOTP_TX_PADDING 0x004 /* enable CAN frame padding tx path */ +#define CAN_ISOTP_RX_PADDING 0x008 /* enable CAN frame padding rx path */ +#define CAN_ISOTP_CHK_PAD_LEN 0x010 /* check received CAN frame padding */ +#define CAN_ISOTP_CHK_PAD_DATA 0x020 /* check received CAN frame padding */ +#define CAN_ISOTP_HALF_DUPLEX 0x040 /* half duplex error state handling */ +#define CAN_ISOTP_FORCE_TXSTMIN 0x080 /* ignore stmin from received FC */ +#define CAN_ISOTP_FORCE_RXSTMIN 0x100 /* ignore CFs depending on rx stmin */ +#define CAN_ISOTP_RX_EXT_ADDR 0x200 /* different rx extended addressing */ +#define CAN_ISOTP_WAIT_TX_DONE 0x400 /* wait for tx completion */ + + +/* default values */ + +#define CAN_ISOTP_DEFAULT_FLAGS 0 +#define CAN_ISOTP_DEFAULT_EXT_ADDRESS 0x00 +#define CAN_ISOTP_DEFAULT_PAD_CONTENT 0xCC /* prevent bit-stuffing */ +#define CAN_ISOTP_DEFAULT_FRAME_TXTIME 0 +#define CAN_ISOTP_DEFAULT_RECV_BS 0 +#define CAN_ISOTP_DEFAULT_RECV_STMIN 0x00 +#define CAN_ISOTP_DEFAULT_RECV_WFTMAX 0 + +#define CAN_ISOTP_DEFAULT_LL_MTU CAN_MTU +#define CAN_ISOTP_DEFAULT_LL_TX_DL CAN_MAX_DLEN +#define CAN_ISOTP_DEFAULT_LL_TX_FLAGS 0 + +/* + * Remark on CAN_ISOTP_DEFAULT_RECV_* values: + * + * We can strongly assume, that the Linux Kernel implementation of + * CAN_ISOTP is capable to run with BS=0, STmin=0 and WFTmax=0. + * But as we like to be able to behave as a commonly available ECU, + * these default settings can be changed via sockopts. + * For that reason the STmin value is intentionally _not_ checked for + * consistency and copied directly into the flow control (FC) frame. + * + */ + +#endif /* !_UAPI_CAN_ISOTP_H */ -- cgit v1.2.3 From eca43ee6c46db92dd850ce659316b0680d70e137 Mon Sep 17 00:00:00 2001 From: "Nikita V. Shirokov" Date: Fri, 9 Oct 2020 07:03:25 +0000 Subject: bpf: Add tcp_notsent_lowat bpf setsockopt Adding support for TCP_NOTSENT_LOWAT sockoption (https://lwn.net/Articles/560082/) in tcp bpf programs. Signed-off-by: Nikita V. Shirokov Signed-off-by: Daniel Borkmann Acked-by: Yonghong Song Link: https://lore.kernel.org/bpf/20201009070325.226855-1-tehnerd@tehnerd.com --- include/uapi/linux/bpf.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index d83561e8cd2c..42d2df799397 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -1698,7 +1698,7 @@ union bpf_attr { * **TCP_CONGESTION**, **TCP_BPF_IW**, * **TCP_BPF_SNDCWND_CLAMP**, **TCP_SAVE_SYN**, * **TCP_KEEPIDLE**, **TCP_KEEPINTVL**, **TCP_KEEPCNT**, - * **TCP_SYNCNT**, **TCP_USER_TIMEOUT**. + * **TCP_SYNCNT**, **TCP_USER_TIMEOUT**, **TCP_NOTSENT_LOWAT**. * * **IPPROTO_IP**, which supports *optname* **IP_TOS**. * * **IPPROTO_IPV6**, which supports *optname* **IPV6_TCLASS**. * Return -- cgit v1.2.3 From 8858e8d98d5457ba23bcd0d99ce23e272b8b09a1 Mon Sep 17 00:00:00 2001 From: Damien Le Moal Date: Fri, 9 Oct 2020 18:07:14 +0900 Subject: block: fix uapi blkzoned.h comments Update the kdoc comments for struct blk_zone (capacity field description missing) and for struct blk_zone_report (flags field description missing). Signed-off-by: Damien Le Moal Reviewed-by: Johannes Thumshirn Signed-off-by: Jens Axboe --- include/uapi/linux/blkzoned.h | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/blkzoned.h b/include/uapi/linux/blkzoned.h index 42c3366cc25f..656a326821a2 100644 --- a/include/uapi/linux/blkzoned.h +++ b/include/uapi/linux/blkzoned.h @@ -93,12 +93,15 @@ enum blk_zone_report_flags { * @non_seq: Flag indicating that the zone is using non-sequential resources * (for host-aware zoned block devices only). * @reset: Flag indicating that a zone reset is recommended. - * @reserved: Padding to 64 B to match the ZBC/ZAC defined zone descriptor size. + * @resv: Padding for 8B alignment. + * @capacity: Zone usable capacity in 512 B sector units + * @reserved: Padding to 64 B to match the ZBC, ZAC and ZNS defined zone + * descriptor size. * - * start, len and wp use the regular 512 B sector unit, regardless of the - * device logical block size. The overall structure size is 64 B to match the - * ZBC/ZAC defined zone descriptor and allow support for future additional - * zone information. + * start, len, capacity and wp use the regular 512 B sector unit, regardless + * of the device logical block size. The overall structure size is 64 B to + * match the ZBC, ZAC and ZNS defined zone descriptor and allow support for + * future additional zone information. */ struct blk_zone { __u64 start; /* Zone start sector */ @@ -118,7 +121,7 @@ struct blk_zone { * * @sector: starting sector of report * @nr_zones: IN maximum / OUT actual - * @reserved: padding to 16 byte alignment + * @flags: one or more flags as defined by enum blk_zone_report_flags. * @zones: Space to hold @nr_zones @zones entries on reply. * * The array of at most @nr_zones must follow this structure in memory. -- cgit v1.2.3 From ccdf07219da6bd1f43c6ddcde4c0e36993c7365a Mon Sep 17 00:00:00 2001 From: Moshe Shemesh Date: Wed, 7 Oct 2020 09:00:43 +0300 Subject: devlink: Add reload action option to devlink reload command Add devlink reload action to allow the user to request a specific reload action. The action parameter is optional, if not specified then devlink driver re-init action is used (backward compatible). Note that when required to do firmware activation some drivers may need to reload the driver. On the other hand some drivers may need to reset the firmware to reinitialize the driver entities. Therefore, the devlink reload command returns the actions which were actually performed. Reload actions supported are: driver_reinit: driver entities re-initialization, applying devlink-param and devlink-resource values. fw_activate: firmware activate. command examples: $devlink dev reload pci/0000:82:00.0 action driver_reinit reload_actions_performed: driver_reinit $devlink dev reload pci/0000:82:00.0 action fw_activate reload_actions_performed: driver_reinit fw_activate Signed-off-by: Moshe Shemesh Reviewed-by: Jakub Kicinski Reviewed-by: Jacob Keller Reviewed-by: Jiri Pirko Signed-off-by: Jakub Kicinski --- include/uapi/linux/devlink.h | 13 +++++++++++++ 1 file changed, 13 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/devlink.h b/include/uapi/linux/devlink.h index 5f1d6c327670..74bdad252c36 100644 --- a/include/uapi/linux/devlink.h +++ b/include/uapi/linux/devlink.h @@ -301,6 +301,16 @@ enum { DEVLINK_ATTR_TRAP_METADATA_TYPE_FA_COOKIE, }; +enum devlink_reload_action { + DEVLINK_RELOAD_ACTION_UNSPEC, + DEVLINK_RELOAD_ACTION_DRIVER_REINIT, /* Driver entities re-instantiation */ + DEVLINK_RELOAD_ACTION_FW_ACTIVATE, /* FW activate */ + + /* Add new reload actions above */ + __DEVLINK_RELOAD_ACTION_MAX, + DEVLINK_RELOAD_ACTION_MAX = __DEVLINK_RELOAD_ACTION_MAX - 1 +}; + enum devlink_attr { /* don't change the order or add anything between, this is ABI! */ DEVLINK_ATTR_UNSPEC, @@ -493,6 +503,9 @@ enum devlink_attr { DEVLINK_ATTR_FLASH_UPDATE_STATUS_TIMEOUT, /* u64 */ DEVLINK_ATTR_FLASH_UPDATE_OVERWRITE_MASK, /* bitfield32 */ + DEVLINK_ATTR_RELOAD_ACTION, /* u8 */ + DEVLINK_ATTR_RELOAD_ACTIONS_PERFORMED, /* bitfield32 */ + /* add new attributes above here, update the policy in devlink.c */ __DEVLINK_ATTR_MAX, -- cgit v1.2.3 From dc64cc7c63102ac78bac3cfbc00ef3abd7a3fdf3 Mon Sep 17 00:00:00 2001 From: Moshe Shemesh Date: Wed, 7 Oct 2020 09:00:44 +0300 Subject: devlink: Add devlink reload limit option Add reload limit to demand restrictions on reload actions. Reload limits supported: no_reset: No reset allowed, no down time allowed, no link flap and no configuration is lost. By default reload limit is unspecified and so no constraints on reload actions are required. Some combinations of action and limit are invalid. For example, driver can not reinitialize its entities without any downtime. The no_reset reload limit will have usecase in this patchset to implement restricted fw_activate on mlx5. Have the uapi parameter of reload limit ready for future support of multiselection. Signed-off-by: Moshe Shemesh Reviewed-by: Jiri Pirko Signed-off-by: Jakub Kicinski --- include/uapi/linux/devlink.h | 14 ++++++++++++++ 1 file changed, 14 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/devlink.h b/include/uapi/linux/devlink.h index 74bdad252c36..82a5e66c1518 100644 --- a/include/uapi/linux/devlink.h +++ b/include/uapi/linux/devlink.h @@ -311,6 +311,19 @@ enum devlink_reload_action { DEVLINK_RELOAD_ACTION_MAX = __DEVLINK_RELOAD_ACTION_MAX - 1 }; +enum devlink_reload_limit { + DEVLINK_RELOAD_LIMIT_UNSPEC, /* unspecified, no constraints */ + DEVLINK_RELOAD_LIMIT_NO_RESET, /* No reset allowed, no down time allowed, + * no link flap and no configuration is lost. + */ + + /* Add new reload limit above */ + __DEVLINK_RELOAD_LIMIT_MAX, + DEVLINK_RELOAD_LIMIT_MAX = __DEVLINK_RELOAD_LIMIT_MAX - 1 +}; + +#define DEVLINK_RELOAD_LIMITS_VALID_MASK (BIT(__DEVLINK_RELOAD_LIMIT_MAX) - 1) + enum devlink_attr { /* don't change the order or add anything between, this is ABI! */ DEVLINK_ATTR_UNSPEC, @@ -505,6 +518,7 @@ enum devlink_attr { DEVLINK_ATTR_RELOAD_ACTION, /* u8 */ DEVLINK_ATTR_RELOAD_ACTIONS_PERFORMED, /* bitfield32 */ + DEVLINK_ATTR_RELOAD_LIMITS, /* bitfield32 */ /* add new attributes above here, update the policy in devlink.c */ -- cgit v1.2.3 From a254c264267e8746fb257806c166e54375cf9c06 Mon Sep 17 00:00:00 2001 From: Moshe Shemesh Date: Wed, 7 Oct 2020 09:00:45 +0300 Subject: devlink: Add reload stats Add reload stats to hold the history per reload action type and limit. For example, the number of times fw_activate has been performed on this device since the driver module was added or if the firmware activation was performed with or without reset. Add devlink notification on stats update. Expose devlink reload stats to the user through devlink dev get command. Examples: $ devlink dev show pci/0000:82:00.0: stats: reload: driver_reinit 2 fw_activate 1 fw_activate_no_reset 0 pci/0000:82:00.1: stats: reload: driver_reinit 1 fw_activate 0 fw_activate_no_reset 0 $ devlink dev show -jp { "dev": { "pci/0000:82:00.0": { "stats": { "reload": { "driver_reinit": 2, "fw_activate": 1, "fw_activate_no_reset": 0 } } }, "pci/0000:82:00.1": { "stats": { "reload": { "driver_reinit": 1, "fw_activate": 0, "fw_activate_no_reset": 0 } } } } } Signed-off-by: Moshe Shemesh Reviewed-by: Jiri Pirko Signed-off-by: Jakub Kicinski --- include/uapi/linux/devlink.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/devlink.h b/include/uapi/linux/devlink.h index 82a5e66c1518..ab15fc597b74 100644 --- a/include/uapi/linux/devlink.h +++ b/include/uapi/linux/devlink.h @@ -520,6 +520,12 @@ enum devlink_attr { DEVLINK_ATTR_RELOAD_ACTIONS_PERFORMED, /* bitfield32 */ DEVLINK_ATTR_RELOAD_LIMITS, /* bitfield32 */ + DEVLINK_ATTR_DEV_STATS, /* nested */ + DEVLINK_ATTR_RELOAD_STATS, /* nested */ + DEVLINK_ATTR_RELOAD_STATS_ENTRY, /* nested */ + DEVLINK_ATTR_RELOAD_STATS_LIMIT, /* u8 */ + DEVLINK_ATTR_RELOAD_STATS_VALUE, /* u32 */ + /* add new attributes above here, update the policy in devlink.c */ __DEVLINK_ATTR_MAX, -- cgit v1.2.3 From 77069ba2e3adf48c472fbbd9cbd7a4f5370b17df Mon Sep 17 00:00:00 2001 From: Moshe Shemesh Date: Wed, 7 Oct 2020 09:00:46 +0300 Subject: devlink: Add remote reload stats Add remote reload stats to hold the history of actions performed due devlink reload commands initiated by remote host. For example, in case firmware activation with reset finished successfully but was initiated by remote host. The function devlink_remote_reload_actions_performed() is exported to enable drivers update on remote reload actions performed as it was not initiated by their own devlink instance. Expose devlink remote reload stats to the user through devlink dev get command. Examples: $ devlink dev show pci/0000:82:00.0: stats: reload: driver_reinit 2 fw_activate 1 fw_activate_no_reset 0 remote_reload: driver_reinit 0 fw_activate 0 fw_activate_no_reset 0 pci/0000:82:00.1: stats: reload: driver_reinit 1 fw_activate 0 fw_activate_no_reset 0 remote_reload: driver_reinit 1 fw_activate 1 fw_activate_no_reset 0 $ devlink dev show -jp { "dev": { "pci/0000:82:00.0": { "stats": { "reload": { "driver_reinit": 2, "fw_activate": 1, "fw_activate_no_reset": 0 }, "remote_reload": { "driver_reinit": 0, "fw_activate": 0, "fw_activate_no_reset": 0 } } }, "pci/0000:82:00.1": { "stats": { "reload": { "driver_reinit": 1, "fw_activate": 0, "fw_activate_no_reset": 0 }, "remote_reload": { "driver_reinit": 1, "fw_activate": 1, "fw_activate_no_reset": 0 } } } } } Signed-off-by: Moshe Shemesh Reviewed-by: Jakub Kicinski Reviewed-by: Jiri Pirko Signed-off-by: Jakub Kicinski --- include/uapi/linux/devlink.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/devlink.h b/include/uapi/linux/devlink.h index ab15fc597b74..0113bc4db9f5 100644 --- a/include/uapi/linux/devlink.h +++ b/include/uapi/linux/devlink.h @@ -525,6 +525,7 @@ enum devlink_attr { DEVLINK_ATTR_RELOAD_STATS_ENTRY, /* nested */ DEVLINK_ATTR_RELOAD_STATS_LIMIT, /* u8 */ DEVLINK_ATTR_RELOAD_STATS_VALUE, /* u32 */ + DEVLINK_ATTR_REMOTE_RELOAD_STATS, /* nested */ /* add new attributes above here, update the policy in devlink.c */ -- cgit v1.2.3 From 44f3625bc61653ea3bde9960298faf2f5518fda5 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Thu, 8 Oct 2020 12:45:17 +0200 Subject: netlink: export policy in extended ACK Add a new attribute NLMSGERR_ATTR_POLICY to the extended ACK to advertise the policy, e.g. if an attribute was out of range, you'll know the range that's permissible. Add new NL_SET_ERR_MSG_ATTR_POL() and NL_SET_ERR_MSG_ATTR_POL() macros to set this, since realistically it's only useful to do this when the bad attribute (offset) is also returned. Use it in lib/nlattr.c which practically does all the policy validation. v2: - add and use netlink_policy_dump_attr_size_estimate() v3: - remove redundant break v4: - really remove redundant break ... sorry Reviewed-by: Jakub Kicinski Signed-off-by: Johannes Berg Signed-off-by: Jakub Kicinski --- include/uapi/linux/netlink.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/netlink.h b/include/uapi/linux/netlink.h index d02e472ba54c..c3816ff7bfc3 100644 --- a/include/uapi/linux/netlink.h +++ b/include/uapi/linux/netlink.h @@ -129,6 +129,7 @@ struct nlmsgerr { * @NLMSGERR_ATTR_COOKIE: arbitrary subsystem specific cookie to * be used - in the success case - to identify a created * object or operation or similar (binary) + * @NLMSGERR_ATTR_POLICY: policy for a rejected attribute * @__NLMSGERR_ATTR_MAX: number of attributes * @NLMSGERR_ATTR_MAX: highest attribute number */ @@ -137,6 +138,7 @@ enum nlmsgerr_attrs { NLMSGERR_ATTR_MSG, NLMSGERR_ATTR_OFFS, NLMSGERR_ATTR_COOKIE, + NLMSGERR_ATTR_POLICY, __NLMSGERR_ATTR_MAX, NLMSGERR_ATTR_MAX = __NLMSGERR_ATTR_MAX - 1 -- cgit v1.2.3 From dd2ce6a5373c6f5c830be54be10775458a8bd312 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Sun, 11 Oct 2020 01:40:01 +0200 Subject: bpf: Improve bpf_redirect_neigh helper description Follow-up to address David's feedback that we should better describe internals of the bpf_redirect_neigh() helper. Suggested-by: David Ahern Signed-off-by: Daniel Borkmann Signed-off-by: Alexei Starovoitov Reviewed-by: David Ahern Link: https://lore.kernel.org/bpf/20201010234006.7075-2-daniel@iogearbox.net --- include/uapi/linux/bpf.h | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 42d2df799397..4272cc53d478 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -3679,10 +3679,14 @@ union bpf_attr { * Redirect the packet to another net device of index *ifindex* * and fill in L2 addresses from neighboring subsystem. This helper * is somewhat similar to **bpf_redirect**\ (), except that it - * fills in e.g. MAC addresses based on the L3 information from - * the packet. This helper is supported for IPv4 and IPv6 protocols. + * populates L2 addresses as well, meaning, internally, the helper + * performs a FIB lookup based on the skb's networking header to + * get the address of the next hop and then relies on the neighbor + * lookup for the L2 address of the nexthop. + * * The *flags* argument is reserved and must be 0. The helper is - * currently only supported for tc BPF program types. + * currently only supported for tc BPF program types, and enabled + * for IPv4 and IPv6 protocols. * Return * The helper returns **TC_ACT_REDIRECT** on success or * **TC_ACT_SHOT** on error. -- cgit v1.2.3 From 9aa1206e8f48222f35a0c809f33b2f4aaa1e2661 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Sun, 11 Oct 2020 01:40:02 +0200 Subject: bpf: Add redirect_peer helper Add an efficient ingress to ingress netns switch that can be used out of tc BPF programs in order to redirect traffic from host ns ingress into a container veth device ingress without having to go via CPU backlog queue [0]. For local containers this can also be utilized and path via CPU backlog queue only needs to be taken once, not twice. On a high level this borrows from ipvlan which does similar switch in __netif_receive_skb_core() and then iterates via another_round. This helps to reduce latency for mentioned use cases. Pod to remote pod with redirect(), TCP_RR [1]: # percpu_netperf 10.217.1.33 RT_LATENCY: 122.450 (per CPU: 122.666 122.401 122.333 122.401 ) MEAN_LATENCY: 121.210 (per CPU: 121.100 121.260 121.320 121.160 ) STDDEV_LATENCY: 120.040 (per CPU: 119.420 119.910 125.460 115.370 ) MIN_LATENCY: 46.500 (per CPU: 47.000 47.000 47.000 45.000 ) P50_LATENCY: 118.500 (per CPU: 118.000 119.000 118.000 119.000 ) P90_LATENCY: 127.500 (per CPU: 127.000 128.000 127.000 128.000 ) P99_LATENCY: 130.750 (per CPU: 131.000 131.000 129.000 132.000 ) TRANSACTION_RATE: 32666.400 (per CPU: 8152.200 8169.842 8174.439 8169.897 ) Pod to remote pod with redirect_peer(), TCP_RR: # percpu_netperf 10.217.1.33 RT_LATENCY: 44.449 (per CPU: 43.767 43.127 45.279 45.622 ) MEAN_LATENCY: 45.065 (per CPU: 44.030 45.530 45.190 45.510 ) STDDEV_LATENCY: 84.823 (per CPU: 66.770 97.290 84.380 90.850 ) MIN_LATENCY: 33.500 (per CPU: 33.000 33.000 34.000 34.000 ) P50_LATENCY: 43.250 (per CPU: 43.000 43.000 43.000 44.000 ) P90_LATENCY: 46.750 (per CPU: 46.000 47.000 47.000 47.000 ) P99_LATENCY: 52.750 (per CPU: 51.000 54.000 53.000 53.000 ) TRANSACTION_RATE: 90039.500 (per CPU: 22848.186 23187.089 22085.077 21919.130 ) [0] https://linuxplumbersconf.org/event/7/contributions/674/attachments/568/1002/plumbers_2020_cilium_load_balancer.pdf [1] https://github.com/borkmann/netperf_scripts/blob/master/percpu_netperf Signed-off-by: Daniel Borkmann Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20201010234006.7075-3-daniel@iogearbox.net --- include/uapi/linux/bpf.h | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 4272cc53d478..b97bc5abb3b8 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -3719,6 +3719,22 @@ union bpf_attr { * never return NULL. * Return * A pointer pointing to the kernel percpu variable on this cpu. + * + * long bpf_redirect_peer(u32 ifindex, u64 flags) + * Description + * Redirect the packet to another net device of index *ifindex*. + * This helper is somewhat similar to **bpf_redirect**\ (), except + * that the redirection happens to the *ifindex*' peer device and + * the netns switch takes place from ingress to ingress without + * going through the CPU's backlog queue. + * + * The *flags* argument is reserved and must be 0. The helper is + * currently only supported for tc BPF program types at the ingress + * hook and for veth device types. The peer device must reside in a + * different network namespace. + * Return + * The helper returns **TC_ACT_REDIRECT** on success or + * **TC_ACT_SHOT** on error. */ #define __BPF_FUNC_MAPPER(FN) \ FN(unspec), \ @@ -3876,6 +3892,7 @@ union bpf_attr { FN(redirect_neigh), \ FN(bpf_per_cpu_ptr), \ FN(bpf_this_cpu_ptr), \ + FN(redirect_peer), \ /* */ /* integer value in 'imm' field of BPF_CALL instruction selects which helper -- cgit v1.2.3 From 4a8f87e60f6db40e640f1db555d063b2c4dea5f1 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Sun, 11 Oct 2020 01:40:03 +0200 Subject: bpf: Allow for map-in-map with dynamic inner array map entries Recent work in f4d05259213f ("bpf: Add map_meta_equal map ops") and 134fede4eecf ("bpf: Relax max_entries check for most of the inner map types") added support for dynamic inner max elements for most map-in-map types. Exceptions were maps like array or prog array where the map_gen_lookup() callback uses the maps' max_entries field as a constant when emitting instructions. We recently implemented Maglev consistent hashing into Cilium's load balancer which uses map-in-map with an outer map being hash and inner being array holding the Maglev backend table for each service. This has been designed this way in order to reduce overall memory consumption given the outer hash map allows to avoid preallocating a large, flat memory area for all services. Also, the number of service mappings is not always known a-priori. The use case for dynamic inner array map entries is to further reduce memory overhead, for example, some services might just have a small number of back ends while others could have a large number. Right now the Maglev backend table for small and large number of backends would need to have the same inner array map entries which adds a lot of unneeded overhead. Dynamic inner array map entries can be realized by avoiding the inlined code generation for their lookup. The lookup will still be efficient since it will be calling into array_map_lookup_elem() directly and thus avoiding retpoline. The patch adds a BPF_F_INNER_MAP flag to map creation which therefore skips inline code generation and relaxes array_map_meta_equal() check to ignore both maps' max_entries. This also still allows to have faster lookups for map-in-map when BPF_F_INNER_MAP is not specified and hence dynamic max_entries not needed. Example code generation where inner map is dynamic sized array: # bpftool p d x i 125 int handle__sys_enter(void * ctx): ; int handle__sys_enter(void *ctx) 0: (b4) w1 = 0 ; int key = 0; 1: (63) *(u32 *)(r10 -4) = r1 2: (bf) r2 = r10 ; 3: (07) r2 += -4 ; inner_map = bpf_map_lookup_elem(&outer_arr_dyn, &key); 4: (18) r1 = map[id:468] 6: (07) r1 += 272 7: (61) r0 = *(u32 *)(r2 +0) 8: (35) if r0 >= 0x3 goto pc+5 9: (67) r0 <<= 3 10: (0f) r0 += r1 11: (79) r0 = *(u64 *)(r0 +0) 12: (15) if r0 == 0x0 goto pc+1 13: (05) goto pc+1 14: (b7) r0 = 0 15: (b4) w6 = -1 ; if (!inner_map) 16: (15) if r0 == 0x0 goto pc+6 17: (bf) r2 = r10 ; 18: (07) r2 += -4 ; val = bpf_map_lookup_elem(inner_map, &key); 19: (bf) r1 = r0 | No inlining but instead 20: (85) call array_map_lookup_elem#149280 | call to array_map_lookup_elem() ; return val ? *val : -1; | for inner array lookup. 21: (15) if r0 == 0x0 goto pc+1 ; return val ? *val : -1; 22: (61) r6 = *(u32 *)(r0 +0) ; } 23: (bc) w0 = w6 24: (95) exit Signed-off-by: Daniel Borkmann Signed-off-by: Alexei Starovoitov Acked-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20201010234006.7075-4-daniel@iogearbox.net --- include/uapi/linux/bpf.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index b97bc5abb3b8..bf5a99d803e4 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -435,6 +435,9 @@ enum { /* Share perf_event among processes */ BPF_F_PRESERVE_ELEMS = (1U << 11), + +/* Create a map that is suitable to be an inner map with dynamic max entries */ + BPF_F_INNER_MAP = (1U << 12), }; /* Flags for BPF_PROG_QUERY. */ -- cgit v1.2.3 From 60a3815da702fd9e4759945f26cce5c47d3967ad Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Thu, 8 Oct 2020 01:14:47 +0200 Subject: netfilter: add inet ingress support This patch adds the NF_INET_INGRESS pseudohook for the NFPROTO_INET family. This is a mapping this new hook to the existing NFPROTO_NETDEV and NF_NETDEV_INGRESS hook. The hook does not guarantee that packets are inet only, users must filter out non-ip traffic explicitly. This infrastructure makes it easier to support this new hook in nf_tables. Signed-off-by: Pablo Neira Ayuso --- include/uapi/linux/netfilter.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/netfilter.h b/include/uapi/linux/netfilter.h index ca9e63d6e0e4..6a6179af0d7c 100644 --- a/include/uapi/linux/netfilter.h +++ b/include/uapi/linux/netfilter.h @@ -45,6 +45,7 @@ enum nf_inet_hooks { NF_INET_FORWARD, NF_INET_LOCAL_OUT, NF_INET_POST_ROUTING, + NF_INET_INGRESS, NF_INET_NUMHOOKS }; -- cgit v1.2.3 From ac911bfeb34b5d79fb4e23a08b8db0b89c529b53 Mon Sep 17 00:00:00 2001 From: Oliver Hartkopp Date: Mon, 12 Oct 2020 09:43:53 +0200 Subject: can: isotp: implement cleanups / improvements from review As pointed out by Jakub Kicinski here: http://lore.kernel.org/r/20201009175751.5c54097f@kicinski-fedora-pc1c0hjn.dhcp.thefacebook.com this patch addresses the remarked issues: - remove empty line in comment - remove default=y for CAN_ISOTP in Kconfig - make use of pr_notice_once() - use GFP_ATOMIC instead of gfp_any() in soft hrtimer context The version strings in the CAN subsystem are removed by a separate patch. Signed-off-by: Oliver Hartkopp Link: https://lore.kernel.org/r/20201012074354.25839-1-socketcan@hartkopp.net Signed-off-by: Marc Kleine-Budde --- include/uapi/linux/can/isotp.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/can/isotp.h b/include/uapi/linux/can/isotp.h index 553006509f4e..7793b26aa154 100644 --- a/include/uapi/linux/can/isotp.h +++ b/include/uapi/linux/can/isotp.h @@ -160,7 +160,6 @@ struct can_isotp_ll_options { * these default settings can be changed via sockopts. * For that reason the STmin value is intentionally _not_ checked for * consistency and copied directly into the flow control (FC) frame. - * */ #endif /* !_UAPI_CAN_ISOTP_H */ -- cgit v1.2.3 From d25e2e9388eda61b6e298585024ee3355f50c493 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Wed, 14 Oct 2020 21:34:32 +0200 Subject: netfilter: restore NF_INET_NUMHOOKS This definition is used by the iptables legacy UAPI, restore it. Fixes: d3519cb89f6d ("netfilter: nf_tables: add inet ingress support") Reported-by: Jason A. Donenfeld Tested-by: Jason A. Donenfeld Signed-off-by: Pablo Neira Ayuso Signed-off-by: Jakub Kicinski --- include/uapi/linux/netfilter.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/netfilter.h b/include/uapi/linux/netfilter.h index 6a6179af0d7c..ef9a44286e23 100644 --- a/include/uapi/linux/netfilter.h +++ b/include/uapi/linux/netfilter.h @@ -45,8 +45,8 @@ enum nf_inet_hooks { NF_INET_FORWARD, NF_INET_LOCAL_OUT, NF_INET_POST_ROUTING, - NF_INET_INGRESS, - NF_INET_NUMHOOKS + NF_INET_NUMHOOKS, + NF_INET_INGRESS = NF_INET_NUMHOOKS, }; enum nf_dev_hooks { -- cgit v1.2.3 From 346e320cb2103edef709c4466a29140c4a8e527a Mon Sep 17 00:00:00 2001 From: Davide Caratti Date: Thu, 15 Oct 2020 18:39:27 +0200 Subject: netfilter: nftables: allow re-computing sctp CRC-32C in 'payload' statements nftables payload statements are used to mangle SCTP headers, but they can only replace the Internet Checksum. As a consequence, nftables rules that mangle sport/dport/vtag in SCTP headers potentially generate packets that are discarded by the receiver, unless the CRC-32C is "offloaded" (e.g the rule mangles a skb having 'ip_summed' equal to 'CHECKSUM_PARTIAL'. Fix this extending uAPI definitions and L4 checksum update function, in a way that userspace programs (e.g. nft) can instruct the kernel to compute CRC-32C in SCTP headers. Also ensure that LIBCRC32C is built if NF_TABLES is 'y' or 'm' in the kernel build configuration. Signed-off-by: Davide Caratti Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso Signed-off-by: Jakub Kicinski --- include/uapi/linux/netfilter/nf_tables.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/netfilter/nf_tables.h b/include/uapi/linux/netfilter/nf_tables.h index 352ee51707a1..98272cb5f617 100644 --- a/include/uapi/linux/netfilter/nf_tables.h +++ b/include/uapi/linux/netfilter/nf_tables.h @@ -749,10 +749,12 @@ enum nft_payload_bases { * * @NFT_PAYLOAD_CSUM_NONE: no checksumming * @NFT_PAYLOAD_CSUM_INET: internet checksum (RFC 791) + * @NFT_PAYLOAD_CSUM_SCTP: CRC-32c, for use in SCTP header (RFC 3309) */ enum nft_payload_csum_types { NFT_PAYLOAD_CSUM_NONE, NFT_PAYLOAD_CSUM_INET, + NFT_PAYLOAD_CSUM_SCTP, }; enum nft_payload_csum_flags { -- cgit v1.2.3 From 8c39076c276be0b31982e44654e2c2357473258a Mon Sep 17 00:00:00 2001 From: Olga Kornievskaia Date: Fri, 16 Oct 2020 09:25:45 -0400 Subject: NFSv4.2: support EXCHGID4_FLAG_SUPP_FENCE_OPS 4.2 EXCHANGE_ID flag RFC 7862 introduced a new flag that either client or server is allowed to set: EXCHGID4_FLAG_SUPP_FENCE_OPS. Client needs to update its bitmask to allow for this flag value. v2: changed minor version argument to unsigned int Signed-off-by: Olga Kornievskaia CC: Signed-off-by: Anna Schumaker --- include/uapi/linux/nfs4.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/nfs4.h b/include/uapi/linux/nfs4.h index bf197e99b98f..ed5415e0f1c1 100644 --- a/include/uapi/linux/nfs4.h +++ b/include/uapi/linux/nfs4.h @@ -139,6 +139,8 @@ #define EXCHGID4_FLAG_UPD_CONFIRMED_REC_A 0x40000000 #define EXCHGID4_FLAG_CONFIRMED_R 0x80000000 + +#define EXCHGID4_FLAG_SUPP_FENCE_OPS 0x00000004 /* * Since the validity of these bits depends on whether * they're set in the argument or response, have separate @@ -146,6 +148,7 @@ */ #define EXCHGID4_FLAG_MASK_A 0x40070103 #define EXCHGID4_FLAG_MASK_R 0x80070103 +#define EXCHGID4_2_FLAG_MASK_R 0x80070107 #define SEQ4_STATUS_CB_PATH_DOWN 0x00000001 #define SEQ4_STATUS_CB_GSS_CONTEXTS_EXPIRING 0x00000002 -- cgit v1.2.3 From c6e5f02b5281a3166a9b7b4d66830cc234421ba5 Mon Sep 17 00:00:00 2001 From: "Saheed O. Bolarinwa" Date: Thu, 15 Oct 2020 14:30:31 -0500 Subject: PCI/ASPM: Remove struct aspm_register_info.support Previously we stored the "ASPM Support" field from the Link Capabilities register in the struct aspm_register_info. Read the Link Capabilities directly when needed and remove it from the struct aspm_register_info. No functional change intended. [bhelgaas: remove pci_dev cached copy since LNKCAP isn't truly read-only, add PCI_EXP_LNKCAP_ASPM_L0S & PCI_EXP_LNKCAP_ASPM_L1, check them directly instead of adding aspm_support()] Link: https://lore.kernel.org/r/20201015193039.12585-5-helgaas@kernel.org Signed-off-by: Saheed O. Bolarinwa Signed-off-by: Bjorn Helgaas --- include/uapi/linux/pci_regs.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/pci_regs.h b/include/uapi/linux/pci_regs.h index f9701410d3b5..06846ec2e071 100644 --- a/include/uapi/linux/pci_regs.h +++ b/include/uapi/linux/pci_regs.h @@ -532,6 +532,8 @@ #define PCI_EXP_LNKCAP_SLS_32_0GB 0x00000005 /* LNKCAP2 SLS Vector bit 4 */ #define PCI_EXP_LNKCAP_MLW 0x000003f0 /* Maximum Link Width */ #define PCI_EXP_LNKCAP_ASPMS 0x00000c00 /* ASPM Support */ +#define PCI_EXP_LNKCAP_ASPM_L0S 0x00000400 /* ASPM L0s Support */ +#define PCI_EXP_LNKCAP_ASPM_L1 0x00000800 /* ASPM L1 Support */ #define PCI_EXP_LNKCAP_L0SEL 0x00007000 /* L0s Exit Latency */ #define PCI_EXP_LNKCAP_L1EL 0x00038000 /* L1 Exit Latency */ #define PCI_EXP_LNKCAP_CLKPM 0x00040000 /* Clock Power Management */ -- cgit v1.2.3 From df8f10587d3d11b055d54138994a1a9a681da0c4 Mon Sep 17 00:00:00 2001 From: "Saheed O. Bolarinwa" Date: Thu, 15 Oct 2020 14:30:39 -0500 Subject: PCI/ASPM: Remove struct pcie_link_state.l1ss Previously we computed L1.2 parameters in the enumeration path, saved them in struct pcie_link_state.l1ss, and programmed them into the devices whenever we enabled or disabled L1.2 on the link. But these parameters are constant and don't need to be updated when enabling/disabling L1.2. Compute and program the L1.2 parameters once during enumeration and remove the struct pcie_link_state.l1ss member. No functional change intended. [bhelgaas: rework to program L1.2 parameters during enumeration] Link: https://lore.kernel.org/r/20201015193039.12585-13-helgaas@kernel.org Signed-off-by: Saheed O. Bolarinwa Signed-off-by: Bjorn Helgaas --- include/uapi/linux/pci_regs.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/pci_regs.h b/include/uapi/linux/pci_regs.h index 06846ec2e071..c7e0acba0e20 100644 --- a/include/uapi/linux/pci_regs.h +++ b/include/uapi/linux/pci_regs.h @@ -1058,6 +1058,7 @@ #define PCI_L1SS_CTL1_PCIPM_L1_1 0x00000002 /* PCI-PM L1.1 Enable */ #define PCI_L1SS_CTL1_ASPM_L1_2 0x00000004 /* ASPM L1.2 Enable */ #define PCI_L1SS_CTL1_ASPM_L1_1 0x00000008 /* ASPM L1.1 Enable */ +#define PCI_L1SS_CTL1_L1_2_MASK 0x00000005 #define PCI_L1SS_CTL1_L1SS_MASK 0x0000000f #define PCI_L1SS_CTL1_CM_RESTORE_TIME 0x0000ff00 /* Common_Mode_Restore_Time */ #define PCI_L1SS_CTL1_LTR_L12_TH_VALUE 0x03ff0000 /* LTR_L1.2_THRESHOLD_Value */ -- cgit v1.2.3 From f3d301c1f2f5676465cdf3259737ea19cc82731f Mon Sep 17 00:00:00 2001 From: Al Grant Date: Mon, 21 Sep 2020 21:46:37 +0100 Subject: perf: correct SNOOPX field offset perf_event.h has macros that define the field offsets in the data_src bitmask in perf records. The SNOOPX and REMOTE offsets were both 37. These are distinct fields, and the bitfield layout in perf_mem_data_src confirms that SNOOPX should be at offset 38. Fixes: 52839e653b5629bd ("perf tools: Add support for printing new mem_info encodings") Signed-off-by: Al Grant Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Andi Kleen Link: https://lkml.kernel.org/r/4ac9f5cc-4388-b34a-9999-418a4099415d@foss.arm.com --- include/uapi/linux/perf_event.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/perf_event.h b/include/uapi/linux/perf_event.h index 077e7ee69e3d..b95d3c485d27 100644 --- a/include/uapi/linux/perf_event.h +++ b/include/uapi/linux/perf_event.h @@ -1196,7 +1196,7 @@ union perf_mem_data_src { #define PERF_MEM_SNOOPX_FWD 0x01 /* forward */ /* 1 free */ -#define PERF_MEM_SNOOPX_SHIFT 37 +#define PERF_MEM_SNOOPX_SHIFT 38 /* locked instruction */ #define PERF_MEM_LOCK_NA 0x01 /* not available */ -- cgit v1.2.3 From 66570e966dd9cb4fd57811d0056c6472a14a2c41 Mon Sep 17 00:00:00 2001 From: Oliver Upton Date: Tue, 18 Aug 2020 15:24:28 +0000 Subject: kvm: x86: only provide PV features if enabled in guest's CPUID KVM unconditionally provides PV features to the guest, regardless of the configured CPUID. An unwitting guest that doesn't check KVM_CPUID_FEATURES before use could access paravirt features that userspace did not intend to provide. Fix this by checking the guest's CPUID before performing any paravirtual operations. Introduce a capability, KVM_CAP_ENFORCE_PV_FEATURE_CPUID, to gate the aforementioned enforcement. Migrating a VM from a host w/o this patch to a host with this patch could silently change the ABI exposed to the guest, warranting that we default to the old behavior and opt-in for the new one. Reviewed-by: Jim Mattson Reviewed-by: Peter Shier Signed-off-by: Oliver Upton Change-Id: I202a0926f65035b872bfe8ad15307c026de59a98 Message-Id: <20200818152429.1923996-4-oupton@google.com> Reviewed-by: Wanpeng Li Signed-off-by: Paolo Bonzini --- include/uapi/linux/kvm.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index 58f43aa1fc21..ca41220b40b8 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -1052,6 +1052,7 @@ struct kvm_ppc_resize_hpt { #define KVM_CAP_STEAL_TIME 187 #define KVM_CAP_X86_USER_SPACE_MSR 188 #define KVM_CAP_X86_MSR_FILTER 189 +#define KVM_CAP_ENFORCE_PV_FEATURE_CPUID 190 #ifdef KVM_CAP_IRQ_ROUTING -- cgit v1.2.3 From ba452c9e996d8a4c347b32805f91abb70de5de7e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Toke=20H=C3=B8iland-J=C3=B8rgensen?= Date: Tue, 20 Oct 2020 23:25:56 +0200 Subject: bpf: Fix bpf_redirect_neigh helper api to support supplying nexthop MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Based on the discussion in [0], update the bpf_redirect_neigh() helper to accept an optional parameter specifying the nexthop information. This makes it possible to combine bpf_fib_lookup() and bpf_redirect_neigh() without incurring a duplicate FIB lookup - since the FIB lookup helper will return the nexthop information even if no neighbour is present, this can simply be passed on to bpf_redirect_neigh() if bpf_fib_lookup() returns BPF_FIB_LKUP_RET_NO_NEIGH. Thus fix & extend it before helper API is frozen. [0] https://lore.kernel.org/bpf/393e17fc-d187-3a8d-2f0d-a627c7c63fca@iogearbox.net/ Signed-off-by: Toke Høiland-Jørgensen Signed-off-by: Daniel Borkmann Reviewed-by: David Ahern Link: https://lore.kernel.org/bpf/160322915615.32199.1187570224032024535.stgit@toke.dk --- include/uapi/linux/bpf.h | 22 ++++++++++++++++++---- 1 file changed, 18 insertions(+), 4 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index bf5a99d803e4..e6ceac3f7d62 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -3677,15 +3677,19 @@ union bpf_attr { * Return * The id is returned or 0 in case the id could not be retrieved. * - * long bpf_redirect_neigh(u32 ifindex, u64 flags) + * long bpf_redirect_neigh(u32 ifindex, struct bpf_redir_neigh *params, int plen, u64 flags) * Description * Redirect the packet to another net device of index *ifindex* * and fill in L2 addresses from neighboring subsystem. This helper * is somewhat similar to **bpf_redirect**\ (), except that it * populates L2 addresses as well, meaning, internally, the helper - * performs a FIB lookup based on the skb's networking header to - * get the address of the next hop and then relies on the neighbor - * lookup for the L2 address of the nexthop. + * relies on the neighbor lookup for the L2 address of the nexthop. + * + * The helper will perform a FIB lookup based on the skb's + * networking header to get the address of the next hop, unless + * this is supplied by the caller in the *params* argument. The + * *plen* argument indicates the len of *params* and should be set + * to 0 if *params* is NULL. * * The *flags* argument is reserved and must be 0. The helper is * currently only supported for tc BPF program types, and enabled @@ -4906,6 +4910,16 @@ struct bpf_fib_lookup { __u8 dmac[6]; /* ETH_ALEN */ }; +struct bpf_redir_neigh { + /* network family for lookup (AF_INET, AF_INET6) */ + __u32 nh_family; + /* network address of nexthop; skips fib lookup to find gateway */ + union { + __be32 ipv4_nh; + __u32 ipv6_nh[4]; /* in6_addr; network order */ + }; +}; + enum bpf_task_fd_type { BPF_FD_TYPE_RAW_TRACEPOINT, /* tp name */ BPF_FD_TYPE_TRACEPOINT, /* tp name */ -- cgit v1.2.3