From e40fbbf0572c5e41dc87ad79001748ed399ce32d Mon Sep 17 00:00:00 2001 From: Usama Arif Date: Wed, 19 Jan 2022 11:44:40 +0000 Subject: uapi/bpf: Add missing description and returns for helper documentation Both description and returns section will become mandatory for helpers and syscalls in a later commit to generate man pages. This commit also adds in the documentation that BPF_PROG_RUN is an alias for BPF_PROG_TEST_RUN for anyone searching for the syscall in the generated man pages. Signed-off-by: Usama Arif Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20220119114442.1452088-1-usama.arif@bytedance.com --- include/uapi/linux/bpf.h | 15 +++++++++++++++ 1 file changed, 15 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index b0383d371b9a..a9c96c21330a 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -330,6 +330,8 @@ union bpf_iter_link_info { * *ctx_out*, *data_in* and *data_out* must be NULL. * *repeat* must be zero. * + * BPF_PROG_RUN is an alias for BPF_PROG_TEST_RUN. + * * Return * Returns zero on success. On error, -1 is returned and *errno* * is set appropriately. @@ -1775,6 +1777,8 @@ union bpf_attr { * 0 on success, or a negative error in case of failure. * * u64 bpf_get_current_pid_tgid(void) + * Description + * Get the current pid and tgid. * Return * A 64-bit integer containing the current tgid and pid, and * created as such: @@ -1782,6 +1786,8 @@ union bpf_attr { * *current_task*\ **->pid**. * * u64 bpf_get_current_uid_gid(void) + * Description + * Get the current uid and gid. * Return * A 64-bit integer containing the current GID and UID, and * created as such: *current_gid* **<< 32 \|** *current_uid*. @@ -2256,6 +2262,8 @@ union bpf_attr { * The 32-bit hash. * * u64 bpf_get_current_task(void) + * Description + * Get the current task. * Return * A pointer to the current task struct. 
* @@ -2369,6 +2377,8 @@ union bpf_attr { * indicate that the hash is outdated and to trigger a * recalculation the next time the kernel tries to access this * hash or when the **bpf_get_hash_recalc**\ () helper is called. + * Return + * void. * * long bpf_get_numa_node_id(void) * Description @@ -2466,6 +2476,8 @@ union bpf_attr { * A 8-byte long unique number or 0 if *sk* is NULL. * * u32 bpf_get_socket_uid(struct sk_buff *skb) + * Description + * Get the owner UID of the socket associated to *skb*. * Return * The owner UID of the socket associated to *skb*. If the socket * is **NULL**, or if it is not a full socket (i.e. if it is a @@ -3240,6 +3252,9 @@ union bpf_attr { * The id is returned or 0 in case the id could not be retrieved. * * u64 bpf_get_current_cgroup_id(void) + * Description + * Get the current cgroup id based on the cgroup within which + * the current task is running. * Return * A 64-bit integer containing the current cgroup id based * on the cgroup within which the current task is running. -- cgit v1.2.3 From b44123b4a3dcad4664d3a0f72c011ffd4c9c4d93 Mon Sep 17 00:00:00 2001 From: YiFei Zhu Date: Thu, 16 Dec 2021 02:04:27 +0000 Subject: bpf: Add cgroup helpers bpf_{get,set}_retval to get/set syscall return value The helpers continue to use int for retval because all the hooks are int-returning rather than long-returning. The return value of bpf_set_retval is int for future-proofing, in case in the future there may be errors trying to set the retval. After the previous patch, if a program rejects a syscall by returning 0, an -EPERM will be generated no matter if the retval is already set to -err. This patch changes it so that -EPERM is forced only if retval is not already set to -err. This is because we want to support, for example, invoking bpf_set_retval(-EINVAL) and returning 0, and have the syscall return value be -EINVAL not -EPERM. For BPF_PROG_CGROUP_INET_EGRESS_RUN_ARRAY, the prior behavior is that, if the return value is NET_XMIT_DROP, the packet is silently dropped. 
We preserve this behavior for backward compatibility reasons, so even if an errno is set, the errno does not return to caller. However, setting a non-err to retval cannot propagate so this is not allowed and we return a -EFAULT in that case. Signed-off-by: YiFei Zhu Reviewed-by: Stanislav Fomichev Link: https://lore.kernel.org/r/b4013fd5d16bed0b01977c1fafdeae12e1de61fb.1639619851.git.zhuyifei@google.com Signed-off-by: Alexei Starovoitov --- include/uapi/linux/bpf.h | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index a9c96c21330a..fe2272defcd9 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -5033,6 +5033,22 @@ union bpf_attr { * * Return * The number of arguments of the traced function. + * + * int bpf_get_retval(void) + * Description + * Get the syscall's return value that will be returned to userspace. + * + * This helper is currently supported by cgroup programs only. + * Return + * The syscall's return value. + * + * int bpf_set_retval(int retval) + * Description + * Set the syscall's return value that will be returned to userspace. + * + * This helper is currently supported by cgroup programs only. + * Return + * 0 on success, or a negative error in case of failure. */ #define __BPF_FUNC_MAPPER(FN) \ FN(unspec), \ @@ -5221,6 +5237,8 @@ union bpf_attr { FN(get_func_arg), \ FN(get_func_ret), \ FN(get_func_arg_cnt), \ + FN(get_retval), \ + FN(set_retval), \ /* */ /* integer value in 'imm' field of BPF_CALL instruction selects which helper -- cgit v1.2.3 From c2f2cdbeffda7b153c19e0f3d73149c41026c0db Mon Sep 17 00:00:00 2001 From: Lorenzo Bianconi Date: Fri, 21 Jan 2022 11:09:52 +0100 Subject: bpf: introduce BPF_F_XDP_HAS_FRAGS flag in prog_flags loading the ebpf program Introduce BPF_F_XDP_HAS_FRAGS and the related field in bpf_prog_aux in order to notify the driver the loaded program support xdp frags. 
Acked-by: Toke Hoiland-Jorgensen Acked-by: John Fastabend Signed-off-by: Lorenzo Bianconi Link: https://lore.kernel.org/r/db2e8075b7032a356003f407d1b0deb99adaa0ed.1642758637.git.lorenzo@kernel.org Signed-off-by: Alexei Starovoitov --- include/uapi/linux/bpf.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index fe2272defcd9..945649c67e03 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -1113,6 +1113,11 @@ enum bpf_link_type { */ #define BPF_F_SLEEPABLE (1U << 4) +/* If BPF_F_XDP_HAS_FRAGS is used in BPF_PROG_LOAD command, the loaded program + * fully support xdp frags. + */ +#define BPF_F_XDP_HAS_FRAGS (1U << 5) + /* When BPF ldimm64's insn[0].src_reg != 0 then this can have * the following extensions: * -- cgit v1.2.3 From 0165cc817075cf701e4289838f1d925ff1911b3e Mon Sep 17 00:00:00 2001 From: Lorenzo Bianconi Date: Fri, 21 Jan 2022 11:09:54 +0100 Subject: bpf: introduce bpf_xdp_get_buff_len helper Introduce bpf_xdp_get_buff_len helper in order to return the xdp buffer total size (linear and paged area) Acked-by: Toke Hoiland-Jorgensen Acked-by: John Fastabend Signed-off-by: Lorenzo Bianconi Link: https://lore.kernel.org/r/aac9ac3504c84026cf66a3c71b7c5ae89bc991be.1642758637.git.lorenzo@kernel.org Signed-off-by: Alexei Starovoitov --- include/uapi/linux/bpf.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 945649c67e03..5a28772063f6 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -5054,6 +5054,12 @@ union bpf_attr { * This helper is currently supported by cgroup programs only. * Return * 0 on success, or a negative error in case of failure. 
+ * + * u64 bpf_xdp_get_buff_len(struct xdp_buff *xdp_md) + * Description + * Get the total size of a given xdp buff (linear and paged area) + * Return + * The total size of a given xdp buffer. */ #define __BPF_FUNC_MAPPER(FN) \ FN(unspec), \ @@ -5244,6 +5250,7 @@ union bpf_attr { FN(get_func_arg_cnt), \ FN(get_retval), \ FN(set_retval), \ + FN(xdp_get_buff_len), \ /* */ /* integer value in 'imm' field of BPF_CALL instruction selects which helper -- cgit v1.2.3 From 3f364222d032eea6b245780e845ad213dab28cdd Mon Sep 17 00:00:00 2001 From: Lorenzo Bianconi Date: Fri, 21 Jan 2022 11:10:03 +0100 Subject: net: xdp: introduce bpf_xdp_pointer utility routine Similar to skb_header_pointer, introduce bpf_xdp_pointer utility routine to return a pointer to a given position in the xdp_buff if the requested area (offset + len) is contained in a contiguous memory area otherwise it will be copied in a bounce buffer provided by the caller. Similar to the tc counterpart, introduce the two following xdp helpers: - bpf_xdp_load_bytes - bpf_xdp_store_bytes Reviewed-by: Eelco Chaudron Acked-by: Toke Hoiland-Jorgensen Acked-by: John Fastabend Acked-by: Jakub Kicinski Signed-off-by: Lorenzo Bianconi Link: https://lore.kernel.org/r/ab285c1efdd5b7a9d361348b1e7d3ef49f6382b3.1642758637.git.lorenzo@kernel.org Signed-off-by: Alexei Starovoitov --- include/uapi/linux/bpf.h | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 5a28772063f6..16a7574292a5 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -5060,6 +5060,22 @@ union bpf_attr { * Get the total size of a given xdp buff (linear and paged area) * Return * The total size of a given xdp buffer. + * + * long bpf_xdp_load_bytes(struct xdp_buff *xdp_md, u32 offset, void *buf, u32 len) + * Description + * This helper is provided as an easy way to load data from a + * xdp buffer. 
It can be used to load *len* bytes from *offset* from + * the frame associated to *xdp_md*, into the buffer pointed by + * *buf*. + * Return + * 0 on success, or a negative error in case of failure. + * + * long bpf_xdp_store_bytes(struct xdp_buff *xdp_md, u32 offset, void *buf, u32 len) + * Description + * Store *len* bytes from buffer *buf* into the frame + * associated to *xdp_md*, at *offset*. + * Return + * 0 on success, or a negative error in case of failure. */ #define __BPF_FUNC_MAPPER(FN) \ FN(unspec), \ @@ -5251,6 +5267,8 @@ union bpf_attr { FN(get_retval), \ FN(set_retval), \ FN(xdp_get_buff_len), \ + FN(xdp_load_bytes), \ + FN(xdp_store_bytes), \ /* */ /* integer value in 'imm' field of BPF_CALL instruction selects which helper -- cgit v1.2.3 From 376040e47334c6dc6a939a32197acceb00fe4acf Mon Sep 17 00:00:00 2001 From: Kenny Yu Date: Mon, 24 Jan 2022 10:54:01 -0800 Subject: bpf: Add bpf_copy_from_user_task() helper This adds a helper for bpf programs to read the memory of other tasks. As an example use case at Meta, we are using a bpf task iterator program and this new helper to print C++ async stack traces for all threads of a given process. Signed-off-by: Kenny Yu Acked-by: Andrii Nakryiko Link: https://lore.kernel.org/r/20220124185403.468466-3-kennyyu@fb.com Signed-off-by: Alexei Starovoitov --- include/uapi/linux/bpf.h | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 16a7574292a5..4a2f7041ebae 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -5076,6 +5076,16 @@ union bpf_attr { * associated to *xdp_md*, at *offset*. * Return * 0 on success, or a negative error in case of failure. + * + * long bpf_copy_from_user_task(void *dst, u32 size, const void *user_ptr, struct task_struct *tsk, u64 flags) + * Description + * Read *size* bytes from user space address *user_ptr* in *tsk*'s + * address space, and stores the data in *dst*. 
*flags* is not + * used yet and is provided for future extensibility. This helper + * can only be used by sleepable programs. + * Return + * 0 on success, or a negative error in case of failure. On error + * *dst* buffer is zeroed out. */ #define __BPF_FUNC_MAPPER(FN) \ FN(unspec), \ @@ -5269,6 +5279,7 @@ union bpf_attr { FN(xdp_get_buff_len), \ FN(xdp_load_bytes), \ FN(xdp_store_bytes), \ + FN(copy_from_user_task), \ /* */ /* integer value in 'imm' field of BPF_CALL instruction selects which helper -- cgit v1.2.3 From 9690ae60429020f38e4aa2540c306f27eb021bc0 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Thu, 27 Jan 2022 10:42:59 -0800 Subject: ethtool: add header/data split indication For applications running on a mix of platforms it's useful to have a clear indication whether host's NIC supports the geometry requirements of TCP zero-copy. TCP zero-copy Rx requires data to be neatly placed into memory pages. Most NICs can't do that. This patch is adding GET support only, since the NICs I work with either always have the feature enabled or enable it whenever MTU is set to jumbo. In other words I don't need SET. But adding set should be trivial. (The only note on SET is that we will likely want the setting to be "sticky" and use 0 / `unknown` to reset it back to driver default.) Signed-off-by: Jakub Kicinski Signed-off-by: David S. 
Miller --- include/uapi/linux/ethtool_netlink.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/ethtool_netlink.h b/include/uapi/linux/ethtool_netlink.h index cca6e474a085..417d4280d7b5 100644 --- a/include/uapi/linux/ethtool_netlink.h +++ b/include/uapi/linux/ethtool_netlink.h @@ -318,6 +318,12 @@ enum { /* RINGS */ +enum { + ETHTOOL_TCP_DATA_SPLIT_UNKNOWN = 0, + ETHTOOL_TCP_DATA_SPLIT_DISABLED, + ETHTOOL_TCP_DATA_SPLIT_ENABLED, +}; + enum { ETHTOOL_A_RINGS_UNSPEC, ETHTOOL_A_RINGS_HEADER, /* nest - _A_HEADER_* */ @@ -330,6 +336,7 @@ enum { ETHTOOL_A_RINGS_RX_JUMBO, /* u32 */ ETHTOOL_A_RINGS_TX, /* u32 */ ETHTOOL_A_RINGS_RX_BUF_LEN, /* u32 */ + ETHTOOL_A_RINGS_TCP_DATA_SPLIT, /* u8 */ /* add new constants above here */ __ETHTOOL_A_RINGS_CNT, -- cgit v1.2.3 From b2a90f4fcb146d0e033203ab646f0fd22cfa947f Mon Sep 17 00:00:00 2001 From: Sean Young Date: Thu, 13 Jan 2022 11:20:22 +0100 Subject: media: lirc: remove unused lirc features These features have never been implemented by any lirc driver, including staging or out-of-tree drivers. The ioctls for these features were removed in commit d55f09abe24b ("[media] lirc.h: remove several unused ioctls"). So, we can safely remove them. 
Signed-off-by: Sean Young Signed-off-by: Mauro Carvalho Chehab --- include/uapi/linux/lirc.h | 4 ---- 1 file changed, 4 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/lirc.h b/include/uapi/linux/lirc.h index 9919f2062b14..a1f9c26ea537 100644 --- a/include/uapi/linux/lirc.h +++ b/include/uapi/linux/lirc.h @@ -72,11 +72,9 @@ #define LIRC_CAN_SET_REC_CARRIER (LIRC_CAN_SET_SEND_CARRIER << 16) #define LIRC_CAN_SET_REC_DUTY_CYCLE (LIRC_CAN_SET_SEND_DUTY_CYCLE << 16) -#define LIRC_CAN_SET_REC_DUTY_CYCLE_RANGE 0x40000000 #define LIRC_CAN_SET_REC_CARRIER_RANGE 0x80000000 #define LIRC_CAN_GET_REC_RESOLUTION 0x20000000 #define LIRC_CAN_SET_REC_TIMEOUT 0x10000000 -#define LIRC_CAN_SET_REC_FILTER 0x08000000 #define LIRC_CAN_MEASURE_CARRIER 0x02000000 #define LIRC_CAN_USE_WIDEBAND_RECEIVER 0x04000000 @@ -84,8 +82,6 @@ #define LIRC_CAN_SEND(x) ((x)&LIRC_CAN_SEND_MASK) #define LIRC_CAN_REC(x) ((x)&LIRC_CAN_REC_MASK) -#define LIRC_CAN_NOTIFY_DECODE 0x01000000 - /*** IOCTL commands for lirc driver ***/ #define LIRC_GET_FEATURES _IOR('i', 0x00000000, __u32) -- cgit v1.2.3 From 68a99f6a0ebfe9101ea79ba5af1c407a5ad4f629 Mon Sep 17 00:00:00 2001 From: Sean Young Date: Sat, 15 Jan 2022 11:19:11 +0100 Subject: media: lirc: report ir receiver overflow If the driver reports that the hardware had an overflow, report this to userspace. It would be nice to know when this happens, and not just get a long space. This change has been tested with lircd, ir-ctl, and ir-keytable. 
Signed-off-by: Sean Young Signed-off-by: Mauro Carvalho Chehab --- include/uapi/linux/lirc.h | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/lirc.h b/include/uapi/linux/lirc.h index a1f9c26ea537..21c69a6a100d 100644 --- a/include/uapi/linux/lirc.h +++ b/include/uapi/linux/lirc.h @@ -16,14 +16,16 @@ #define LIRC_MODE2_PULSE 0x01000000 #define LIRC_MODE2_FREQUENCY 0x02000000 #define LIRC_MODE2_TIMEOUT 0x03000000 +#define LIRC_MODE2_OVERFLOW 0x04000000 #define LIRC_VALUE_MASK 0x00FFFFFF #define LIRC_MODE2_MASK 0xFF000000 -#define LIRC_SPACE(val) (((val)&LIRC_VALUE_MASK) | LIRC_MODE2_SPACE) -#define LIRC_PULSE(val) (((val)&LIRC_VALUE_MASK) | LIRC_MODE2_PULSE) -#define LIRC_FREQUENCY(val) (((val)&LIRC_VALUE_MASK) | LIRC_MODE2_FREQUENCY) -#define LIRC_TIMEOUT(val) (((val)&LIRC_VALUE_MASK) | LIRC_MODE2_TIMEOUT) +#define LIRC_SPACE(val) (((val) & LIRC_VALUE_MASK) | LIRC_MODE2_SPACE) +#define LIRC_PULSE(val) (((val) & LIRC_VALUE_MASK) | LIRC_MODE2_PULSE) +#define LIRC_FREQUENCY(val) (((val) & LIRC_VALUE_MASK) | LIRC_MODE2_FREQUENCY) +#define LIRC_TIMEOUT(val) (((val) & LIRC_VALUE_MASK) | LIRC_MODE2_TIMEOUT) +#define LIRC_OVERFLOW(val) (((val) & LIRC_VALUE_MASK) | LIRC_MODE2_OVERFLOW) #define LIRC_VALUE(val) ((val)&LIRC_VALUE_MASK) #define LIRC_MODE2(val) ((val)&LIRC_MODE2_MASK) @@ -32,6 +34,7 @@ #define LIRC_IS_PULSE(val) (LIRC_MODE2(val) == LIRC_MODE2_PULSE) #define LIRC_IS_FREQUENCY(val) (LIRC_MODE2(val) == LIRC_MODE2_FREQUENCY) #define LIRC_IS_TIMEOUT(val) (LIRC_MODE2(val) == LIRC_MODE2_TIMEOUT) +#define LIRC_IS_OVERFLOW(val) (LIRC_MODE2(val) == LIRC_MODE2_OVERFLOW) /* used heavily by lirc userspace */ #define lirc_t int -- cgit v1.2.3 From f6c6804c43fa18d3cee64b55490dfbd3bef1363a Mon Sep 17 00:00:00 2001 From: Janosch Frank Date: Fri, 28 Jan 2022 15:40:25 +0000 Subject: kvm: Move KVM_GET_XSAVE2 IOCTL definition at the end of kvm.h This way we can more easily find the next free IOCTL 
number when adding new IOCTLs. Fixes: be50b2065dfa ("kvm: x86: Add support for getting/setting expanded xstate buffer") Signed-off-by: Janosch Frank Message-Id: <20220128154025.102666-1-frankja@linux.ibm.com> Signed-off-by: Paolo Bonzini --- include/uapi/linux/kvm.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index b46bcdb0cab1..5191b57e1562 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -1624,9 +1624,6 @@ struct kvm_enc_region { #define KVM_S390_NORMAL_RESET _IO(KVMIO, 0xc3) #define KVM_S390_CLEAR_RESET _IO(KVMIO, 0xc4) -/* Available with KVM_CAP_XSAVE2 */ -#define KVM_GET_XSAVE2 _IOR(KVMIO, 0xcf, struct kvm_xsave) - struct kvm_s390_pv_sec_parm { __u64 origin; __u64 length; @@ -2048,4 +2045,7 @@ struct kvm_stats_desc { #define KVM_GET_STATS_FD _IO(KVMIO, 0xce) +/* Available with KVM_CAP_XSAVE2 */ +#define KVM_GET_XSAVE2 _IOR(KVMIO, 0xcf, struct kvm_xsave) + #endif /* __LINUX_KVM_H */ -- cgit v1.2.3 From e187013abeb4c2a7ec8a4bb978844c7e92a1a6ec Mon Sep 17 00:00:00 2001 From: Akhmat Karakotov Date: Mon, 31 Jan 2022 16:31:21 +0300 Subject: txhash: Make rethinking txhash behavior configurable via sysctl Add a per ns sysctl that controls the txhash rethink behavior: net.core.txrehash. When enabled, the same behavior is retained, when disabled, rethink is not performed. Sysctl is enabled by default. Signed-off-by: Akhmat Karakotov Reviewed-by: Eric Dumazet Signed-off-by: David S. 
Miller --- include/uapi/linux/socket.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/socket.h b/include/uapi/linux/socket.h index eb0a9a5b6e71..0accd6102ece 100644 --- a/include/uapi/linux/socket.h +++ b/include/uapi/linux/socket.h @@ -31,4 +31,7 @@ struct __kernel_sockaddr_storage { #define SOCK_BUF_LOCK_MASK (SOCK_SNDBUF_LOCK | SOCK_RCVBUF_LOCK) +#define SOCK_TXREHASH_DISABLED 0 +#define SOCK_TXREHASH_ENABLED 1 + #endif /* _UAPI_LINUX_SOCKET_H */ -- cgit v1.2.3 From 26859240e4ee701e0379f08634957adaff67e43a Mon Sep 17 00:00:00 2001 From: Akhmat Karakotov Date: Mon, 31 Jan 2022 16:31:22 +0300 Subject: txhash: Add socket option to control TX hash rethink behavior Add the SO_TXREHASH socket option to control hash rethink behavior per socket. When default mode is set, sockets disable rehash at initialization and use sysctl option when entering listen state. setsockopt() overrides default behavior. Signed-off-by: Akhmat Karakotov Reviewed-by: Eric Dumazet Signed-off-by: David S. Miller --- include/uapi/linux/socket.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/socket.h b/include/uapi/linux/socket.h index 0accd6102ece..51d6bb2f6765 100644 --- a/include/uapi/linux/socket.h +++ b/include/uapi/linux/socket.h @@ -31,6 +31,7 @@ struct __kernel_sockaddr_storage { #define SOCK_BUF_LOCK_MASK (SOCK_SNDBUF_LOCK | SOCK_RCVBUF_LOCK) +#define SOCK_TXREHASH_DEFAULT ((u8)-1) #define SOCK_TXREHASH_DISABLED 0 #define SOCK_TXREHASH_ENABLED 1 -- cgit v1.2.3 From 4421a582718ab81608d8486734c18083b822390d Mon Sep 17 00:00:00 2001 From: Jakub Sitnicki Date: Sun, 30 Jan 2022 12:55:17 +0100 Subject: bpf: Make dst_port field in struct bpf_sock 16-bit wide Menglong Dong reports that the documentation for the dst_port field in struct bpf_sock is inaccurate and confusing. From the BPF program PoV, the field is a zero-padded 16-bit integer in network byte order. 
The value appears to the BPF user as if laid out in memory as so: offsetof(struct bpf_sock, dst_port) + 0 (port MSB) + 8 (port LSB) +16 0x00 +24 0x00 32-, 16-, and 8-bit wide loads from the field are all allowed, but only if the offset into the field is 0. 32-bit wide loads from dst_port are especially confusing. The loaded value, after converting to host byte order with bpf_ntohl(dst_port), contains the port number in the upper 16 bits. Remove the confusion by splitting the field into two 16-bit fields. For backward compatibility, allow 32-bit wide loads from offsetof(struct bpf_sock, dst_port). While at it, allow 8-bit loads at offset [0] and [1] from dst_port. Reported-by: Menglong Dong Signed-off-by: Jakub Sitnicki Link: https://lore.kernel.org/r/20220130115518.213259-2-jakub@cloudflare.com Signed-off-by: Alexei Starovoitov --- include/uapi/linux/bpf.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 4a2f7041ebae..a7f0ddedac1f 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -5574,7 +5574,8 @@ struct bpf_sock { __u32 src_ip4; __u32 src_ip6[4]; __u32 src_port; /* host byte order */ - __u32 dst_port; /* network byte order */ + __be16 dst_port; /* network byte order */ + __u16 :16; /* zero padding */ __u32 dst_ip4; __u32 dst_ip6[4]; __u32 state; -- cgit v1.2.3 From bfdf4e6208051ed7165b2e92035b4bf11f43eb63 Mon Sep 17 00:00:00 2001 From: Mathieu Desnoyers Date: Thu, 27 Jan 2022 10:27:20 -0500 Subject: rseq: Remove broken uapi field layout on 32-bit little endian The rseq rseq_cs.ptr.{ptr32,padding} uapi endianness handling is entirely wrong on 32-bit little endian: a preprocessor logic mistake wrongly uses the big endian field layout on 32-bit little endian architectures. Fortunately, those ptr32 accessors were never used within the kernel, and were only meant as a convenience for user-space. 
Remove those and replace the whole rseq_cs union by a __u64 type, as this is the only thing really needed to express the ABI. Document how 32-bit architectures are meant to interact with this field. Fixes: ec9c82e03a74 ("rseq: uapi: Declare rseq_cs field as union, update includes") Signed-off-by: Mathieu Desnoyers Signed-off-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r/20220127152720.25898-1-mathieu.desnoyers@efficios.com --- include/uapi/linux/rseq.h | 20 ++++---------------- 1 file changed, 4 insertions(+), 16 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/rseq.h b/include/uapi/linux/rseq.h index 9a402fdb60e9..77ee207623a9 100644 --- a/include/uapi/linux/rseq.h +++ b/include/uapi/linux/rseq.h @@ -105,23 +105,11 @@ struct rseq { * Read and set by the kernel. Set by user-space with single-copy * atomicity semantics. This field should only be updated by the * thread which registered this data structure. Aligned on 64-bit. + * + * 32-bit architectures should update the low order bits of the + * rseq_cs field, leaving the high order bits initialized to 0. */ - union { - __u64 ptr64; -#ifdef __LP64__ - __u64 ptr; -#else - struct { -#if (defined(__BYTE_ORDER) && (__BYTE_ORDER == __BIG_ENDIAN)) || defined(__BIG_ENDIAN) - __u32 padding; /* Initialized to zero. */ - __u32 ptr32; -#else /* LITTLE */ - __u32 ptr32; - __u32 padding; /* Initialized to zero. */ -#endif /* ENDIAN */ - } ptr; -#endif - } rseq_cs; + __u64 rseq_cs; /* * Restartable sequences flags field. -- cgit v1.2.3 From ddecd22878601a606d160680fa85802b75d92eb6 Mon Sep 17 00:00:00 2001 From: Marco Elver Date: Mon, 31 Jan 2022 11:34:07 +0100 Subject: perf: uapi: Document perf_event_attr::sig_data truncation on 32 bit architectures Due to the alignment requirements of siginfo_t, as described in 3ddb3fd8cdb0 ("signal, perf: Fix siginfo_t by avoiding u64 on 32-bit architectures"), siginfo_t::si_perf_data is limited to an unsigned long. 
However, perf_event_attr::sig_data is an u64, to avoid having to deal with compat conversions. Due to being an u64, it may not immediately be clear to users that sig_data is truncated on 32 bit architectures. Add a comment to explicitly point this out, and hopefully help some users save time by not having to deduce themselves what's happening. Reported-by: Dmitry Vyukov Signed-off-by: Marco Elver Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Dmitry Vyukov Link: https://lore.kernel.org/r/20220131103407.1971678-3-elver@google.com --- include/uapi/linux/perf_event.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/perf_event.h b/include/uapi/linux/perf_event.h index 1b65042ab1db..82858b697c05 100644 --- a/include/uapi/linux/perf_event.h +++ b/include/uapi/linux/perf_event.h @@ -465,6 +465,8 @@ struct perf_event_attr { /* * User provided data if sigtrap=1, passed back to user via * siginfo_t::si_perf_data, e.g. to permit user to identify the event. + * Note, siginfo_t::si_perf_data is long-sized, and sig_data will be + * truncated accordingly on 32 bit architectures. */ __u64 sig_data; }; -- cgit v1.2.3 From c86d86131ab75696fc52d98571148842e067d620 Mon Sep 17 00:00:00 2001 From: "Dmitry V. Levin" Date: Wed, 2 Feb 2022 06:09:04 +0300 Subject: Partially revert "net/smc: Add netlink net namespace support" The change of sizeof(struct smc_diag_linkinfo) by commit 79d39fc503b4 ("net/smc: Add netlink net namespace support") introduced an ABI regression: since struct smc_diag_lgrinfo contains an object of type "struct smc_diag_linkinfo", offset of all subsequent members of struct smc_diag_lgrinfo was changed by that change. As result, applications compiled with the old version of struct smc_diag_linkinfo will receive garbage in struct smc_diag_lgrinfo.role if the kernel implements this new version of struct smc_diag_linkinfo. 
Fix this regression by reverting the part of commit 79d39fc503b4 that changes struct smc_diag_linkinfo. After all, there is SMC_GEN_NETLINK interface which is good enough, so there is probably no need to touch the smc_diag ABI in the first place. Fixes: 79d39fc503b4 ("net/smc: Add netlink net namespace support") Signed-off-by: Dmitry V. Levin Reviewed-by: Karsten Graul Link: https://lore.kernel.org/r/20220202030904.GA9742@altlinux.org Signed-off-by: Jakub Kicinski --- include/uapi/linux/smc_diag.h | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/smc_diag.h b/include/uapi/linux/smc_diag.h index c7008d87f1a4..8cb3a6fef553 100644 --- a/include/uapi/linux/smc_diag.h +++ b/include/uapi/linux/smc_diag.h @@ -84,12 +84,11 @@ struct smc_diag_conninfo { /* SMC_DIAG_LINKINFO */ struct smc_diag_linkinfo { - __u8 link_id; /* link identifier */ - __u8 ibname[IB_DEVICE_NAME_MAX]; /* name of the RDMA device */ - __u8 ibport; /* RDMA device port number */ - __u8 gid[40]; /* local GID */ - __u8 peer_gid[40]; /* peer GID */ - __aligned_u64 net_cookie; /* RDMA device net namespace */ + __u8 link_id; /* link identifier */ + __u8 ibname[IB_DEVICE_NAME_MAX]; /* name of the RDMA device */ + __u8 ibport; /* RDMA device port number */ + __u8 gid[40]; /* local GID */ + __u8 peer_gid[40]; /* peer GID */ }; struct smc_diag_lgrinfo { -- cgit v1.2.3 From e4b1eb24ce5a696ef7229f9926ff34d7502f0582 Mon Sep 17 00:00:00 2001 From: Srinivas Pandruvada Date: Thu, 27 Jan 2022 11:34:53 -0800 Subject: thermal: netlink: Add a new event to notify CPU capabilities change Add a new netlink event to notify change in CPU capabilities in terms of performance and efficiency. Firmware may change CPU capabilities as a result of thermal events in the system or to account for changes in the TDP (thermal design power) level. 
This notification type will allow user space to avoid running workloads on certain CPUs or proactively adjust power limits to avoid future events. The netlink message consists of a nested attribute (THERMAL_GENL_ATTR_CPU_CAPABILITY) with three attributes: * THERMAL_GENL_ATTR_CPU_CAPABILITY_ID (type u32): -- logical CPU number * THERMAL_GENL_ATTR_CPU_CAPABILITY_PERFORMANCE (type u32): -- Scaled performance from 0-1023 * THERMAL_GENL_ATTR_CPU_CAPABILITY_EFFICIENCY (type u32): -- Scaled efficiency from 0-1023 Reviewed-by: Len Brown Signed-off-by: Srinivas Pandruvada Signed-off-by: Rafael J. Wysocki --- include/uapi/linux/thermal.h | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/thermal.h b/include/uapi/linux/thermal.h index 9aa2fedfa309..fc78bf3aead7 100644 --- a/include/uapi/linux/thermal.h +++ b/include/uapi/linux/thermal.h @@ -44,7 +44,10 @@ enum thermal_genl_attr { THERMAL_GENL_ATTR_CDEV_MAX_STATE, THERMAL_GENL_ATTR_CDEV_NAME, THERMAL_GENL_ATTR_GOV_NAME, - + THERMAL_GENL_ATTR_CPU_CAPABILITY, + THERMAL_GENL_ATTR_CPU_CAPABILITY_ID, + THERMAL_GENL_ATTR_CPU_CAPABILITY_PERFORMANCE, + THERMAL_GENL_ATTR_CPU_CAPABILITY_EFFICIENCY, __THERMAL_GENL_ATTR_MAX, }; #define THERMAL_GENL_ATTR_MAX (__THERMAL_GENL_ATTR_MAX - 1) @@ -71,6 +74,7 @@ enum thermal_genl_event { THERMAL_GENL_EVENT_CDEV_DELETE, /* Cdev unbound */ THERMAL_GENL_EVENT_CDEV_STATE_UPDATE, /* Cdev state updated */ THERMAL_GENL_EVENT_TZ_GOV_CHANGE, /* Governor policy changed */ + THERMAL_GENL_EVENT_CPU_CAPABILITY_CHANGE, /* CPU capability changed */ __THERMAL_GENL_EVENT_MAX, }; #define THERMAL_GENL_EVENT_MAX (__THERMAL_GENL_EVENT_MAX - 1) -- cgit v1.2.3 From d1ca60efc53d665cf89ed847a14a510a81770b81 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Wed, 2 Feb 2022 12:00:56 +0100 Subject: netfilter: ctnetlink: disable helper autoassign When userspace, e.g. 
conntrackd, inserts an entry with a specified helper, it's possible that the helper is lost immediately after it's added: ctnetlink_create_conntrack -> nf_ct_helper_ext_add + assign helper -> ctnetlink_setup_nat -> ctnetlink_parse_nat_setup -> parse_nat_setup -> nfnetlink_parse_nat_setup -> nf_nat_setup_info -> nf_conntrack_alter_reply -> __nf_ct_try_assign_helper ... and __nf_ct_try_assign_helper will zero the helper again. Set IPS_HELPER bit to bypass auto-assign logic; it's unwanted, just like when helper is assigned via ruleset. Dropped old 'not strictly necessary' comment, it referred to use of rcu_assign_pointer() before it got replaced by RCU_INIT_POINTER(). NB: Fixes tag intentionally incorrect, this extends the referenced commit, but this change won't build without IPS_HELPER introduced there. Fixes: 6714cf5465d280 ("netfilter: nf_conntrack: fix explicit helper attachment and NAT") Reported-by: Pham Thanh Tuyen Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/uapi/linux/netfilter/nf_conntrack_common.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/netfilter/nf_conntrack_common.h b/include/uapi/linux/netfilter/nf_conntrack_common.h index 4b3395082d15..26071021e986 100644 --- a/include/uapi/linux/netfilter/nf_conntrack_common.h +++ b/include/uapi/linux/netfilter/nf_conntrack_common.h @@ -106,7 +106,7 @@ enum ip_conntrack_status { IPS_NAT_CLASH = IPS_UNTRACKED, #endif - /* Conntrack got a helper explicitly attached via CT target. */ + /* Conntrack got a helper explicitly attached (ruleset, ctnetlink). */ IPS_HELPER_BIT = 13, IPS_HELPER = (1 << IPS_HELPER_BIT), -- cgit v1.2.3 From 8b5413647262dda8d8d0e07e14ea1de9ac7cf0b2 Mon Sep 17 00:00:00 2001 From: Nicolas Dichtel Date: Mon, 17 Jan 2022 21:56:13 +0100 Subject: netfilter: nfqueue: enable to get skb->priority This info could be useful to improve traffic analysis. 
Signed-off-by: Nicolas Dichtel Acked-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/uapi/linux/netfilter/nfnetlink_queue.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/netfilter/nfnetlink_queue.h b/include/uapi/linux/netfilter/nfnetlink_queue.h index aed90c4df0c8..ef7c97f21a15 100644 --- a/include/uapi/linux/netfilter/nfnetlink_queue.h +++ b/include/uapi/linux/netfilter/nfnetlink_queue.h @@ -61,6 +61,7 @@ enum nfqnl_attr_type { NFQA_SECCTX, /* security context string */ NFQA_VLAN, /* nested attribute: packet vlan info */ NFQA_L2HDR, /* full L2 header */ + NFQA_PRIORITY, /* skb->priority */ __NFQA_MAX }; -- cgit v1.2.3 From be847673cfffce8bb6e9ed6ae186081280c58831 Mon Sep 17 00:00:00 2001 From: Justin Iurman Date: Wed, 2 Feb 2022 15:25:53 +0100 Subject: uapi: ioam: Insertion frequency Add the insertion frequency uapi for IOAM lwtunnels. Signed-off-by: Justin Iurman Signed-off-by: Jakub Kicinski --- include/uapi/linux/ioam6_iptunnel.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/ioam6_iptunnel.h b/include/uapi/linux/ioam6_iptunnel.h index 829ffdfcacca..38f6a8fdfd34 100644 --- a/include/uapi/linux/ioam6_iptunnel.h +++ b/include/uapi/linux/ioam6_iptunnel.h @@ -41,6 +41,15 @@ enum { /* IOAM Trace Header */ IOAM6_IPTUNNEL_TRACE, /* struct ioam6_trace_hdr */ + /* Insertion frequency: + * "k over n" packets (0 < k <= n) + * [0.0001% ... 
100%] + */ +#define IOAM6_IPTUNNEL_FREQ_MIN 1 +#define IOAM6_IPTUNNEL_FREQ_MAX 1000000 + IOAM6_IPTUNNEL_FREQ_K, /* u32 */ + IOAM6_IPTUNNEL_FREQ_N, /* u32 */ + __IOAM6_IPTUNNEL_MAX, }; -- cgit v1.2.3 From 3698807094ecae945436921325f5c309d1123f11 Mon Sep 17 00:00:00 2001 From: Rajneesh Bhardwaj Date: Tue, 24 Aug 2021 16:13:41 -0400 Subject: drm/amdkfd: CRIU Introduce Checkpoint-Restore APIs Checkpoint-Restore in userspace (CRIU) is a powerful tool that can snapshot a running process and later restore it on same or a remote machine but expects the processes that have a device file (e.g. GPU) associated with them, provide necessary driver support to assist CRIU and its extensible plugin interface. Thus, In order to support the Checkpoint-Restore of any ROCm process, the AMD Radeon Open Compute Kernel driver, needs to provide a set of new APIs that provide necessary VRAM metadata and its contents to a userspace component (CRIU plugin) that can store it in form of image files. This introduces some new ioctls which will be used to checkpoint-Restore any KFD bound user process. KFD only allows ioctl calls from the same process that opened the KFD file descriptor. Since these ioctls are expected to be called from a KFD criu plugin which has elevated ptrace attached privileges and CAP_CHECKPOINT_RESTORE capabilities attached with the file descriptors so modify KFD to allow such calls. 
(API redesigned by David Yat Sin) Suggested-by: Felix Kuehling Reviewed-by: Felix Kuehling Signed-off-by: David Yat Sin Signed-off-by: Rajneesh Bhardwaj Signed-off-by: Alex Deucher --- include/uapi/linux/kfd_ioctl.h | 81 +++++++++++++++++++++++++++++++++++++++++- 1 file changed, 80 insertions(+), 1 deletion(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/kfd_ioctl.h b/include/uapi/linux/kfd_ioctl.h index af96af174dc4..49429a6c42fc 100644 --- a/include/uapi/linux/kfd_ioctl.h +++ b/include/uapi/linux/kfd_ioctl.h @@ -468,6 +468,82 @@ struct kfd_ioctl_smi_events_args { __u32 anon_fd; /* from KFD */ }; +/************************************************************************************************** + * CRIU IOCTLs (Checkpoint Restore In Userspace) + * + * When checkpointing a process, the userspace application will perform: + * 1. PROCESS_INFO op to determine current process information. This pauses execution and evicts + * all the queues. + * 2. CHECKPOINT op to checkpoint process contents (BOs, queues, events, svm-ranges) + * 3. UNPAUSE op to un-evict all the queues + * + * When restoring a process, the CRIU userspace application will perform: + * + * 1. RESTORE op to restore process contents + * 2. RESUME op to start the process + * + * Note: Queues are forced into an evicted state after a successful PROCESS_INFO. User + * application needs to perform an UNPAUSE operation after calling PROCESS_INFO. + */ + +enum kfd_criu_op { + KFD_CRIU_OP_PROCESS_INFO, + KFD_CRIU_OP_CHECKPOINT, + KFD_CRIU_OP_UNPAUSE, + KFD_CRIU_OP_RESTORE, + KFD_CRIU_OP_RESUME, +}; + +/** + * kfd_ioctl_criu_args - Arguments perform CRIU operation + * @devices: [in/out] User pointer to memory location for devices information. + * This is an array of type kfd_criu_device_bucket. + * @bos: [in/out] User pointer to memory location for BOs information + * This is an array of type kfd_criu_bo_bucket. 
+ * @priv_data: [in/out] User pointer to memory location for private data + * @priv_data_size: [in/out] Size of priv_data in bytes + * @num_devices: [in/out] Number of GPUs used by process. Size of @devices array. + * @num_bos [in/out] Number of BOs used by process. Size of @bos array. + * @num_objects: [in/out] Number of objects used by process. Objects are opaque to + * user application. + * @pid: [in/out] PID of the process being checkpointed + * @op [in] Type of operation (kfd_criu_op) + * + * Return: 0 on success, -errno on failure + */ +struct kfd_ioctl_criu_args { + __u64 devices; /* Used during ops: CHECKPOINT, RESTORE */ + __u64 bos; /* Used during ops: CHECKPOINT, RESTORE */ + __u64 priv_data; /* Used during ops: CHECKPOINT, RESTORE */ + __u64 priv_data_size; /* Used during ops: PROCESS_INFO, RESTORE */ + __u32 num_devices; /* Used during ops: PROCESS_INFO, RESTORE */ + __u32 num_bos; /* Used during ops: PROCESS_INFO, RESTORE */ + __u32 num_objects; /* Used during ops: PROCESS_INFO, RESTORE */ + __u32 pid; /* Used during ops: PROCESS_INFO, RESUME */ + __u32 op; +}; + +struct kfd_criu_device_bucket { + __u32 user_gpu_id; + __u32 actual_gpu_id; + __u32 drm_fd; + __u32 pad; +}; + +struct kfd_criu_bo_bucket { + __u64 addr; + __u64 size; + __u64 offset; + __u64 restored_offset; /* During restore, updated offset for BO */ + __u32 gpu_id; /* This is the user_gpu_id */ + __u32 alloc_flags; + __u32 dmabuf_fd; + __u32 pad; +}; + +/* CRIU IOCTLs - END */ +/**************************************************************************************************/ + /* Register offset inside the remapped mmio page */ enum kfd_mmio_remap { @@ -742,7 +818,10 @@ struct kfd_ioctl_set_xnack_mode_args { #define AMDKFD_IOC_SET_XNACK_MODE \ AMDKFD_IOWR(0x21, struct kfd_ioctl_set_xnack_mode_args) +#define AMDKFD_IOC_CRIU_OP \ + AMDKFD_IOWR(0x22, struct kfd_ioctl_criu_args) + #define AMDKFD_COMMAND_START 0x01 -#define AMDKFD_COMMAND_END 0x22 +#define AMDKFD_COMMAND_END 0x23 #endif -- 
cgit v1.2.3 From 692996f2bef7aa1737e07554255ba0d9a73fb750 Mon Sep 17 00:00:00 2001 From: Rajneesh Bhardwaj Date: Tue, 18 Jan 2022 01:47:58 -0500 Subject: drm/amdkfd: Bump up KFD API version for CRIU - Change KFD minor version to 7 for CRIU Proposed userspace changes: https://github.com/RadeonOpenCompute/criu Reviewed-by: Felix Kuehling Signed-off-by: Rajneesh Bhardwaj Signed-off-by: Alex Deucher --- include/uapi/linux/kfd_ioctl.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/kfd_ioctl.h b/include/uapi/linux/kfd_ioctl.h index 49429a6c42fc..e6a56c146920 100644 --- a/include/uapi/linux/kfd_ioctl.h +++ b/include/uapi/linux/kfd_ioctl.h @@ -32,9 +32,10 @@ * - 1.4 - Indicate new SRAM EDC bit in device properties * - 1.5 - Add SVM API * - 1.6 - Query clear flags in SVM get_attr API + * - 1.7 - Checkpoint Restore (CRIU) API */ #define KFD_IOCTL_MAJOR_VERSION 1 -#define KFD_IOCTL_MINOR_VERSION 6 +#define KFD_IOCTL_MINOR_VERSION 7 struct kfd_ioctl_get_version_args { __u32 major_version; /* from KFD */ -- cgit v1.2.3 From 7c76ecd9c99b6e9a771d813ab1aa7fa428b3ade1 Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Tue, 8 Feb 2022 16:14:32 +0200 Subject: xfrm: enforce validity of offload input flags struct xfrm_user_offload has flags variable that received user input, but kernel didn't check if valid bits were provided. It caused a situation where not sanitized input was forwarded directly to the drivers. For example, XFRM_OFFLOAD_IPV6 define that was exposed, was used by strongswan, but not implemented in the kernel at all. As a solution, check and sanitize input flags to forward XFRM_OFFLOAD_INBOUND to the drivers. 
Fixes: d77e38e612a0 ("xfrm: Add an IPsec hardware offloading API") Signed-off-by: Leon Romanovsky Signed-off-by: Steffen Klassert --- include/uapi/linux/xfrm.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/xfrm.h b/include/uapi/linux/xfrm.h index 4e29d7851890..65e13a099b1a 100644 --- a/include/uapi/linux/xfrm.h +++ b/include/uapi/linux/xfrm.h @@ -511,6 +511,12 @@ struct xfrm_user_offload { int ifindex; __u8 flags; }; +/* This flag was exposed without any kernel code that supporting it. + * Unfortunately, strongswan has the code that uses sets this flag, + * which makes impossible to reuse this bit. + * + * So leave it here to make sure that it won't be reused by mistake. + */ #define XFRM_OFFLOAD_IPV6 1 #define XFRM_OFFLOAD_INBOUND 2 -- cgit v1.2.3 From 63ed1aab3d40aa61aaa66819bdce9377ac7f40fa Mon Sep 17 00:00:00 2001 From: Matt Johnston Date: Wed, 9 Feb 2022 12:05:57 +0800 Subject: mctp: Add SIOCMCTP{ALLOC,DROP}TAG ioctls for tag control This change adds a couple of new ioctls for mctp sockets: SIOCMCTPALLOCTAG and SIOCMCTPDROPTAG. These ioctls provide facilities for explicit allocation / release of tags, overriding the automatic allocate-on-send/release-on-reply and timeout behaviours. This allows userspace more control over messages that may not fit a simple request/response model. In order to indicate a pre-allocated tag to the sendmsg() syscall, we introduce a new flag to the struct sockaddr_mctp.smctp_tag value: MCTP_TAG_PREALLOC. Additional changes from Jeremy Kerr . Contains a fix that was: Reported-by: kernel test robot Signed-off-by: Matt Johnston Signed-off-by: Jeremy Kerr Signed-off-by: David S. 
Miller --- include/uapi/linux/mctp.h | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/mctp.h b/include/uapi/linux/mctp.h index 07b0318716fc..154ab56651f1 100644 --- a/include/uapi/linux/mctp.h +++ b/include/uapi/linux/mctp.h @@ -44,7 +44,25 @@ struct sockaddr_mctp_ext { #define MCTP_TAG_MASK 0x07 #define MCTP_TAG_OWNER 0x08 +#define MCTP_TAG_PREALLOC 0x10 #define MCTP_OPT_ADDR_EXT 1 +#define SIOCMCTPALLOCTAG (SIOCPROTOPRIVATE + 0) +#define SIOCMCTPDROPTAG (SIOCPROTOPRIVATE + 1) + +struct mctp_ioc_tag_ctl { + mctp_eid_t peer_addr; + + /* For SIOCMCTPALLOCTAG: must be passed as zero, kernel will + * populate with the allocated tag value. Returned tag value will + * always have TO and PREALLOC set. + * + * For SIOCMCTPDROPTAG: userspace provides tag value to drop, from + * a prior SIOCMCTPALLOCTAG call (and so must have TO and PREALLOC set). + */ + __u8 tag; + __u16 flags; +}; + #endif /* __UAPI_MCTP_H */ -- cgit v1.2.3 From 9a69e2b385f443f244a7e8b8bcafe5ccfb0866b4 Mon Sep 17 00:00:00 2001 From: Jakub Sitnicki Date: Wed, 9 Feb 2022 19:43:32 +0100 Subject: bpf: Make remote_port field in struct bpf_sk_lookup 16-bit wide remote_port is another case of a BPF context field documented as a 32-bit value in network byte order for which the BPF context access converter generates a load of a zero-padded 16-bit integer in network byte order. First such case was dst_port in bpf_sock which got addressed in commit 4421a582718a ("bpf: Make dst_port field in struct bpf_sock 16-bit wide"). Loading 4-bytes from the remote_port offset and converting the value with bpf_ntohl() leads to surprising results, as the expected value is shifted by 16 bits. Reduce the confusion by splitting the field in two - a 16-bit field holding a big-endian integer, and a 16-bit zero-padding anonymous field that follows it. 
Suggested-by: Alexei Starovoitov Signed-off-by: Jakub Sitnicki Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20220209184333.654927-2-jakub@cloudflare.com --- include/uapi/linux/bpf.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index a7f0ddedac1f..afe3d0d7f5f2 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -6453,7 +6453,8 @@ struct bpf_sk_lookup { __u32 protocol; /* IP protocol (IPPROTO_TCP, IPPROTO_UDP) */ __u32 remote_ip4; /* Network byte order */ __u32 remote_ip6[4]; /* Network byte order */ - __u32 remote_port; /* Network byte order */ + __be16 remote_port; /* Network byte order */ + __u16 :16; /* Zero padding */ __u32 local_ip4; /* Network byte order */ __u32 local_ip6[4]; /* Network byte order */ __u32 local_port; /* Host byte order */ -- cgit v1.2.3 From 5bdd3eb253544b1e80f904e1205699d0a126d2d6 Mon Sep 17 00:00:00 2001 From: Mukul Joshi Date: Fri, 4 Feb 2022 11:58:32 -0500 Subject: drm/amdkfd: Remove unused old debugger implementation Cleanup the kfd code by removing the unused old debugger implementation. The address watch was only ever implemented in the upstream driver for GFXv7 (Kaveri). The user mode tools runtime using this API was never open-sourced. Work on the old debugger prototype that used this API has been discontinued years ago. Only a small piece of resetting wavefronts is kept and is moved to kfd_device_queue_manager.c. 
Signed-off-by: Mukul Joshi Reviewed-by: Felix Kuehling Signed-off-by: Alex Deucher --- include/uapi/linux/kfd_ioctl.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/kfd_ioctl.h b/include/uapi/linux/kfd_ioctl.h index e6a56c146920..6e4268f5e482 100644 --- a/include/uapi/linux/kfd_ioctl.h +++ b/include/uapi/linux/kfd_ioctl.h @@ -756,16 +756,16 @@ struct kfd_ioctl_set_xnack_mode_args { #define AMDKFD_IOC_WAIT_EVENTS \ AMDKFD_IOWR(0x0C, struct kfd_ioctl_wait_events_args) -#define AMDKFD_IOC_DBG_REGISTER \ +#define AMDKFD_IOC_DBG_REGISTER_DEPRECATED \ AMDKFD_IOW(0x0D, struct kfd_ioctl_dbg_register_args) -#define AMDKFD_IOC_DBG_UNREGISTER \ +#define AMDKFD_IOC_DBG_UNREGISTER_DEPRECATED \ AMDKFD_IOW(0x0E, struct kfd_ioctl_dbg_unregister_args) -#define AMDKFD_IOC_DBG_ADDRESS_WATCH \ +#define AMDKFD_IOC_DBG_ADDRESS_WATCH_DEPRECATED \ AMDKFD_IOW(0x0F, struct kfd_ioctl_dbg_address_watch_args) -#define AMDKFD_IOC_DBG_WAVE_CONTROL \ +#define AMDKFD_IOC_DBG_WAVE_CONTROL_DEPRECATED \ AMDKFD_IOW(0x10, struct kfd_ioctl_dbg_wave_control_args) #define AMDKFD_IOC_SET_SCRATCH_BACKING_VA \ -- cgit v1.2.3 From 5cad527d5ffa9a1c4731bb9c97d2ee93f8960d50 Mon Sep 17 00:00:00 2001 From: Menglong Dong Date: Wed, 9 Feb 2022 14:08:38 +0800 Subject: net: drop_monitor: support drop reason In the commit c504e5c2f964 ("net: skb: introduce kfree_skb_reason()") drop reason is introduced to the tracepoint of kfree_skb. Therefore, drop_monitor is able to report the drop reason to users by netlink. The drop reasons are reported as string to users, which is exactly the same as what we do when reporting it to ftrace. 
Signed-off-by: Menglong Dong Reviewed-by: Ido Schimmel Reviewed-by: David Ahern Link: https://lore.kernel.org/r/20220209060838.55513-1-imagedong@tencent.com Signed-off-by: Jakub Kicinski --- include/uapi/linux/net_dropmon.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/net_dropmon.h b/include/uapi/linux/net_dropmon.h index 66048cc5d7b3..1bbea8f0681e 100644 --- a/include/uapi/linux/net_dropmon.h +++ b/include/uapi/linux/net_dropmon.h @@ -93,6 +93,7 @@ enum net_dm_attr { NET_DM_ATTR_SW_DROPS, /* flag */ NET_DM_ATTR_HW_DROPS, /* flag */ NET_DM_ATTR_FLOW_ACTION_COOKIE, /* binary */ + NET_DM_ATTR_REASON, /* string */ __NET_DM_ATTR_MAX, NET_DM_ATTR_MAX = __NET_DM_ATTR_MAX - 1 -- cgit v1.2.3 From 7f5a08c79df35e68f1a43033450c5050f12bc155 Mon Sep 17 00:00:00 2001 From: Beau Belgrave Date: Tue, 18 Jan 2022 12:43:15 -0800 Subject: user_events: Add minimal support for trace_event into ftrace Minimal support for interacting with dynamic events, trace_event and ftrace. Core outline of flow between user process, ioctl and trace_event APIs. User mode processes that wish to use trace events to get data into ftrace, perf, eBPF, etc are limited to uprobes today. The user events features enables an ABI for user mode processes to create and write to trace events that are isolated from kernel level trace events. This enables a faster path for tracing from user mode data as well as opens managed code to participate in trace events, where stub locations are dynamic. User processes often want to trace only when it's useful. To enable this a set of pages are mapped into the user process space that indicate the current state of the user events that have been registered. User processes can check if their event is hooked to a trace/probe, and if it is, emit the event data out via the write() syscall. 
Two new files are introduced into tracefs to accomplish this: user_events_status - This file is mmap'd into participating user mode processes to indicate event status. user_events_data - This file is opened and register/delete ioctl's are issued to create/open/delete trace events that can be used for tracing. The typical scenario is on process start to mmap user_events_status. Processes then register the events they plan to use via the REG ioctl. The ioctl reads and updates the passed in user_reg struct. The status_index of the struct is used to know the byte in the status page to check for that event. The write_index of the struct is used to describe that event when writing out to the fd that was used for the ioctl call. The data must always include this index first when writing out data for an event. Data can be written either by write() or by writev(). For example, in memory: int index; char data[]; Pseudo code example of typical usage: struct user_reg reg; int page_fd = open("user_events_status", O_RDWR); char *page_data = mmap(NULL, PAGE_SIZE, PROT_READ, MAP_SHARED, page_fd, 0); close(page_fd); int data_fd = open("user_events_data", O_RDWR); reg.size = sizeof(reg); reg.name_args = (__u64)"test"; ioctl(data_fd, DIAG_IOCSREG, &reg); int status_id = reg.status_index; int write_id = reg.write_index; struct iovec io[2]; io[0].iov_base = &write_id; io[0].iov_len = sizeof(write_id); io[1].iov_base = payload; io[1].iov_len = sizeof(payload); if (page_data[status_id]) writev(data_fd, io, 2); User events are also exposed via the dynamic_events tracefs file for both create and delete. Current status is exposed via the user_events_status tracefs file.
Simple example to register a user event via dynamic_events: echo u:test >> dynamic_events cat dynamic_events u:test If an event is hooked to a probe, the probe hooked shows up: echo 1 > events/user_events/test/enable cat user_events_status 1:test # Used by ftrace Active: 1 Busy: 1 Max: 4096 If an event is not hooked to a probe, no probe status shows up: echo 0 > events/user_events/test/enable cat user_events_status 1:test Active: 1 Busy: 0 Max: 4096 Users can describe the trace event format via the following format: name[:FLAG1[,FLAG2...] [field1[;field2...]] Each field has the following format: type name Example for char array with a size of 20 named msg: echo 'u:detailed char[20] msg' >> dynamic_events cat dynamic_events u:detailed char[20] msg Data offsets are based on the data written out via write() and will be updated to reflect the correct offset in the trace_event fields. For dynamic data it is recommended to use the new __rel_loc data type. This type will be the same as __data_loc, but the offset is relative to this entry. This allows user_events to not worry about what common fields are being inserted before the data. The above format is valid for both the ioctl and the dynamic_events file. Link: https://lkml.kernel.org/r/20220118204326.2169-2-beaub@linux.microsoft.com Acked-by: Masami Hiramatsu Signed-off-by: Beau Belgrave Signed-off-by: Steven Rostedt (Google) --- include/uapi/linux/user_events.h | 116 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 116 insertions(+) create mode 100644 include/uapi/linux/user_events.h (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/user_events.h b/include/uapi/linux/user_events.h new file mode 100644 index 000000000000..e570840571e1 --- /dev/null +++ b/include/uapi/linux/user_events.h @@ -0,0 +1,116 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +/* + * Copyright (c) 2021, Microsoft Corporation. 
+ * + * Authors: + * Beau Belgrave + */ +#ifndef _UAPI_LINUX_USER_EVENTS_H +#define _UAPI_LINUX_USER_EVENTS_H + +#include <linux/types.h> +#include <linux/ioctl.h> + +#ifdef __KERNEL__ +#include <linux/uio.h> +#else +#include <sys/uio.h> +#endif + +#define USER_EVENTS_SYSTEM "user_events" +#define USER_EVENTS_PREFIX "u:" + +/* Bits 0-6 are for known probe types, Bit 7 is for unknown probes */ +#define EVENT_BIT_FTRACE 0 +#define EVENT_BIT_PERF 1 +#define EVENT_BIT_OTHER 7 + +#define EVENT_STATUS_FTRACE (1 << EVENT_BIT_FTRACE) +#define EVENT_STATUS_PERF (1 << EVENT_BIT_PERF) +#define EVENT_STATUS_OTHER (1 << EVENT_BIT_OTHER) + +/* Create dynamic location entry within a 32-bit value */ +#define DYN_LOC(offset, size) ((size) << 16 | (offset)) + +/* Use raw iterator for attached BPF program(s), no affect on ftrace/perf */ +#define FLAG_BPF_ITER (1 << 0) + +/* + * Describes an event registration and stores the results of the registration. + * This structure is passed to the DIAG_IOCSREG ioctl, callers at a minimum + * must set the size and name_args before invocation.
+ */ +struct user_reg { + + /* Input: Size of the user_reg structure being used */ + __u32 size; + + /* Input: Pointer to string with event name, description and flags */ + __u64 name_args; + + /* Output: Byte index of the event within the status page */ + __u32 status_index; + + /* Output: Index of the event to use when writing data */ + __u32 write_index; +}; + +#define DIAG_IOC_MAGIC '*' + +/* Requests to register a user_event */ +#define DIAG_IOCSREG _IOWR(DIAG_IOC_MAGIC, 0, struct user_reg*) + +/* Requests to delete a user_event */ +#define DIAG_IOCSDEL _IOW(DIAG_IOC_MAGIC, 1, char*) + +/* Data type that was passed to the BPF program */ +enum { + /* Data resides in kernel space */ + USER_BPF_DATA_KERNEL, + + /* Data resides in user space */ + USER_BPF_DATA_USER, + + /* Data is a pointer to a user_bpf_iter structure */ + USER_BPF_DATA_ITER, +}; + +/* + * Describes an iovec iterator that BPF programs can use to access data for + * a given user_event write() / writev() call. + */ +struct user_bpf_iter { + + /* Offset of the data within the first iovec */ + __u32 iov_offset; + + /* Number of iovec structures */ + __u32 nr_segs; + + /* Pointer to iovec structures */ + const struct iovec *iov; +}; + +/* Context that BPF programs receive when attached to a user_event */ +struct user_bpf_context { + + /* Data type being passed (see union below) */ + __u32 data_type; + + /* Length of the data */ + __u32 data_len; + + /* Pointer to data, varies by data type */ + union { + /* Kernel data (data_type == USER_BPF_DATA_KERNEL) */ + void *kdata; + + /* User data (data_type == USER_BPF_DATA_USER) */ + void *udata; + + /* Direct iovec (data_type == USER_BPF_DATA_ITER) */ + struct user_bpf_iter *iter; + }; +}; + +#endif /* _UAPI_LINUX_USER_EVENTS_H */ -- cgit v1.2.3 From a6a6fe27bab48f0d09a64b051e7bde432fcae081 Mon Sep 17 00:00:00 2001 From: "D. 
Wythe" Date: Thu, 10 Feb 2022 17:11:37 +0800 Subject: net/smc: Dynamic control handshake limitation by socket options This patch aims to add dynamic control for SMC handshake limitation for every smc sockets, in production environment, it is possible for the same applications to handle different service types, and may have different opinion on SMC handshake limitation. This patch try socket options to complete it, since we don't have socket option level for SMC yet, which requires us to implement it at the same time. This patch does the following: - add new socket option level: SOL_SMC. - add new SMC socket option: SMC_LIMIT_HS. - provide getter/setter for SMC socket options. Link: https://lore.kernel.org/all/20f504f961e1a803f85d64229ad84260434203bd.1644323503.git.alibuda@linux.alibaba.com/ Signed-off-by: D. Wythe Signed-off-by: David S. Miller --- include/uapi/linux/smc.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/smc.h b/include/uapi/linux/smc.h index 6c2874fd2c00..343e7450c3a3 100644 --- a/include/uapi/linux/smc.h +++ b/include/uapi/linux/smc.h @@ -284,4 +284,8 @@ enum { __SMC_NLA_SEID_TABLE_MAX, SMC_NLA_SEID_TABLE_MAX = __SMC_NLA_SEID_TABLE_MAX - 1 }; + +/* SMC socket options */ +#define SMC_LIMIT_HS 1 /* constraint on smc handshake */ + #endif /* _UAPI_LINUX_SMC_H */ -- cgit v1.2.3 From f9496b7c1b48ce02cd17a3ee88b1e049c689a222 Mon Sep 17 00:00:00 2001 From: "D. Wythe" Date: Thu, 10 Feb 2022 17:11:38 +0800 Subject: net/smc: Add global configure for handshake limitation by netlink Although we can control SMC handshake limitation through socket options, which means that applications who need it must modify their code. It's quite troublesome for many existing applications. This patch modifies the global default value of SMC handshake limitation through netlink, providing a way to put constraint on handshake without modifies any code for applications. Suggested-by: Tony Lu Signed-off-by: D. 
Wythe Reviewed-by: Tony Lu Signed-off-by: David S. Miller --- include/uapi/linux/smc.h | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/smc.h b/include/uapi/linux/smc.h index 343e7450c3a3..693f549f6966 100644 --- a/include/uapi/linux/smc.h +++ b/include/uapi/linux/smc.h @@ -59,6 +59,9 @@ enum { SMC_NETLINK_DUMP_SEID, SMC_NETLINK_ENABLE_SEID, SMC_NETLINK_DISABLE_SEID, + SMC_NETLINK_DUMP_HS_LIMITATION, + SMC_NETLINK_ENABLE_HS_LIMITATION, + SMC_NETLINK_DISABLE_HS_LIMITATION, }; /* SMC_GENL_FAMILY top level attributes */ @@ -285,6 +288,14 @@ enum { SMC_NLA_SEID_TABLE_MAX = __SMC_NLA_SEID_TABLE_MAX - 1 }; +/* SMC_NETLINK_HS_LIMITATION attributes */ +enum { + SMC_NLA_HS_LIMITATION_UNSPEC, + SMC_NLA_HS_LIMITATION_ENABLED, /* u8 */ + __SMC_NLA_HS_LIMITATION_MAX, + SMC_NLA_HS_LIMITATION_MAX = __SMC_NLA_HS_LIMITATION_MAX - 1 +}; + /* SMC socket options */ #define SMC_LIMIT_HS 1 /* constraint on smc handshake */ -- cgit v1.2.3 From e9e9feebcbc14b174fef862842f8cc9a388e1db3 Mon Sep 17 00:00:00 2001 From: Janis Schoetterl-Glausch Date: Fri, 11 Feb 2022 19:22:10 +0100 Subject: KVM: s390: Add optional storage key checking to MEMOP IOCTL User space needs a mechanism to perform key checked accesses when emulating instructions. The key can be passed as an additional argument. Having an additional argument is flexible, as user space can pass the guest PSW's key, in order to make an access the same way the CPU would, or pass another key if necessary. 
Signed-off-by: Janis Schoetterl-Glausch Reviewed-by: Claudio Imbrenda Reviewed-by: Christian Borntraeger Reviewed-by: Janosch Frank Link: https://lore.kernel.org/r/20220211182215.2730017-6-scgl@linux.ibm.com Signed-off-by: Christian Borntraeger --- include/uapi/linux/kvm.h | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index 5191b57e1562..4566f429db2c 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -562,7 +562,10 @@ struct kvm_s390_mem_op { __u32 op; /* type of operation */ __u64 buf; /* buffer in userspace */ union { - __u8 ar; /* the access register number */ + struct { + __u8 ar; /* the access register number */ + __u8 key; /* access key, ignored if flag unset */ + }; __u32 sida_offset; /* offset into the sida */ __u8 reserved[32]; /* should be set to 0 */ }; @@ -575,6 +578,7 @@ struct kvm_s390_mem_op { /* flags for kvm_s390_mem_op->flags */ #define KVM_S390_MEMOP_F_CHECK_ONLY (1ULL << 0) #define KVM_S390_MEMOP_F_INJECT_EXCEPTION (1ULL << 1) +#define KVM_S390_MEMOP_F_SKEY_PROTECTION (1ULL << 2) /* for KVM_INTERRUPT */ struct kvm_interrupt { -- cgit v1.2.3 From ef11c9463ae006302ce170a401854a48ea0532ca Mon Sep 17 00:00:00 2001 From: Janis Schoetterl-Glausch Date: Fri, 11 Feb 2022 19:22:11 +0100 Subject: KVM: s390: Add vm IOCTL for key checked guest absolute memory access Channel I/O honors storage keys and is performed on absolute memory. For I/O emulation user space therefore needs to be able to do key checked accesses. The vm IOCTL supports read/write accesses, as well as checking if an access would succeed. Unlike relying on KVM_S390_GET_SKEYS for key checking would, the vm IOCTL performs the check in lockstep with the read or write, by, ultimately, mapping the access to move instructions that support key protection checking with a supplied key. 
Fetch and storage protection override are not applicable to absolute accesses and so are not applied as they are when using the vcpu memop. Signed-off-by: Janis Schoetterl-Glausch Reviewed-by: Christian Borntraeger Link: https://lore.kernel.org/r/20220211182215.2730017-7-scgl@linux.ibm.com Signed-off-by: Christian Borntraeger --- include/uapi/linux/kvm.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index 4566f429db2c..4bc7623def87 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -575,6 +575,8 @@ struct kvm_s390_mem_op { #define KVM_S390_MEMOP_LOGICAL_WRITE 1 #define KVM_S390_MEMOP_SIDA_READ 2 #define KVM_S390_MEMOP_SIDA_WRITE 3 +#define KVM_S390_MEMOP_ABSOLUTE_READ 4 +#define KVM_S390_MEMOP_ABSOLUTE_WRITE 5 /* flags for kvm_s390_mem_op->flags */ #define KVM_S390_MEMOP_F_CHECK_ONLY (1ULL << 0) #define KVM_S390_MEMOP_F_INJECT_EXCEPTION (1ULL << 1) -- cgit v1.2.3 From d004079edc166ff19605475211305923c708b4d5 Mon Sep 17 00:00:00 2001 From: Janis Schoetterl-Glausch Date: Fri, 11 Feb 2022 19:22:13 +0100 Subject: KVM: s390: Add capability for storage key extension of MEM_OP IOCTL Availability of the KVM_CAP_S390_MEM_OP_EXTENSION capability signals that: * The vcpu MEM_OP IOCTL supports storage key checking. * The vm MEM_OP IOCTL exists. 
Signed-off-by: Janis Schoetterl-Glausch Reviewed-by: Janosch Frank Reviewed-by: Christian Borntraeger Link: https://lore.kernel.org/r/20220211182215.2730017-9-scgl@linux.ibm.com Signed-off-by: Christian Borntraeger --- include/uapi/linux/kvm.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index 4bc7623def87..08756eeea065 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -1140,6 +1140,7 @@ struct kvm_ppc_resize_hpt { #define KVM_CAP_VM_GPA_BITS 207 #define KVM_CAP_XSAVE2 208 #define KVM_CAP_SYS_ATTRIBUTES 209 +#define KVM_CAP_S390_MEM_OP_EXTENSION 210 #ifdef KVM_CAP_IRQ_ROUTING -- cgit v1.2.3 From 5e35d0eb472b48ac9c8ef7017753b8a1f765aa01 Mon Sep 17 00:00:00 2001 From: Janis Schoetterl-Glausch Date: Fri, 11 Feb 2022 19:22:14 +0100 Subject: KVM: s390: Update api documentation for memop ioctl Document all currently existing operations, flags and explain under which circumstances they are available. Document the recently introduced absolute operations and the storage key protection flag, as well as the existing SIDA operations. 
Signed-off-by: Janis Schoetterl-Glausch Reviewed-by: Janosch Frank Link: https://lore.kernel.org/r/20220211182215.2730017-10-scgl@linux.ibm.com Signed-off-by: Christian Borntraeger --- include/uapi/linux/kvm.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index 08756eeea065..dbc550bbd9fa 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -567,7 +567,7 @@ struct kvm_s390_mem_op { __u8 key; /* access key, ignored if flag unset */ }; __u32 sida_offset; /* offset into the sida */ - __u8 reserved[32]; /* should be set to 0 */ + __u8 reserved[32]; /* ignored */ }; }; /* types for kvm_s390_mem_op->op */ -- cgit v1.2.3 From 761b9b366cec0c81a1cd80930f00611d86521d1b Mon Sep 17 00:00:00 2001 From: Catalin Marinas Date: Mon, 31 Jan 2022 16:54:53 +0000 Subject: elf: Introduce the ARM MTE ELF segment type Memory tags will be dumped in the core file as segments with their own type. Discussions with the binutils and the generic ABI community settled on using new definitions in the PT_*PROC space (and to be documented in the processor-specific ABIs). Introduce PT_ARM_MEMTAG_MTE as (PT_LOPROC + 0x1). 
Not included in this patch since there is no upstream support but the CHERI/BSD community will also reserve: #define PT_ARM_MEMTAG_CHERI (PT_LOPROC + 0x2) #define PT_RISCV_MEMTAG_CHERI (PT_LOPROC + 0x3) Signed-off-by: Catalin Marinas Acked-by: Luis Machado Link: https://lore.kernel.org/r/20220131165456.2160675-3-catalin.marinas@arm.com Signed-off-by: Will Deacon --- include/uapi/linux/elf.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/elf.h b/include/uapi/linux/elf.h index 61bf4774b8f2..fe8e5b74cb39 100644 --- a/include/uapi/linux/elf.h +++ b/include/uapi/linux/elf.h @@ -40,6 +40,9 @@ typedef __s64 Elf64_Sxword; #define PT_GNU_STACK (PT_LOOS + 0x474e551) +/* ARM MTE memory tag segment type */ +#define PT_ARM_MEMTAG_MTE (PT_LOPROC + 0x1) + /* * Extended Numbering * -- cgit v1.2.3 From 5cd5a8a3e2fb11b1c8a09f062c44c1e228ef987a Mon Sep 17 00:00:00 2001 From: Ilan Peer Date: Mon, 14 Feb 2022 17:29:53 +0100 Subject: cfg80211: Add data structures to capture EHT capabilities And advertise EHT capabilities to user space when supported. 
Signed-off-by: Ilan Peer Link: https://lore.kernel.org/r/20220214173004.6fb70658529f.I2413a37c8f7d2d6d638038a3d95360a3fce0114d@changeid Signed-off-by: Johannes Berg --- include/uapi/linux/nl80211.h | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index 195a238a322e..d305a8b8c536 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -3766,6 +3766,14 @@ enum nl80211_mpath_info { * given for all 6 GHz band channels * @NL80211_BAND_IFTYPE_ATTR_VENDOR_ELEMS: vendor element capabilities that are * advertised on this band/for this iftype (binary) + * @NL80211_BAND_IFTYPE_ATTR_EHT_CAP_MAC: EHT MAC capabilities as in EHT + * capabilities element + * @NL80211_BAND_IFTYPE_ATTR_EHT_CAP_PHY: EHT PHY capabilities as in EHT + * capabilities element + * @NL80211_BAND_IFTYPE_ATTR_EHT_CAP_MCS_SET: EHT supported NSS/MCS as in EHT + * capabilities element + * @NL80211_BAND_IFTYPE_ATTR_EHT_CAP_PPE: EHT PPE thresholds information as + * defined in EHT capabilities element * @__NL80211_BAND_IFTYPE_ATTR_AFTER_LAST: internal use * @NL80211_BAND_IFTYPE_ATTR_MAX: highest band attribute currently defined */ @@ -3779,6 +3787,10 @@ enum nl80211_band_iftype_attr { NL80211_BAND_IFTYPE_ATTR_HE_CAP_PPE, NL80211_BAND_IFTYPE_ATTR_HE_6GHZ_CAPA, NL80211_BAND_IFTYPE_ATTR_VENDOR_ELEMS, + NL80211_BAND_IFTYPE_ATTR_EHT_CAP_MAC, + NL80211_BAND_IFTYPE_ATTR_EHT_CAP_PHY, + NL80211_BAND_IFTYPE_ATTR_EHT_CAP_MCS_SET, + NL80211_BAND_IFTYPE_ATTR_EHT_CAP_PPE, /* keep last */ __NL80211_BAND_IFTYPE_ATTR_AFTER_LAST, -- cgit v1.2.3 From 3743bec6120ae0748cb3fda6ff80a690117ef1f3 Mon Sep 17 00:00:00 2001 From: Jia Ding Date: Mon, 14 Feb 2022 17:29:54 +0100 Subject: cfg80211: Add support for EHT 320 MHz channel width Add 320 MHz support in the channel def and center frequency validation with compatible check. 
Signed-off-by: Jia Ding Co-authored-by: Karthikeyan Periyasamy Signed-off-by: Karthikeyan Periyasamy Co-authored-by: Muna Sinada Signed-off-by: Muna Sinada Co-authored-by: Veerendranath Jakkam Signed-off-by: Veerendranath Jakkam Link: https://lore.kernel.org/r/1640163883-12696-5-git-send-email-quic_vjakkam@quicinc.com Link: https://lore.kernel.org/r/20220214163009.175289-1-johannes@sipsolutions.net Signed-off-by: Johannes Berg --- include/uapi/linux/nl80211.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index d305a8b8c536..9e05973f3f56 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -4684,6 +4684,8 @@ enum nl80211_key_mode { * @NL80211_CHAN_WIDTH_4: 4 MHz OFDM channel * @NL80211_CHAN_WIDTH_8: 8 MHz OFDM channel * @NL80211_CHAN_WIDTH_16: 16 MHz OFDM channel + * @NL80211_CHAN_WIDTH_320: 320 MHz channel, the %NL80211_ATTR_CENTER_FREQ1 + * attribute must be provided as well */ enum nl80211_chan_width { NL80211_CHAN_WIDTH_20_NOHT, @@ -4699,6 +4701,7 @@ enum nl80211_chan_width { NL80211_CHAN_WIDTH_4, NL80211_CHAN_WIDTH_8, NL80211_CHAN_WIDTH_16, + NL80211_CHAN_WIDTH_320, }; /** -- cgit v1.2.3 From cfb14110acf87b4db62e07ba08a80429f1749f40 Mon Sep 17 00:00:00 2001 From: Veerendranath Jakkam Date: Mon, 14 Feb 2022 17:29:55 +0100 Subject: nl80211: add EHT MCS support Add support for reporting and calculating EHT bitrates. 
Signed-off-by: Veerendranath Jakkam Link: https://lore.kernel.org/r/1640163883-12696-7-git-send-email-quic_vjakkam@quicinc.com Link: https://lore.kernel.org/r/20220214163009.175289-2-johannes@sipsolutions.net Signed-off-by: Johannes Berg --- include/uapi/linux/nl80211.h | 62 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 62 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index 9e05973f3f56..d0ba70ea5d04 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -3392,6 +3392,56 @@ enum nl80211_he_ru_alloc { NL80211_RATE_INFO_HE_RU_ALLOC_2x996, }; +/** + * enum nl80211_eht_gi - EHT guard interval + * @NL80211_RATE_INFO_EHT_GI_0_8: 0.8 usec + * @NL80211_RATE_INFO_EHT_GI_1_6: 1.6 usec + * @NL80211_RATE_INFO_EHT_GI_3_2: 3.2 usec + */ +enum nl80211_eht_gi { + NL80211_RATE_INFO_EHT_GI_0_8, + NL80211_RATE_INFO_EHT_GI_1_6, + NL80211_RATE_INFO_EHT_GI_3_2, +}; + +/** + * enum nl80211_eht_ru_alloc - EHT RU allocation values + * @NL80211_RATE_INFO_EHT_RU_ALLOC_26: 26-tone RU allocation + * @NL80211_RATE_INFO_EHT_RU_ALLOC_52: 52-tone RU allocation + * @NL80211_RATE_INFO_EHT_RU_ALLOC_52P26: 52+26-tone RU allocation + * @NL80211_RATE_INFO_EHT_RU_ALLOC_106: 106-tone RU allocation + * @NL80211_RATE_INFO_EHT_RU_ALLOC_106P26: 106+26 tone RU allocation + * @NL80211_RATE_INFO_EHT_RU_ALLOC_242: 242-tone RU allocation + * @NL80211_RATE_INFO_EHT_RU_ALLOC_484: 484-tone RU allocation + * @NL80211_RATE_INFO_EHT_RU_ALLOC_484P242: 484+242 tone RU allocation + * @NL80211_RATE_INFO_EHT_RU_ALLOC_996: 996-tone RU allocation + * @NL80211_RATE_INFO_EHT_RU_ALLOC_996P484: 996+484 tone RU allocation + * @NL80211_RATE_INFO_EHT_RU_ALLOC_996P484P242: 996+484+242 tone RU allocation + * @NL80211_RATE_INFO_EHT_RU_ALLOC_2x996: 2x996-tone RU allocation + * @NL80211_RATE_INFO_EHT_RU_ALLOC_2x996P484: 2x996+484 tone RU allocation + * @NL80211_RATE_INFO_EHT_RU_ALLOC_3x996: 3x996-tone RU allocation + * 
@NL80211_RATE_INFO_EHT_RU_ALLOC_3x996P484: 3x996+484 tone RU allocation + * @NL80211_RATE_INFO_EHT_RU_ALLOC_4x996: 4x996-tone RU allocation + */ +enum nl80211_eht_ru_alloc { + NL80211_RATE_INFO_EHT_RU_ALLOC_26, + NL80211_RATE_INFO_EHT_RU_ALLOC_52, + NL80211_RATE_INFO_EHT_RU_ALLOC_52P26, + NL80211_RATE_INFO_EHT_RU_ALLOC_106, + NL80211_RATE_INFO_EHT_RU_ALLOC_106P26, + NL80211_RATE_INFO_EHT_RU_ALLOC_242, + NL80211_RATE_INFO_EHT_RU_ALLOC_484, + NL80211_RATE_INFO_EHT_RU_ALLOC_484P242, + NL80211_RATE_INFO_EHT_RU_ALLOC_996, + NL80211_RATE_INFO_EHT_RU_ALLOC_996P484, + NL80211_RATE_INFO_EHT_RU_ALLOC_996P484P242, + NL80211_RATE_INFO_EHT_RU_ALLOC_2x996, + NL80211_RATE_INFO_EHT_RU_ALLOC_2x996P484, + NL80211_RATE_INFO_EHT_RU_ALLOC_3x996, + NL80211_RATE_INFO_EHT_RU_ALLOC_3x996P484, + NL80211_RATE_INFO_EHT_RU_ALLOC_4x996, +}; + /** * enum nl80211_rate_info - bitrate information * @@ -3431,6 +3481,13 @@ enum nl80211_he_ru_alloc { * @NL80211_RATE_INFO_HE_DCM: HE DCM value (u8, 0/1) * @NL80211_RATE_INFO_RU_ALLOC: HE RU allocation, if not present then * non-OFDMA was used (u8, see &enum nl80211_he_ru_alloc) + * @NL80211_RATE_INFO_320_MHZ_WIDTH: 320 MHz bitrate + * @NL80211_RATE_INFO_EHT_MCS: EHT MCS index (u8, 0-15) + * @NL80211_RATE_INFO_EHT_NSS: EHT NSS value (u8, 1-8) + * @NL80211_RATE_INFO_EHT_GI: EHT guard interval identifier + * (u8, see &enum nl80211_eht_gi) + * @NL80211_RATE_INFO_EHT_RU_ALLOC: EHT RU allocation, if not present then + * non-OFDMA was used (u8, see &enum nl80211_eht_ru_alloc) * @__NL80211_RATE_INFO_AFTER_LAST: internal use */ enum nl80211_rate_info { @@ -3452,6 +3509,11 @@ enum nl80211_rate_info { NL80211_RATE_INFO_HE_GI, NL80211_RATE_INFO_HE_DCM, NL80211_RATE_INFO_HE_RU_ALLOC, + NL80211_RATE_INFO_320_MHZ_WIDTH, + NL80211_RATE_INFO_EHT_MCS, + NL80211_RATE_INFO_EHT_NSS, + NL80211_RATE_INFO_EHT_GI, + NL80211_RATE_INFO_EHT_RU_ALLOC, /* keep last */ __NL80211_RATE_INFO_AFTER_LAST, -- cgit v1.2.3 From c2b3d7699fb0ce66538b829af43970acc2f89060 Mon Sep 17 00:00:00 2001 
From: Sriram R Date: Mon, 14 Feb 2022 17:29:56 +0100 Subject: nl80211: add support for 320MHz channel limitation Add support to advertise drivers or regulatory limitations on 320 MHz channels to userspace. Signed-off-by: Sriram R Co-authored-by: Karthikeyan Periyasamy Signed-off-by: Karthikeyan Periyasamy Co-authored-by: Veerendranath Jakkam Signed-off-by: Veerendranath Jakkam Link: https://lore.kernel.org/r/1640163883-12696-6-git-send-email-quic_vjakkam@quicinc.com Link: https://lore.kernel.org/r/20220214163009.175289-3-johannes@sipsolutions.net Signed-off-by: Johannes Berg --- include/uapi/linux/nl80211.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index d0ba70ea5d04..6a338dafcd07 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -3997,6 +3997,8 @@ enum nl80211_wmm_rule { * on this channel in current regulatory domain. * @NL80211_FREQUENCY_ATTR_16MHZ: 16 MHz operation is allowed * on this channel in current regulatory domain. 
+ * @NL80211_FREQUENCY_ATTR_NO_320MHZ: any 320 MHz channel using this channel + * as the primary or any of the secondary channels isn't possible * @NL80211_FREQUENCY_ATTR_MAX: highest frequency attribute number * currently defined * @__NL80211_FREQUENCY_ATTR_AFTER_LAST: internal use @@ -4033,6 +4035,7 @@ enum nl80211_frequency_attr { NL80211_FREQUENCY_ATTR_4MHZ, NL80211_FREQUENCY_ATTR_8MHZ, NL80211_FREQUENCY_ATTR_16MHZ, + NL80211_FREQUENCY_ATTR_NO_320MHZ, /* keep last */ __NL80211_FREQUENCY_ATTR_AFTER_LAST, @@ -4231,6 +4234,7 @@ enum nl80211_sched_scan_match_attr { * @NL80211_RRF_NO_80MHZ: 80MHz operation not allowed * @NL80211_RRF_NO_160MHZ: 160MHz operation not allowed * @NL80211_RRF_NO_HE: HE operation not allowed + * @NL80211_RRF_NO_320MHZ: 320MHz operation not allowed */ enum nl80211_reg_rule_flags { NL80211_RRF_NO_OFDM = 1<<0, @@ -4249,6 +4253,7 @@ enum nl80211_reg_rule_flags { NL80211_RRF_NO_80MHZ = 1<<15, NL80211_RRF_NO_160MHZ = 1<<16, NL80211_RRF_NO_HE = 1<<17, + NL80211_RRF_NO_320MHZ = 1<<18, }; #define NL80211_RRF_PASSIVE_SCAN NL80211_RRF_NO_IR -- cgit v1.2.3 From 31846b657857e6a73d982604f36a34710d98902c Mon Sep 17 00:00:00 2001 From: Ilan Peer Date: Mon, 14 Feb 2022 17:29:57 +0100 Subject: cfg80211: add NO-EHT flag to regulatory This may be necessary in some cases, add a flag and propagate it, just like the NO-HE that already exists. Signed-off-by: Ilan Peer [split off from a combined 320/no-EHT patch] Link: https://lore.kernel.org/r/20220214173004.dbb85a7b86bb.Ifc1e2daac51c1cc5f895ccfb79faf5eaec3950ec@changeid Signed-off-by: Johannes Berg --- include/uapi/linux/nl80211.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index 6a338dafcd07..baf6433c0119 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -3999,6 +3999,8 @@ enum nl80211_wmm_rule { * on this channel in current regulatory domain. 
* @NL80211_FREQUENCY_ATTR_NO_320MHZ: any 320 MHz channel using this channel * as the primary or any of the secondary channels isn't possible + * @NL80211_FREQUENCY_ATTR_NO_EHT: EHT operation is not allowed on this channel + * in current regulatory domain. * @NL80211_FREQUENCY_ATTR_MAX: highest frequency attribute number * currently defined * @__NL80211_FREQUENCY_ATTR_AFTER_LAST: internal use @@ -4036,6 +4038,7 @@ enum nl80211_frequency_attr { NL80211_FREQUENCY_ATTR_8MHZ, NL80211_FREQUENCY_ATTR_16MHZ, NL80211_FREQUENCY_ATTR_NO_320MHZ, + NL80211_FREQUENCY_ATTR_NO_EHT, /* keep last */ __NL80211_FREQUENCY_ATTR_AFTER_LAST, -- cgit v1.2.3 From ea05fd3581d32a0f1098657005c7a9b763798fe8 Mon Sep 17 00:00:00 2001 From: Ilan Peer Date: Mon, 14 Feb 2022 17:29:58 +0100 Subject: cfg80211: Support configuration of station EHT capabilities Add attributes and some code bits to support userspace passing in EHT capabilities of stations. Signed-off-by: Ilan Peer Link: https://lore.kernel.org/r/20220214173004.ecf0b3ff9627.Icb4a5f2ec7b41d9008ac4cfc16c59baeb84793d3@changeid Signed-off-by: Johannes Berg --- include/uapi/linux/nl80211.h | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index baf6433c0119..98ed52663d6b 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -11,7 +11,7 @@ * Copyright 2008 Jouni Malinen * Copyright 2008 Colin McCabe * Copyright 2015-2017 Intel Deutschland GmbH - * Copyright (C) 2018-2021 Intel Corporation + * Copyright (C) 2018-2022 Intel Corporation * * Permission to use, copy, modify, and/or distribute this software for any * purpose with or without fee is hereby granted, provided that the above @@ -2659,6 +2659,10 @@ enum nl80211_commands { * enumerated in &enum nl80211_ap_settings_flags. This attribute shall be * used with %NL80211_CMD_START_AP request. 
* + * @NL80211_ATTR_EHT_CAPABILITY: EHT Capability information element (from + * association request when used with NL80211_CMD_NEW_STATION). Can be set + * only if %NL80211_STA_FLAG_WME is set. + * * @NUM_NL80211_ATTR: total number of nl80211_attrs available * @NL80211_ATTR_MAX: highest attribute number currently defined * @__NL80211_ATTR_AFTER_LAST: internal use @@ -3169,6 +3173,8 @@ enum nl80211_attrs { NL80211_ATTR_AP_SETTINGS_FLAGS, + NL80211_ATTR_EHT_CAPABILITY, + /* add attributes here, update the policy in nl80211.c */ __NL80211_ATTR_AFTER_LAST, @@ -3224,6 +3230,8 @@ enum nl80211_attrs { #define NL80211_HE_MAX_CAPABILITY_LEN 54 #define NL80211_MAX_NR_CIPHER_SUITES 5 #define NL80211_MAX_NR_AKM_SUITES 2 +#define NL80211_EHT_MIN_CAPABILITY_LEN 13 +#define NL80211_EHT_MAX_CAPABILITY_LEN 51 #define NL80211_MIN_REMAIN_ON_CHANNEL_TIME 10 -- cgit v1.2.3 From 79aa0367385ceaf5351ea77ea1fb66136739ea9d Mon Sep 17 00:00:00 2001 From: Felix Kuehling Date: Tue, 15 Feb 2022 18:54:38 -0500 Subject: drm/amdkfd: Replace zero-length array with flexible-array member MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Reference: https://www.kernel.org/doc/html/latest/process/deprecated.html#zero-length-and-one-element-arrays CC: Changcheng Deng Signed-off-by: Felix Kuehling Reviewed-by: Philip Yang Reviewed-by: Christian König Signed-off-by: Alex Deucher --- include/uapi/linux/kfd_ioctl.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/kfd_ioctl.h b/include/uapi/linux/kfd_ioctl.h index 6e4268f5e482..baec5a41de3e 100644 --- a/include/uapi/linux/kfd_ioctl.h +++ b/include/uapi/linux/kfd_ioctl.h @@ -673,7 +673,7 @@ struct kfd_ioctl_svm_args { __u32 op; __u32 nattr; /* Variable length array of attributes */ - struct kfd_ioctl_svm_attribute attrs[0]; + struct kfd_ioctl_svm_attribute attrs[]; }; /** -- cgit v1.2.3 From 169adc2b6b3c5e86391921117b4ab3aaeb3c6ee1 Mon Sep 17 00:00:00 
2001 From: Masahiro Yamada Date: Thu, 10 Feb 2022 11:11:26 +0900 Subject: android/binder.h: add linux/android/binder(fs).h to UAPI compile-test coverage MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit linux/android/binder.h and linux/android/binderfs.h are currently excluded from the UAPI compile-test because of errors like the following: HDRTEST usr/include/linux/android/binder.h In file included from : ./usr/include/linux/android/binder.h:291:9: error: unknown type name ‘pid_t’ 291 | pid_t sender_pid; | ^~~~~ ./usr/include/linux/android/binder.h:292:9: error: unknown type name ‘uid_t’ 292 | uid_t sender_euid; | ^~~~~ The errors can be fixed by replacing {pid,uid}_t with __kernel_{pid,uid}_t. Then, remove the no-header-test entries from usr/include/Makefile. Signed-off-by: Masahiro Yamada Reviewed-by: Arnd Bergmann Signed-off-by: Arnd Bergmann --- include/uapi/linux/android/binder.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/android/binder.h b/include/uapi/linux/android/binder.h index 3246f2c74696..11157fae8a8e 100644 --- a/include/uapi/linux/android/binder.h +++ b/include/uapi/linux/android/binder.h @@ -288,8 +288,8 @@ struct binder_transaction_data { /* General information about the transaction.
*/ __u32 flags; - pid_t sender_pid; - uid_t sender_euid; + __kernel_pid_t sender_pid; + __kernel_uid_t sender_euid; binder_size_t data_size; /* number of bytes of data */ binder_size_t offsets_size; /* number of bytes of offsets */ -- cgit v1.2.3 From cbf2820341297b9aed0f846aba35556e94569210 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Thu, 10 Feb 2022 11:11:27 +0900 Subject: fsmap.h: add linux/fsmap.h to UAPI compile-test coverage MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit linux/fsmap.h is currently excluded from the UAPI compile-test because of an error like the following: HDRTEST usr/include/linux/fsmap.h In file included from : ./usr/include/linux/fsmap.h:72:19: error: unknown type name ‘size_t’ 72 | static __inline__ size_t | ^~~~~~ The error can be fixed by replacing size_t with __kernel_size_t. Then, remove the no-header-test entry from usr/include/Makefile. Signed-off-by: Masahiro Yamada Reviewed-by: Arnd Bergmann Signed-off-by: Arnd Bergmann --- include/uapi/linux/fsmap.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/fsmap.h b/include/uapi/linux/fsmap.h index 91fd519a3f7d..c690d17f1d07 100644 --- a/include/uapi/linux/fsmap.h +++ b/include/uapi/linux/fsmap.h @@ -69,7 +69,7 @@ struct fsmap_head { }; /* Size of an fsmap_head with room for nr records.
*/ -static inline size_t +static inline __kernel_size_t fsmap_sizeof( unsigned int nr) { -- cgit v1.2.3 From 8b4bca21c2c0cb3b5adb80985830a81d4e5d7081 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Thu, 10 Feb 2022 11:11:28 +0900 Subject: kexec.h: add linux/kexec.h to UAPI compile-test coverage MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit linux/kexec.h is currently excluded from the UAPI compile-test because of errors like the following: HDRTEST usr/include/linux/kexec.h In file included from : ./usr/include/linux/kexec.h:56:9: error: unknown type name ‘size_t’ 56 | size_t bufsz; | ^~~~~~ ./usr/include/linux/kexec.h:58:9: error: unknown type name ‘size_t’ 58 | size_t memsz; | ^~~~~~ The errors can be fixed by replacing size_t with __kernel_size_t. Then, remove the no-header-test entry from usr/include/Makefile. Signed-off-by: Masahiro Yamada Reviewed-by: Arnd Bergmann Signed-off-by: Arnd Bergmann --- include/uapi/linux/kexec.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/kexec.h b/include/uapi/linux/kexec.h index 778dc191c265..fb7e2ef60825 100644 --- a/include/uapi/linux/kexec.h +++ b/include/uapi/linux/kexec.h @@ -54,9 +54,9 @@ */ struct kexec_segment { const void *buf; - size_t bufsz; + __kernel_size_t bufsz; const void *mem; - size_t memsz; + __kernel_size_t memsz; }; #endif /* __KERNEL__ */ -- cgit v1.2.3 From 2a5c0fdc70cd653741e910e92ffeb2fa7376db07 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Thu, 10 Feb 2022 11:11:29 +0900 Subject: reiserfs_xattr.h: add linux/reiserfs_xattr.h to UAPI compile-test coverage MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit linux/reiserfs_xattr.h is currently excluded from the UAPI compile-test because of an error like the following: HDRTEST usr/include/linux/reiserfs_xattr.h In file included from : ./usr/include/linux/reiserfs_xattr.h:22:9: error: unknown type
name ‘size_t’ 22 | size_t length; | ^~~~~~ The error can be fixed by replacing size_t with __kernel_size_t. Then, remove the no-header-test entry from usr/include/Makefile. Signed-off-by: Masahiro Yamada Reviewed-by: Arnd Bergmann Signed-off-by: Arnd Bergmann --- include/uapi/linux/reiserfs_xattr.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/reiserfs_xattr.h b/include/uapi/linux/reiserfs_xattr.h index 28f10842f047..503ad018ce5b 100644 --- a/include/uapi/linux/reiserfs_xattr.h +++ b/include/uapi/linux/reiserfs_xattr.h @@ -19,7 +19,7 @@ struct reiserfs_xattr_header { struct reiserfs_security_handle { const char *name; void *value; - size_t length; + __kernel_size_t length; }; #endif /* _LINUX_REISERFS_XATTR_H */ -- cgit v1.2.3 From d4568fc8525897e683983806f813be1ae9eedaed Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Mon, 24 Jan 2022 18:29:52 +0100 Subject: media: omap3isp: Use struct_group() for memcpy() region In preparation for FORTIFY_SOURCE performing compile-time and run-time field bounds checking for memcpy(), memmove(), and memset(), avoid intentionally writing across neighboring fields. Wrap the target region in struct_group(). This additionally fixes a theoretical misalignment of the copy (since the size of "buf" changes between 64-bit and 32-bit, but this is likely never built for 64-bit). FWIW, I think this code is totally broken on 64-bit (which appears to not be a "real" build configuration): it would either always fail (with an uninitialized data->buf_size) or would cause corruption in userspace due to the copy_to_user() in the call path against an uninitialized data->buf value: omap3isp_stat_request_statistics_time32(...) struct omap3isp_stat_data data64; ... omap3isp_stat_request_statistics(stat, &data64); int omap3isp_stat_request_statistics(struct ispstat *stat, struct omap3isp_stat_data *data) ...
buf = isp_stat_buf_get(stat, data); static struct ispstat_buffer *isp_stat_buf_get(struct ispstat *stat, struct omap3isp_stat_data *data) ... if (buf->buf_size > data->buf_size) { ... return ERR_PTR(-EINVAL); } ... rval = copy_to_user(data->buf, buf->virt_addr, buf->buf_size); Regardless, additionally initialize data64 to be zero-filled to avoid undefined behavior. Link: https://lore.kernel.org/lkml/20211215220505.GB21862@embeddedor Cc: Arnd Bergmann Fixes: 378e3f81cb56 ("media: omap3isp: support 64-bit version of omap3isp_stat_data") Cc: stable@vger.kernel.org Reviewed-by: Gustavo A. R. Silva Signed-off-by: Kees Cook Reviewed-by: Laurent Pinchart Signed-off-by: Sakari Ailus Signed-off-by: Mauro Carvalho Chehab --- include/uapi/linux/omap3isp.h | 21 +++++++++++++-------- 1 file changed, 13 insertions(+), 8 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/omap3isp.h b/include/uapi/linux/omap3isp.h index 87b55755f4ff..d9db7ad43890 100644 --- a/include/uapi/linux/omap3isp.h +++ b/include/uapi/linux/omap3isp.h @@ -162,6 +162,7 @@ struct omap3isp_h3a_aewb_config { * struct omap3isp_stat_data - Statistic data sent to or received from user * @ts: Timestamp of returned framestats. * @buf: Pointer to pass to user. + * @buf_size: Size of buffer. * @frame_number: Frame number of requested stats. * @cur_frame: Current frame number being processed. * @config_counter: Number of the configuration associated with the data. 
@@ -176,10 +177,12 @@ struct omap3isp_stat_data { struct timeval ts; #endif void __user *buf; - __u32 buf_size; - __u16 frame_number; - __u16 cur_frame; - __u16 config_counter; + __struct_group(/* no tag */, frame, /* no attrs */, + __u32 buf_size; + __u16 frame_number; + __u16 cur_frame; + __u16 config_counter; + ); }; #ifdef __KERNEL__ @@ -189,10 +192,12 @@ struct omap3isp_stat_data_time32 { __s32 tv_usec; } ts; __u32 buf; - __u32 buf_size; - __u16 frame_number; - __u16 cur_frame; - __u16 config_counter; + __struct_group(/* no tag */, frame, /* no attrs */, + __u32 buf_size; + __u16 frame_number; + __u16 cur_frame; + __u16 config_counter; + ); }; #endif -- cgit v1.2.3 From 6de74d1069b821e96460d0fc2edfc35785db04fb Mon Sep 17 00:00:00 2001 From: Michael Kelley Date: Wed, 9 Feb 2022 08:11:10 -0800 Subject: hv_utils: Add comment about max VMbus packet size in VSS driver The VSS driver allocates a VMbus receive buffer significantly larger than sizeof(hv_vss_msg), with no explanation. To help prevent future mistakes, add a #define and comment about why this is done. No functional change. Signed-off-by: Michael Kelley Link: https://lore.kernel.org/r/1644423070-75125-1-git-send-email-mikelley@microsoft.com Signed-off-by: Wei Liu --- include/uapi/linux/hyperv.h | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/hyperv.h b/include/uapi/linux/hyperv.h index daf82a230c0e..aaa502a7bff4 100644 --- a/include/uapi/linux/hyperv.h +++ b/include/uapi/linux/hyperv.h @@ -90,6 +90,17 @@ struct hv_vss_check_dm_info { __u32 flags; } __attribute__((packed)); +/* + * struct hv_vss_msg encodes the fields that the Linux VSS + * driver accesses. However, FREEZE messages from Hyper-V contain + * additional LUN information that Linux doesn't use and are not + * represented in struct hv_vss_msg. 
A received FREEZE message may + * be as large as 6,260 bytes, so the driver must allocate at least + * that much space, not sizeof(struct hv_vss_msg). Other messages + * such as AUTO_RECOVER may be as large as 12,500 bytes. However, + * because the Linux VSS driver responds that it doesn't support + * auto-recovery, it should not receive such messages. + */ struct hv_vss_msg { union { struct hv_vss_hdr vss_hdr; -- cgit v1.2.3 From 47f0bd5032106469827cf56c8b45bb9101112105 Mon Sep 17 00:00:00 2001 From: Jacques de Laval Date: Thu, 17 Feb 2022 16:02:02 +0100 Subject: net: Add new protocol attribute to IP addresses This patch adds a new protocol attribute to IPv4 and IPv6 addresses. Inspiration was taken from the protocol attribute of routes. User space applications like iproute2 can set/get the protocol with the Netlink API. The attribute is stored as an 8-bit unsigned integer. The protocol attribute is set by kernel for these categories: - IPv4 and IPv6 loopback addresses - IPv6 addresses generated from router announcements - IPv6 link local addresses User space may pass custom protocols, not defined by the kernel. Grouping addresses on their origin is useful in scenarios where you want to distinguish between addresses based on who added them, e.g. kernel vs. user space. Tagging addresses with a string label is an existing feature that could be used as a solution. Unfortunately the max length of a label is 15 characters, and for compatibility reasons the label must be prefixed with the name of the device followed by a colon. Since device names also have a max length of 15 characters, only -1 characters is guaranteed to be available for any origin tag, which is not that much. 
A reference implementation of user space setting and getting protocols is available for iproute2: https://github.com/westermo/iproute2/commit/9a6ea18bd79f47f293e5edc7780f315ea42ff540 Signed-off-by: Jacques de Laval Reviewed-by: David Ahern Link: https://lore.kernel.org/r/20220217150202.80802-1-Jacques.De.Laval@westermo.com Signed-off-by: Jakub Kicinski --- include/uapi/linux/if_addr.h | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/if_addr.h b/include/uapi/linux/if_addr.h index dfcf3ce0097f..1c392dd95a5e 100644 --- a/include/uapi/linux/if_addr.h +++ b/include/uapi/linux/if_addr.h @@ -33,8 +33,9 @@ enum { IFA_CACHEINFO, IFA_MULTICAST, IFA_FLAGS, - IFA_RT_PRIORITY, /* u32, priority/metric for prefix route */ + IFA_RT_PRIORITY, /* u32, priority/metric for prefix route */ IFA_TARGET_NETNSID, + IFA_PROTO, /* u8, address protocol */ __IFA_MAX, }; @@ -69,4 +70,10 @@ struct ifa_cacheinfo { #define IFA_PAYLOAD(n) NLMSG_PAYLOAD(n,sizeof(struct ifaddrmsg)) #endif +/* ifa_proto */ +#define IFAPROT_UNSPEC 0 +#define IFAPROT_KERNEL_LO 1 /* loopback */ +#define IFAPROT_KERNEL_RA 2 /* set by kernel from router announcement */ +#define IFAPROT_KERNEL_LL 3 /* link-local set by kernel */ + #endif -- cgit v1.2.3 From 4b340a5a726dafba15b366c4009aa0a8f77631ac Mon Sep 17 00:00:00 2001 From: Mobashshera Rasool Date: Thu, 17 Feb 2022 07:46:40 +0000 Subject: net: ip6mr: add support for passing full packet on wrong mif This patch adds support for MRT6MSG_WRMIFWHOLE which is used to pass full packet and real vif id when the incoming interface is wrong. While the RP and FHR are setting up state we need to be sending the registers encapsulated with all the data inside otherwise we lose it. The RP then decapsulates it and forwards it to the interested parties. Currently with WRONGMIF we can only be sending empty register packets and will lose that data. 
This behaviour can be enabled by using MRT_PIM with val == MRT6MSG_WRMIFWHOLE. This doesn't prevent MRT6MSG_WRONGMIF from happening, it happens in addition to it, also it is controlled by the same throttling parameters as WRONGMIF (i.e. 1 packet per 3 seconds currently). Both messages are generated to keep backwards compatibility and avoid breaking someone who was enabling MRT_PIM with val == 4, since any positive val is accepted and treated the same. Signed-off-by: Mobashshera Rasool Signed-off-by: David S. Miller --- include/uapi/linux/mroute6.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/mroute6.h b/include/uapi/linux/mroute6.h index a1fd6173e2db..1d90c21a6251 100644 --- a/include/uapi/linux/mroute6.h +++ b/include/uapi/linux/mroute6.h @@ -134,6 +134,7 @@ struct mrt6msg { #define MRT6MSG_NOCACHE 1 #define MRT6MSG_WRONGMIF 2 #define MRT6MSG_WHOLEPKT 3 /* used for use level encap */ +#define MRT6MSG_WRMIFWHOLE 4 /* For PIM Register and assert processing */ __u8 im6_mbz; /* must be zero */ __u8 im6_msgtype; /* what type of message */ __u16 im6_mif; /* mif rec'd on */ -- cgit v1.2.3 From a1dc6308865df719efb2a2f8a5f0f5979602d267 Mon Sep 17 00:00:00 2001 From: Amitay Isaacs Date: Fri, 21 Jan 2022 16:08:16 +1030 Subject: fsi: sbefifo: Implement FSI_SBEFIFO_READ_TIMEOUT_SECONDS ioctl FSI_SBEFIFO_READ_TIMEOUT_SECONDS ioctl sets the read timeout (in seconds) for the response received by sbefifo device from sbe. The timeout affects only the read operation on current sbefifo device fd. Certain SBE operations can take a long time to complete and the default timeout of 10 seconds might not be sufficient to start receiving response from SBE. In such cases, allow the timeout to be set to the maximum of 120 seconds. The kernel does not contain the definition of the various SBE operations, so we must expose an interface to userspace to set the timeout for the given operation.
Signed-off-by: Amitay Isaacs Signed-off-by: Joel Stanley Reviewed-by: Eddie James Link: https://lore.kernel.org/r/20220121053816.82253-3-joel@jms.id.au Signed-off-by: Joel Stanley --- include/uapi/linux/fsi.h | 14 ++++++++++++++ 1 file changed, 14 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/fsi.h b/include/uapi/linux/fsi.h index da577ecd90e7..b2f1977378c7 100644 --- a/include/uapi/linux/fsi.h +++ b/include/uapi/linux/fsi.h @@ -55,4 +55,18 @@ struct scom_access { #define FSI_SCOM_WRITE _IOWR('s', 0x02, struct scom_access) #define FSI_SCOM_RESET _IOW('s', 0x03, __u32) +/* + * /dev/sbefifo* ioctl interface + */ + +/** + * FSI_SBEFIFO_READ_TIMEOUT sets the read timeout for response from SBE. + * + * The read timeout is specified in seconds. The minimum value of read + * timeout is 10 seconds (default) and the maximum value of read timeout is + * 120 seconds. A read timeout of 0 will reset the value to the default of + * (10 seconds). + */ +#define FSI_SBEFIFO_READ_TIMEOUT_SECONDS _IOW('s', 0x00, __u32) + #endif /* _UAPI_LINUX_FSI_H */ -- cgit v1.2.3 From 129e3c1bab24d27d0fa6e505a472345a92d7a2b0 Mon Sep 17 00:00:00 2001 From: Hangbin Liu Date: Mon, 21 Feb 2022 13:54:57 +0800 Subject: bonding: add new option ns_ip6_target This patch adds a new bonding option ns_ip6_target, which corresponds to the arp_ip_target. With this we set IPv6 targets and send IPv6 NS request to determine the health of the link. For other related options like the validation, we still use arp_validate, and will change to ns_validate later. Note: the sysfs configuration support was removed based on https://lore.kernel.org/netdev/8863.1645071997@famine Signed-off-by: Hangbin Liu Signed-off-by: David S.
Miller --- include/uapi/linux/if_link.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h index 6218f93f5c1a..e1ba2d51b717 100644 --- a/include/uapi/linux/if_link.h +++ b/include/uapi/linux/if_link.h @@ -860,6 +860,7 @@ enum { IFLA_BOND_PEER_NOTIF_DELAY, IFLA_BOND_AD_LACP_ACTIVE, IFLA_BOND_MISSED_MAX, + IFLA_BOND_NS_IP6_TARGET, __IFLA_BOND_MAX, }; -- cgit v1.2.3 From c086df4902573e2f06c6a2a83452c13a8bc603f5 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Mon, 10 Jan 2022 18:52:52 -0500 Subject: fuse: move FUSE_SUPER_MAGIC definition to magic.h ...to help userland apps that need to identify FUSE mounts. Signed-off-by: Jeff Layton Signed-off-by: Miklos Szeredi --- include/uapi/linux/magic.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/magic.h b/include/uapi/linux/magic.h index 0425cd79af9a..f724129c0425 100644 --- a/include/uapi/linux/magic.h +++ b/include/uapi/linux/magic.h @@ -36,6 +36,7 @@ #define EFIVARFS_MAGIC 0xde5e81e4 #define HOSTFS_SUPER_MAGIC 0x00c0ffee #define OVERLAYFS_SUPER_MAGIC 0x794c7630 +#define FUSE_SUPER_MAGIC 0x65735546 #define MINIX_SUPER_MAGIC 0x137F /* minix v1 fs, 14 char names */ #define MINIX_SUPER_MAGIC2 0x138F /* minix v1 fs, 30 char names */ -- cgit v1.2.3 From d43583b890e7cb0078d13d056753a56602b92406 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Mon, 21 Feb 2022 15:35:23 +0000 Subject: KVM: arm64: Expose PSCI SYSTEM_RESET2 call to the guest PSCI v1.1 introduces the optional SYSTEM_RESET2 call, which allows the caller to provide a vendor-specific "reset type" and "cookie" to request a particular form of reset or shutdown. Expose this call to the guest and handle it in the same way as PSCI SYSTEM_RESET, along with some basic range checking on the type argument. 
Cc: Marc Zyngier Cc: James Morse Cc: Alexandru Elisei Cc: Suzuki K Poulose Signed-off-by: Will Deacon Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20220221153524.15397-3-will@kernel.org --- include/uapi/linux/psci.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/psci.h b/include/uapi/linux/psci.h index 2fcad1dd0b0e..2bf93c0d6354 100644 --- a/include/uapi/linux/psci.h +++ b/include/uapi/linux/psci.h @@ -82,6 +82,10 @@ #define PSCI_0_2_TOS_UP_NO_MIGRATE 1 #define PSCI_0_2_TOS_MP 2 +/* PSCI v1.1 reset type encoding for SYSTEM_RESET2 */ +#define PSCI_1_1_RESET_TYPE_SYSTEM_WARM_RESET 0 +#define PSCI_1_1_RESET_TYPE_VENDOR_START 0x80000000U + /* PSCI version decoding (independent of PSCI version) */ #define PSCI_VERSION_MAJOR_SHIFT 16 #define PSCI_VERSION_MINOR_MASK \ -- cgit v1.2.3 From a603ca60cebff8589882427a67f870ed946b3fc8 Mon Sep 17 00:00:00 2001 From: Zev Weiss Date: Thu, 10 Feb 2022 16:42:03 -0800 Subject: serial: 8250_aspeed_vuart: add PORT_ASPEED_VUART port type Commit 54da3e381c2b ("serial: 8250_aspeed_vuart: use UPF_IOREMAP to set up register mapping") fixed a bug that had, as a side-effect, prevented the 8250_aspeed_vuart driver from enabling the VUART's FIFOs. However, fixing that (and hence enabling the FIFOs) has in turn revealed what appears to be a hardware bug in the ASPEED VUART in which the host-side THRE bit doesn't get set if the BMC-side receive FIFO trigger level is set to anything but one byte. This causes problems for polled-mode writes from the host -- for example, Linux kernel console writes proceed at a glacial pace (less than 100 bytes per second) because the write path waits for a 10ms timeout to expire after every character instead of being able to continue on to the next character upon seeing THRE asserted. (GRUB behaves similarly.)
As a workaround, introduce a new port type for the ASPEED VUART that's identical to PORT_16550A as it had previously been using, but with UART_FCR_R_TRIG_00 instead to set the receive FIFO trigger level to one byte, which (experimentally) seems to avoid the problematic THRE behavior. Fixes: 54da3e381c2b ("serial: 8250_aspeed_vuart: use UPF_IOREMAP to set up register mapping") Tested-by: Konstantin Aladyshev Reviewed-by: Andy Shevchenko Signed-off-by: Zev Weiss Link: https://lore.kernel.org/r/20220211004203.14915-1-zev@bewilderbeest.net Signed-off-by: Greg Kroah-Hartman --- include/uapi/linux/serial_core.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/serial_core.h b/include/uapi/linux/serial_core.h index c4042dcfdc0c..8885e69178bd 100644 --- a/include/uapi/linux/serial_core.h +++ b/include/uapi/linux/serial_core.h @@ -68,6 +68,9 @@ /* NVIDIA Tegra Combined UART */ #define PORT_TEGRA_TCU 41 +/* ASPEED AST2x00 virtual UART */ +#define PORT_ASPEED_VUART 42 + /* Intel EG20 */ #define PORT_PCH_8LINE 44 #define PORT_PCH_2LINE 45 -- cgit v1.2.3 From a1a5cfe70cd29a59a9a85290dfe95ed1c8df1193 Mon Sep 17 00:00:00 2001 From: Cosmin Tanislav Date: Mon, 14 Feb 2022 09:38:06 +0200 Subject: iio: introduce mag_referenced Some accelerometers that support activity and inactivity events also support a referenced mode, in which the gravitational acceleration is taken as a point of reference before comparing the acceleration to the specified activity and inactivity magnitude. For example, in the case of the ADXL367, for activity detection, the formula is: abs(acceleration - reference) > magnitude Add a new event type that makes this behavior clear. 
Signed-off-by: Cosmin Tanislav Link: https://lore.kernel.org/r/20220214073810.781016-2-cosmin.tanislav@analog.com Signed-off-by: Jonathan Cameron --- include/uapi/linux/iio/types.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/iio/types.h b/include/uapi/linux/iio/types.h index 48c13147c0a8..472cead10d8d 100644 --- a/include/uapi/linux/iio/types.h +++ b/include/uapi/linux/iio/types.h @@ -104,6 +104,7 @@ enum iio_event_type { IIO_EV_TYPE_THRESH_ADAPTIVE, IIO_EV_TYPE_MAG_ADAPTIVE, IIO_EV_TYPE_CHANGE, + IIO_EV_TYPE_MAG_REFERENCED, }; enum iio_event_direction { -- cgit v1.2.3 From 0fbb4d93b38bce1f8235aacfa37e90ad8f011473 Mon Sep 17 00:00:00 2001 From: Mike Snitzer Date: Thu, 17 Feb 2022 23:40:32 -0500 Subject: dm: add dm_submit_bio_remap interface Where possible, switch from early bio-based IO accounting (at the time DM clones each incoming bio) to late IO accounting just before each remapped bio is issued to underlying device via submit_bio_noacct(). Allows more precise bio-based IO accounting for DM targets that use their own workqueues to perform additional processing of each bio in conjunction with their DM_MAPIO_SUBMITTED return from their map function. When a target is updated to use dm_submit_bio_remap() they must also set ti->accounts_remapped_io to true. Use xchg() in start_io_acct(), as suggested by Mikulas, to ensure each IO is only started once. The xchg race only happens if __send_duplicate_bios() sends multiple bios -- that case is reflected via tio->is_duplicate_bio. Given the niche nature of this race, it is best to avoid any xchg performance penalty for normal IO. For IO that was never submitted with dm_bio_submit_remap(), but the target completes the clone with bio_endio, accounting is started then ended and pending_io counter decremented. 
Reviewed-by: Mikulas Patocka Signed-off-by: Mike Snitzer --- include/uapi/linux/dm-ioctl.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/dm-ioctl.h b/include/uapi/linux/dm-ioctl.h index c12ce30b52df..2e9550fef90f 100644 --- a/include/uapi/linux/dm-ioctl.h +++ b/include/uapi/linux/dm-ioctl.h @@ -286,9 +286,9 @@ enum { #define DM_DEV_SET_GEOMETRY _IOWR(DM_IOCTL, DM_DEV_SET_GEOMETRY_CMD, struct dm_ioctl) #define DM_VERSION_MAJOR 4 -#define DM_VERSION_MINOR 45 +#define DM_VERSION_MINOR 46 #define DM_VERSION_PATCHLEVEL 0 -#define DM_VERSION_EXTRA "-ioctl (2021-03-22)" +#define DM_VERSION_EXTRA "-ioctl (2022-02-22)" /* Status bits */ #define DM_READONLY_FLAG (1 << 0) /* In/Out */ -- cgit v1.2.3 From 93b71801a8274cd9511557faf04365a5de487197 Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Tue, 22 Feb 2022 09:06:54 -0500 Subject: KVM: PPC: reserve capability 210 for KVM_CAP_PPC_AIL_MODE_3 Add KVM_CAP_PPC_AIL_MODE_3 to advertise the capability to set the AIL resource mode to 3 with the H_SET_MODE hypercall. This capability differs between processor types and KVM types (PR, HV, Nested HV), and affects guest-visible behaviour. QEMU will implement a cap-ail-mode-3 to control this behaviour[1], and use the KVM CAP if available to determine KVM support[2]. 
Reviewed-by: Fabiano Rosas Signed-off-by: Nicholas Piggin Signed-off-by: Paolo Bonzini --- include/uapi/linux/kvm.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index 5191b57e1562..507ee1f2aa96 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -1134,6 +1134,7 @@ struct kvm_ppc_resize_hpt { #define KVM_CAP_VM_GPA_BITS 207 #define KVM_CAP_XSAVE2 208 #define KVM_CAP_SYS_ATTRIBUTES 209 +#define KVM_CAP_PPC_AIL_MODE_3 210 #ifdef KVM_CAP_IRQ_ROUTING -- cgit v1.2.3 From a21d9a670d81103db7f788de1a4a4a6e4b891a0b Mon Sep 17 00:00:00 2001 From: Hans Schultz Date: Wed, 23 Feb 2022 11:16:46 +0100 Subject: net: bridge: Add support for bridge port in locked mode In a 802.1X scenario, clients connected to a bridge port shall not be allowed to have traffic forwarded until fully authenticated. A static fdb entry of the clients MAC address for the bridge port unlocks the client and allows bidirectional communication. This scenario is facilitated with setting the bridge port in locked mode, which is also supported by various switchcore chipsets. Signed-off-by: Hans Schultz Acked-by: Nikolay Aleksandrov Reviewed-by: Ido Schimmel Signed-off-by: David S. 
Miller --- include/uapi/linux/if_link.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h index e1ba2d51b717..be09d2ad4b5d 100644 --- a/include/uapi/linux/if_link.h +++ b/include/uapi/linux/if_link.h @@ -537,6 +537,7 @@ enum { IFLA_BRPORT_MRP_IN_OPEN, IFLA_BRPORT_MCAST_EHT_HOSTS_LIMIT, IFLA_BRPORT_MCAST_EHT_HOSTS_CNT, + IFLA_BRPORT_LOCKED, __IFLA_BRPORT_MAX }; #define IFLA_BRPORT_MAX (__IFLA_BRPORT_MAX - 1) -- cgit v1.2.3 From 1241e329ce2e1f5b1039fd356b75867b29721ad2 Mon Sep 17 00:00:00 2001 From: Subbaraya Sundeep Date: Wed, 23 Feb 2022 00:09:12 +0530 Subject: ethtool: add support to set/get completion queue event size Add support to set completion queue event size via ethtool -G parameter and get it via ethtool -g parameter. ~ # ./ethtool -G eth0 cqe-size 512 ~ # ./ethtool -g eth0 Ring parameters for eth0: Pre-set maximums: RX: 1048576 RX Mini: n/a RX Jumbo: n/a TX: 1048576 Current hardware settings: RX: 256 RX Mini: n/a RX Jumbo: n/a TX: 4096 RX Buf Len: 2048 CQE Size: 128 Signed-off-by: Subbaraya Sundeep Signed-off-by: Sunil Goutham Signed-off-by: Jakub Kicinski --- include/uapi/linux/ethtool_netlink.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/ethtool_netlink.h b/include/uapi/linux/ethtool_netlink.h index 417d4280d7b5..979850221b8d 100644 --- a/include/uapi/linux/ethtool_netlink.h +++ b/include/uapi/linux/ethtool_netlink.h @@ -337,6 +337,7 @@ enum { ETHTOOL_A_RINGS_TX, /* u32 */ ETHTOOL_A_RINGS_RX_BUF_LEN, /* u32 */ ETHTOOL_A_RINGS_TCP_DATA_SPLIT, /* u8 */ + ETHTOOL_A_RINGS_CQE_SIZE, /* u32 */ /* add new constants above here */ __ETHTOOL_A_RINGS_CNT, -- cgit v1.2.3 From 28a3f0601727d521a1c6cce62ecbcb7402a9e4f5 Mon Sep 17 00:00:00 2001 From: Toms Atteka Date: Wed, 23 Feb 2022 16:54:09 -0800 Subject: net: openvswitch: IPv6: Add IPv6 extension header support This change adds a new OpenFlow field 
OFPXMT_OFB_IPV6_EXTHDR and packets can be filtered using ipv6_ext flag. Signed-off-by: Toms Atteka Acked-by: Pravin B Shelar Signed-off-by: David S. Miller --- include/uapi/linux/openvswitch.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/openvswitch.h b/include/uapi/linux/openvswitch.h index 150bcff49b1c..9d1710f20505 100644 --- a/include/uapi/linux/openvswitch.h +++ b/include/uapi/linux/openvswitch.h @@ -351,6 +351,7 @@ enum ovs_key_attr { OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV4, /* struct ovs_key_ct_tuple_ipv4 */ OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV6, /* struct ovs_key_ct_tuple_ipv6 */ OVS_KEY_ATTR_NSH, /* Nested set of ovs_nsh_key_* */ + OVS_KEY_ATTR_IPV6_EXTHDRS, /* struct ovs_key_ipv6_exthdr */ #ifdef __KERNEL__ OVS_KEY_ATTR_TUNNEL_INFO, /* struct ip_tunnel_info */ @@ -430,6 +431,11 @@ struct ovs_key_ipv6 { __u8 ipv6_frag; /* One of OVS_FRAG_TYPE_*. */ }; +/* separate structure to support backward compatibility with older user space */ +struct ovs_key_ipv6_exthdrs { + __u16 hdrs; +}; + struct ovs_key_tcp { __be16 tcp_src; __be16 tcp_dst; -- cgit v1.2.3 From ba7bb663f5547ef474c98df99a97bb4a13c5715f Mon Sep 17 00:00:00 2001 From: David Dunn Date: Wed, 23 Feb 2022 22:57:41 +0000 Subject: KVM: x86: Provide per VM capability for disabling PMU virtualization Add a new capability, KVM_CAP_PMU_CAPABILITY, that takes a bitmask of settings/features to allow userspace to configure PMU virtualization on a per-VM basis. For now, support a single flag, KVM_PMU_CAP_DISABLE, to allow disabling PMU virtualization for a VM even when KVM is configured with enable_pmu=true at a module level. To keep KVM simple, disallow changing VM's PMU configuration after vCPUs have been created.
Signed-off-by: David Dunn Message-Id: <20220223225743.2703915-2-daviddunn@google.com> Signed-off-by: Paolo Bonzini --- include/uapi/linux/kvm.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index a02bbf8fd0f6..d2f1efc3aa35 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -1142,6 +1142,7 @@ struct kvm_ppc_resize_hpt { #define KVM_CAP_SYS_ATTRIBUTES 209 #define KVM_CAP_PPC_AIL_MODE_3 210 #define KVM_CAP_S390_MEM_OP_EXTENSION 211 +#define KVM_CAP_PMU_CAPABILITY 212 #ifdef KVM_CAP_IRQ_ROUTING @@ -1978,6 +1979,8 @@ struct kvm_dirty_gfn { #define KVM_BUS_LOCK_DETECTION_OFF (1 << 0) #define KVM_BUS_LOCK_DETECTION_EXIT (1 << 1) +#define KVM_PMU_CAP_DISABLE (1 << 0) + /** * struct kvm_stats_header - Header of per vm/vcpu binary statistics data. * @flags: Some extra information for header, always 0 for now. -- cgit v1.2.3 From 43245eca6e670ebf65908b549641c1460a9cc944 Mon Sep 17 00:00:00 2001 From: Olga Kornievskaia Date: Wed, 2 Feb 2022 17:55:02 -0500 Subject: NFSv4.1 support for NFS4_RESULT_PRESERVER_UNLINKED In 4.1+, the server is allowed to set a flag NFS4_RESULT_PRESERVE_UNLINKED in reply to the OPEN, that tells the client that it does not need to do a silly rename of an opened file when it's being removed. 
Signed-off-by: Olga Kornievskaia Signed-off-by: Trond Myklebust --- include/uapi/linux/nfs4.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/nfs4.h b/include/uapi/linux/nfs4.h index 800bb0ffa6e6..1d2043708bf1 100644 --- a/include/uapi/linux/nfs4.h +++ b/include/uapi/linux/nfs4.h @@ -45,6 +45,7 @@ #define NFS4_OPEN_RESULT_CONFIRM 0x0002 #define NFS4_OPEN_RESULT_LOCKTYPE_POSIX 0x0004 +#define NFS4_OPEN_RESULT_PRESERVE_UNLINKED 0x0008 #define NFS4_OPEN_RESULT_MAY_NOTIFY_LOCK 0x0020 #define NFS4_SHARE_ACCESS_MASK 0x000F -- cgit v1.2.3 From 9e8d5470325f25bed7d33f9faaae6d5e4f313650 Mon Sep 17 00:00:00 2001 From: Hammer Hsieh Date: Tue, 22 Feb 2022 17:36:03 +0800 Subject: serial: sunplus-uart: Add Sunplus SoC UART Driver Add Sunplus SoC UART Driver. SP7021 UART block contains 5 UARTs. There are UART0~4 that supported in SP7021, the features list as below. Support Full-duplex communication. Support data packet length configurable. Support stop bit number configurable. Support force break condition. Support baud rate configurable. Support error detection and report. Support RXD Noise Rejection Vote configurable. UART0 pinout only support TX/RX two pins. UART1 to UART4 pinout support TX/RX/CTS/RTS four pins. Normally UART0 used for kernel console, also can be used for normal uart. Command line set "console=ttySUP0,115200", SUP means Sunplus Uart Port. UART driver probe will create path named "/dev/ttySUPx". 
https://sunplus.atlassian.net/wiki/spaces/doc/pages/1873412290/13.+Universal+Asynchronous+Receiver+Transmitter+UART Signed-off-by: Hammer Hsieh Link: https://lore.kernel.org/r/1645522563-17183-3-git-send-email-hammerh0314@gmail.com Signed-off-by: Greg Kroah-Hartman --- include/uapi/linux/serial_core.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/serial_core.h b/include/uapi/linux/serial_core.h index 8885e69178bd..6faf502b7860 100644 --- a/include/uapi/linux/serial_core.h +++ b/include/uapi/linux/serial_core.h @@ -277,4 +277,7 @@ /* Freescale LINFlexD UART */ #define PORT_LINFLEXUART 122 +/* Sunplus UART */ +#define PORT_SUNPLUS 123 + #endif /* _UAPILINUX_SERIAL_CORE_H */ -- cgit v1.2.3 From c2faf737abfb10f88f2d2612d573e9edc3c42c37 Mon Sep 17 00:00:00 2001 From: Max Staudt Date: Fri, 11 Feb 2022 15:10:36 +0100 Subject: tty: Reserve ldisc 29 for development purposes It's handy to have an ldisc number free for out-of-tree testing. This way, a new ldisc can be developed on any running system, without having to recompile the kernel just to define a new number. This is the highest number (and also the last one) available under the old numbering scheme, so let's reserve it before it's too late. From now on, every new ldisc upstreamed will have to increment NR_LDISCS in lockstep with its addition to the table in tty.h. Signed-off-by: Max Staudt Link: https://lore.kernel.org/r/20220211141036.6403-1-max@enpas.org Signed-off-by: Greg Kroah-Hartman --- include/uapi/linux/tty.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/tty.h b/include/uapi/linux/tty.h index a58deb3061eb..9d0f06bfbac3 100644 --- a/include/uapi/linux/tty.h +++ b/include/uapi/linux/tty.h @@ -6,8 +6,6 @@ * 'tty.h' defines some structures used by tty_io.c and some defines. 
*/ -#define NR_LDISCS 30 - /* line disciplines */ #define N_TTY 0 #define N_SLIP 1 @@ -39,5 +37,9 @@ #define N_SPEAKUP 26 /* Speakup communication with synths */ #define N_NULL 27 /* Null ldisc used for error handling */ #define N_MCTP 28 /* MCTP-over-serial */ +#define N_DEVELOPMENT 29 /* Manual out-of-tree testing */ + +/* Always the newest line discipline + 1 */ +#define NR_LDISCS 30 #endif /* _UAPI_LINUX_TTY_H */ -- cgit v1.2.3 From fad278388e01e3658a356118bed8ee2c2408d280 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Mon, 24 May 2021 21:15:15 -0700 Subject: media: omap3isp: Use struct_group() for memcpy() region In preparation for FORTIFY_SOURCE performing compile-time and run-time field bounds checking for memcpy(), memmove(), and memset(), avoid intentionally writing across neighboring fields. Wrap the target region in struct_group(). This additionally fixes a theoretical misalignment of the copy (since the size of "buf" changes between 64-bit and 32-bit, but this is likely never built for 64-bit). FWIW, I think this code is totally broken on 64-bit (which appears to not be a "real" build configuration): it would either always fail (with an uninitialized data->buf_size) or would cause corruption in userspace due to the copy_to_user() in the call path against an uninitialized data->buf value: omap3isp_stat_request_statistics_time32(...) struct omap3isp_stat_data data64; ... omap3isp_stat_request_statistics(stat, &data64); int omap3isp_stat_request_statistics(struct ispstat *stat, struct omap3isp_stat_data *data) ... buf = isp_stat_buf_get(stat, data); static struct ispstat_buffer *isp_stat_buf_get(struct ispstat *stat, struct omap3isp_stat_data *data) ... if (buf->buf_size > data->buf_size) { ... return ERR_PTR(-EINVAL); } ... rval = copy_to_user(data->buf, buf->virt_addr, buf->buf_size); Regardless, additionally initialize data64 to be zero-filled to avoid undefined behavior. 
Cc: Laurent Pinchart Cc: Mauro Carvalho Chehab Cc: Arnd Bergmann Cc: Sakari Ailus Cc: linux-media@vger.kernel.org Fixes: 378e3f81cb56 ("media: omap3isp: support 64-bit version of omap3isp_stat_data") Cc: stable@vger.kernel.org Reviewed-by: Gustavo A. R. Silva Link: https://lore.kernel.org/lkml/20211215220505.GB21862@embeddedor Signed-off-by: Kees Cook --- include/uapi/linux/omap3isp.h | 21 +++++++++++++-------- 1 file changed, 13 insertions(+), 8 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/omap3isp.h b/include/uapi/linux/omap3isp.h index 87b55755f4ff..d9db7ad43890 100644 --- a/include/uapi/linux/omap3isp.h +++ b/include/uapi/linux/omap3isp.h @@ -162,6 +162,7 @@ struct omap3isp_h3a_aewb_config { * struct omap3isp_stat_data - Statistic data sent to or received from user * @ts: Timestamp of returned framestats. * @buf: Pointer to pass to user. + * @buf_size: Size of buffer. * @frame_number: Frame number of requested stats. * @cur_frame: Current frame number being processed. * @config_counter: Number of the configuration associated with the data. 
@@ -176,10 +177,12 @@ struct omap3isp_stat_data { struct timeval ts; #endif void __user *buf; - __u32 buf_size; - __u16 frame_number; - __u16 cur_frame; - __u16 config_counter; + __struct_group(/* no tag */, frame, /* no attrs */, + __u32 buf_size; + __u16 frame_number; + __u16 cur_frame; + __u16 config_counter; + ); }; #ifdef __KERNEL__ @@ -189,10 +192,12 @@ struct omap3isp_stat_data_time32 { __s32 tv_usec; } ts; __u32 buf; - __u32 buf_size; - __u16 frame_number; - __u16 cur_frame; - __u16 config_counter; + __struct_group(/* no tag */, frame, /* no attrs */, + __u32 buf_size; + __u16 frame_number; + __u16 cur_frame; + __u16 config_counter; + ); }; #endif -- cgit v1.2.3 From 89377bc1975c2993bde4a498a3a4e5817ac0ae2c Mon Sep 17 00:00:00 2001 From: Kanchan Joshi Date: Thu, 10 Feb 2022 11:07:55 +0530 Subject: nvme: add vectored-io support for user-passthrough Add a new NVME_IOCTL_IO64_CMD_VEC ioctl that works like the existing NVME_IOCTL_IO64_CMD ioctl except that it takes an array of iovecs and thus supports vectored I/O. - cmd.addr is base address of user iovec array - cmd.vec_cnt is count of iovec array elements This patch does not include vectored-variant for admin-commands as most of them are light on buffers and likely to have low invocation frequency.
Signed-off-by: Kanchan Joshi Reviewed-by: Sagi Grimberg Signed-off-by: Christoph Hellwig --- include/uapi/linux/nvme_ioctl.h | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/nvme_ioctl.h b/include/uapi/linux/nvme_ioctl.h index d99b5a772698..b2e43185e3b5 100644 --- a/include/uapi/linux/nvme_ioctl.h +++ b/include/uapi/linux/nvme_ioctl.h @@ -55,7 +55,10 @@ struct nvme_passthru_cmd64 { __u64 metadata; __u64 addr; __u32 metadata_len; - __u32 data_len; + union { + __u32 data_len; /* for non-vectored io */ + __u32 vec_cnt; /* for vectored io */ + }; __u32 cdw10; __u32 cdw11; __u32 cdw12; @@ -78,5 +81,6 @@ struct nvme_passthru_cmd64 { #define NVME_IOCTL_RESCAN _IO('N', 0x46) #define NVME_IOCTL_ADMIN64_CMD _IOWR('N', 0x47, struct nvme_passthru_cmd64) #define NVME_IOCTL_IO64_CMD _IOWR('N', 0x48, struct nvme_passthru_cmd64) +#define NVME_IOCTL_IO64_CMD_VEC _IOWR('N', 0x49, struct nvme_passthru_cmd64) #endif /* _UAPI_LINUX_NVME_IOCTL_H */ -- cgit v1.2.3 From 0c9f178778919bb500443f340647b44227778fb2 Mon Sep 17 00:00:00 2001 From: Lu Baolu Date: Wed, 16 Feb 2022 10:52:42 +0800 Subject: iommu: Remove guest pasid related interfaces and definitions The guest pasid related uapi interfaces and definitions are not referenced anywhere in the tree. We've also reached a consensus to replace them with a new iommufd design. Remove them to avoid dead code. 
Signed-off-by: Lu Baolu Reviewed-by: Christoph Hellwig Reviewed-by: Jason Gunthorpe Link: https://lore.kernel.org/r/20220216025249.3459465-3-baolu.lu@linux.intel.com Signed-off-by: Joerg Roedel --- include/uapi/linux/iommu.h | 181 --------------------------------------------- 1 file changed, 181 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/iommu.h b/include/uapi/linux/iommu.h index 59178fc229ca..65d8b0234f69 100644 --- a/include/uapi/linux/iommu.h +++ b/include/uapi/linux/iommu.h @@ -158,185 +158,4 @@ struct iommu_page_response { __u32 code; }; -/* defines the granularity of the invalidation */ -enum iommu_inv_granularity { - IOMMU_INV_GRANU_DOMAIN, /* domain-selective invalidation */ - IOMMU_INV_GRANU_PASID, /* PASID-selective invalidation */ - IOMMU_INV_GRANU_ADDR, /* page-selective invalidation */ - IOMMU_INV_GRANU_NR, /* number of invalidation granularities */ -}; - -/** - * struct iommu_inv_addr_info - Address Selective Invalidation Structure - * - * @flags: indicates the granularity of the address-selective invalidation - * - If the PASID bit is set, the @pasid field is populated and the invalidation - * relates to cache entries tagged with this PASID and matching the address - * range. - * - If ARCHID bit is set, @archid is populated and the invalidation relates - * to cache entries tagged with this architecture specific ID and matching - * the address range. - * - Both PASID and ARCHID can be set as they may tag different caches. - * - If neither PASID or ARCHID is set, global addr invalidation applies. - * - The LEAF flag indicates whether only the leaf PTE caching needs to be - * invalidated and other paging structure caches can be preserved. 
- * @pasid: process address space ID - * @archid: architecture-specific ID - * @addr: first stage/level input address - * @granule_size: page/block size of the mapping in bytes - * @nb_granules: number of contiguous granules to be invalidated - */ -struct iommu_inv_addr_info { -#define IOMMU_INV_ADDR_FLAGS_PASID (1 << 0) -#define IOMMU_INV_ADDR_FLAGS_ARCHID (1 << 1) -#define IOMMU_INV_ADDR_FLAGS_LEAF (1 << 2) - __u32 flags; - __u32 archid; - __u64 pasid; - __u64 addr; - __u64 granule_size; - __u64 nb_granules; -}; - -/** - * struct iommu_inv_pasid_info - PASID Selective Invalidation Structure - * - * @flags: indicates the granularity of the PASID-selective invalidation - * - If the PASID bit is set, the @pasid field is populated and the invalidation - * relates to cache entries tagged with this PASID and matching the address - * range. - * - If the ARCHID bit is set, the @archid is populated and the invalidation - * relates to cache entries tagged with this architecture specific ID and - * matching the address range. - * - Both PASID and ARCHID can be set as they may tag different caches. - * - At least one of PASID or ARCHID must be set. 
- * @pasid: process address space ID - * @archid: architecture-specific ID - */ -struct iommu_inv_pasid_info { -#define IOMMU_INV_PASID_FLAGS_PASID (1 << 0) -#define IOMMU_INV_PASID_FLAGS_ARCHID (1 << 1) - __u32 flags; - __u32 archid; - __u64 pasid; -}; - -/** - * struct iommu_cache_invalidate_info - First level/stage invalidation - * information - * @argsz: User filled size of this data - * @version: API version of this structure - * @cache: bitfield that allows to select which caches to invalidate - * @granularity: defines the lowest granularity used for the invalidation: - * domain > PASID > addr - * @padding: reserved for future use (should be zero) - * @pasid_info: invalidation data when @granularity is %IOMMU_INV_GRANU_PASID - * @addr_info: invalidation data when @granularity is %IOMMU_INV_GRANU_ADDR - * - * Not all the combinations of cache/granularity are valid: - * - * +--------------+---------------+---------------+---------------+ - * | type / | DEV_IOTLB | IOTLB | PASID | - * | granularity | | | cache | - * +==============+===============+===============+===============+ - * | DOMAIN | N/A | Y | Y | - * +--------------+---------------+---------------+---------------+ - * | PASID | Y | Y | Y | - * +--------------+---------------+---------------+---------------+ - * | ADDR | Y | Y | N/A | - * +--------------+---------------+---------------+---------------+ - * - * Invalidations by %IOMMU_INV_GRANU_DOMAIN don't take any argument other than - * @version and @cache. - * - * If multiple cache types are invalidated simultaneously, they all - * must support the used granularity. 
- */ -struct iommu_cache_invalidate_info { - __u32 argsz; -#define IOMMU_CACHE_INVALIDATE_INFO_VERSION_1 1 - __u32 version; -/* IOMMU paging structure cache */ -#define IOMMU_CACHE_INV_TYPE_IOTLB (1 << 0) /* IOMMU IOTLB */ -#define IOMMU_CACHE_INV_TYPE_DEV_IOTLB (1 << 1) /* Device IOTLB */ -#define IOMMU_CACHE_INV_TYPE_PASID (1 << 2) /* PASID cache */ -#define IOMMU_CACHE_INV_TYPE_NR (3) - __u8 cache; - __u8 granularity; - __u8 padding[6]; - union { - struct iommu_inv_pasid_info pasid_info; - struct iommu_inv_addr_info addr_info; - } granu; -}; - -/** - * struct iommu_gpasid_bind_data_vtd - Intel VT-d specific data on device and guest - * SVA binding. - * - * @flags: VT-d PASID table entry attributes - * @pat: Page attribute table data to compute effective memory type - * @emt: Extended memory type - * - * Only guest vIOMMU selectable and effective options are passed down to - * the host IOMMU. - */ -struct iommu_gpasid_bind_data_vtd { -#define IOMMU_SVA_VTD_GPASID_SRE (1 << 0) /* supervisor request */ -#define IOMMU_SVA_VTD_GPASID_EAFE (1 << 1) /* extended access enable */ -#define IOMMU_SVA_VTD_GPASID_PCD (1 << 2) /* page-level cache disable */ -#define IOMMU_SVA_VTD_GPASID_PWT (1 << 3) /* page-level write through */ -#define IOMMU_SVA_VTD_GPASID_EMTE (1 << 4) /* extended mem type enable */ -#define IOMMU_SVA_VTD_GPASID_CD (1 << 5) /* PASID-level cache disable */ -#define IOMMU_SVA_VTD_GPASID_WPE (1 << 6) /* Write protect enable */ -#define IOMMU_SVA_VTD_GPASID_LAST (1 << 7) - __u64 flags; - __u32 pat; - __u32 emt; -}; - -#define IOMMU_SVA_VTD_GPASID_MTS_MASK (IOMMU_SVA_VTD_GPASID_CD | \ - IOMMU_SVA_VTD_GPASID_EMTE | \ - IOMMU_SVA_VTD_GPASID_PCD | \ - IOMMU_SVA_VTD_GPASID_PWT) - -/** - * struct iommu_gpasid_bind_data - Information about device and guest PASID binding - * @argsz: User filled size of this data - * @version: Version of this data structure - * @format: PASID table entry format - * @flags: Additional information on guest bind request - * @gpgd: Guest 
page directory base of the guest mm to bind - * @hpasid: Process address space ID used for the guest mm in host IOMMU - * @gpasid: Process address space ID used for the guest mm in guest IOMMU - * @addr_width: Guest virtual address width - * @padding: Reserved for future use (should be zero) - * @vtd: Intel VT-d specific data - * - * Guest to host PASID mapping can be an identity or non-identity, where guest - * has its own PASID space. For non-identify mapping, guest to host PASID lookup - * is needed when VM programs guest PASID into an assigned device. VMM may - * trap such PASID programming then request host IOMMU driver to convert guest - * PASID to host PASID based on this bind data. - */ -struct iommu_gpasid_bind_data { - __u32 argsz; -#define IOMMU_GPASID_BIND_VERSION_1 1 - __u32 version; -#define IOMMU_PASID_FORMAT_INTEL_VTD 1 -#define IOMMU_PASID_FORMAT_LAST 2 - __u32 format; - __u32 addr_width; -#define IOMMU_SVA_GPASID_VAL (1 << 0) /* guest PASID valid */ - __u64 flags; - __u64 gpgd; - __u64 hpasid; - __u64 gpasid; - __u8 padding[8]; - /* Vendor specific data */ - union { - struct iommu_gpasid_bind_data_vtd vtd; - } vendor; -}; - #endif /* _UAPI_IOMMU_H */ -- cgit v1.2.3 From 7b8135f4df98b155b23754b6065c157861e268f1 Mon Sep 17 00:00:00 2001 From: Roopa Prabhu Date: Tue, 1 Mar 2022 05:04:34 +0000 Subject: rtnetlink: add new rtm tunnel api for tunnel id filtering This patch adds new rtm tunnel msg and api for tunnel id filtering in dst_metadata devices. First dst_metadata device to use the api is vxlan driver with AF_BRIDGE family. This and later changes add ability in vxlan driver to do tunnel id filtering (or vni filtering) on dst_metadata devices. This is similar to vlan api in the vlan filtering bridge. this patch includes selinux nlmsg_route_perms support for RTM_*TUNNEL api from Benjamin Poirier. Signed-off-by: Roopa Prabhu Signed-off-by: David S. 
Miller --- include/uapi/linux/if_link.h | 26 ++++++++++++++++++++++++++ include/uapi/linux/rtnetlink.h | 9 +++++++++ 2 files changed, 35 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h index be09d2ad4b5d..3dfc9ff2ec9b 100644 --- a/include/uapi/linux/if_link.h +++ b/include/uapi/linux/if_link.h @@ -713,7 +713,32 @@ enum ipvlan_mode { #define IPVLAN_F_PRIVATE 0x01 #define IPVLAN_F_VEPA 0x02 +/* Tunnel RTM header */ +struct tunnel_msg { + __u8 family; + __u8 reserved1; + __u16 reserved2; + __u32 ifindex; +}; + /* VXLAN section */ +enum { + VXLAN_VNIFILTER_ENTRY_UNSPEC, + VXLAN_VNIFILTER_ENTRY_START, + VXLAN_VNIFILTER_ENTRY_END, + VXLAN_VNIFILTER_ENTRY_GROUP, + VXLAN_VNIFILTER_ENTRY_GROUP6, + __VXLAN_VNIFILTER_ENTRY_MAX +}; +#define VXLAN_VNIFILTER_ENTRY_MAX (__VXLAN_VNIFILTER_ENTRY_MAX - 1) + +enum { + VXLAN_VNIFILTER_UNSPEC, + VXLAN_VNIFILTER_ENTRY, + __VXLAN_VNIFILTER_MAX +}; +#define VXLAN_VNIFILTER_MAX (__VXLAN_VNIFILTER_MAX - 1) + enum { IFLA_VXLAN_UNSPEC, IFLA_VXLAN_ID, @@ -745,6 +770,7 @@ enum { IFLA_VXLAN_GPE, IFLA_VXLAN_TTL_INHERIT, IFLA_VXLAN_DF, + IFLA_VXLAN_VNIFILTER, /* only applicable with COLLECT_METADATA mode */ __IFLA_VXLAN_MAX }; #define IFLA_VXLAN_MAX (__IFLA_VXLAN_MAX - 1) diff --git a/include/uapi/linux/rtnetlink.h b/include/uapi/linux/rtnetlink.h index 93d934cc4613..0970cb4b1b88 100644 --- a/include/uapi/linux/rtnetlink.h +++ b/include/uapi/linux/rtnetlink.h @@ -185,6 +185,13 @@ enum { RTM_GETNEXTHOPBUCKET, #define RTM_GETNEXTHOPBUCKET RTM_GETNEXTHOPBUCKET + RTM_NEWTUNNEL = 120, +#define RTM_NEWTUNNEL RTM_NEWTUNNEL + RTM_DELTUNNEL, +#define RTM_DELTUNNEL RTM_DELTUNNEL + RTM_GETTUNNEL, +#define RTM_GETTUNNEL RTM_GETTUNNEL + __RTM_MAX, #define RTM_MAX (((__RTM_MAX + 3) & ~3) - 1) }; @@ -756,6 +763,8 @@ enum rtnetlink_groups { #define RTNLGRP_BRVLAN RTNLGRP_BRVLAN RTNLGRP_MCTP_IFADDR, #define RTNLGRP_MCTP_IFADDR RTNLGRP_MCTP_IFADDR + RTNLGRP_TUNNEL, +#define RTNLGRP_TUNNEL 
RTNLGRP_TUNNEL __RTNLGRP_MAX }; #define RTNLGRP_MAX (__RTNLGRP_MAX - 1) -- cgit v1.2.3 From 445b2f36bb4efb81f064e931f28b9ec19f114355 Mon Sep 17 00:00:00 2001 From: Nikolay Aleksandrov Date: Tue, 1 Mar 2022 05:04:39 +0000 Subject: drivers: vxlan: vnifilter: add support for stats dumping Add support for VXLAN vni filter entries' stats dumping Signed-off-by: Nikolay Aleksandrov Signed-off-by: David S. Miller --- include/uapi/linux/if_link.h | 25 ++++++++++++++++++++++++- 1 file changed, 24 insertions(+), 1 deletion(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h index 3dfc9ff2ec9b..e315e53125f4 100644 --- a/include/uapi/linux/if_link.h +++ b/include/uapi/linux/if_link.h @@ -716,18 +716,41 @@ enum ipvlan_mode { /* Tunnel RTM header */ struct tunnel_msg { __u8 family; - __u8 reserved1; + __u8 flags; __u16 reserved2; __u32 ifindex; }; /* VXLAN section */ + +/* include statistics in the dump */ +#define TUNNEL_MSG_FLAG_STATS 0x01 + +#define TUNNEL_MSG_VALID_USER_FLAGS TUNNEL_MSG_FLAG_STATS + +/* Embedded inside VXLAN_VNIFILTER_ENTRY_STATS */ +enum { + VNIFILTER_ENTRY_STATS_UNSPEC, + VNIFILTER_ENTRY_STATS_RX_BYTES, + VNIFILTER_ENTRY_STATS_RX_PKTS, + VNIFILTER_ENTRY_STATS_RX_DROPS, + VNIFILTER_ENTRY_STATS_RX_ERRORS, + VNIFILTER_ENTRY_STATS_TX_BYTES, + VNIFILTER_ENTRY_STATS_TX_PKTS, + VNIFILTER_ENTRY_STATS_TX_DROPS, + VNIFILTER_ENTRY_STATS_TX_ERRORS, + VNIFILTER_ENTRY_STATS_PAD, + __VNIFILTER_ENTRY_STATS_MAX +}; +#define VNIFILTER_ENTRY_STATS_MAX (__VNIFILTER_ENTRY_STATS_MAX - 1) + enum { VXLAN_VNIFILTER_ENTRY_UNSPEC, VXLAN_VNIFILTER_ENTRY_START, VXLAN_VNIFILTER_ENTRY_END, VXLAN_VNIFILTER_ENTRY_GROUP, VXLAN_VNIFILTER_ENTRY_GROUP6, + VXLAN_VNIFILTER_ENTRY_STATS, __VXLAN_VNIFILTER_ENTRY_MAX }; #define VXLAN_VNIFILTER_ENTRY_MAX (__VXLAN_VNIFILTER_ENTRY_MAX - 1) -- cgit v1.2.3 From cedd3614e5d9c80908099c19f8716714ce0610b1 Mon Sep 17 00:00:00 2001 From: Anshuman Khandual Date: Thu, 24 Feb 2022 11:06:54 +0530 Subject: 
perf: Add irq and exception return branch types This expands generic branch type classification by adding two more entries, i.e. irq and exception return. Also updates the x86 implementation to process X86_BR_IRET and X86_BR_IRQ records as appropriate. This changes branch types reported to user space on x86 platform but it should not be a problem. The possible scenarios and impacts are enumerated here. Signed-off-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r/1645681014-3346-1-git-send-email-anshuman.khandual@arm.com --- include/uapi/linux/perf_event.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/perf_event.h b/include/uapi/linux/perf_event.h index 1b65042ab1db..7dc71768749d 100644 --- a/include/uapi/linux/perf_event.h +++ b/include/uapi/linux/perf_event.h @@ -251,6 +251,8 @@ enum { PERF_BR_SYSRET = 8, /* syscall return */ PERF_BR_COND_CALL = 9, /* conditional function call */ PERF_BR_COND_RET = 10, /* conditional function return */ + PERF_BR_ERET = 11, /* exception return */ + PERF_BR_IRQ = 12, /* irq */ PERF_BR_MAX, }; -- cgit v1.2.3 From 4f0bfdfd8323e5b461fc1042143a1097dba9fced Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Wed, 23 Feb 2022 19:34:31 -0800 Subject: ELF: Properly redefine PT_GNU_* in terms of PT_LOOS The PT_GNU_* program header types are actually offsets from PT_LOOS, so redefine them as such, reorder them, and add the missing PT_GNU_RELRO.
Cc: Eric Biederman Cc: Peter Collingbourne Cc: Catalin Marinas Cc: Dave Martin Signed-off-by: Kees Cook --- include/uapi/linux/elf.h | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/elf.h b/include/uapi/linux/elf.h index 61bf4774b8f2..6438d55529bf 100644 --- a/include/uapi/linux/elf.h +++ b/include/uapi/linux/elf.h @@ -35,10 +35,11 @@ typedef __s64 Elf64_Sxword; #define PT_HIOS 0x6fffffff /* OS-specific */ #define PT_LOPROC 0x70000000 #define PT_HIPROC 0x7fffffff -#define PT_GNU_EH_FRAME 0x6474e550 -#define PT_GNU_PROPERTY 0x6474e553 - +#define PT_GNU_EH_FRAME (PT_LOOS + 0x474e550) #define PT_GNU_STACK (PT_LOOS + 0x474e551) +#define PT_GNU_RELRO (PT_LOOS + 0x474e552) +#define PT_GNU_PROPERTY (PT_LOOS + 0x474e553) + /* * Extended Numbering -- cgit v1.2.3 From dd0ca255f3d27a1bb43d8e9529fb3645f9a341a3 Mon Sep 17 00:00:00 2001 From: Daniel Braunwarth Date: Mon, 28 Feb 2022 14:30:28 +0100 Subject: if_ether.h: add PROFINET Ethertype Add the Ethertype for PROFINET protocol. Signed-off-by: Daniel Braunwarth Signed-off-by: Jakub Kicinski --- include/uapi/linux/if_ether.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/if_ether.h b/include/uapi/linux/if_ether.h index c0c2f3ed5729..4f4ed35a16db 100644 --- a/include/uapi/linux/if_ether.h +++ b/include/uapi/linux/if_ether.h @@ -86,6 +86,7 @@ * over Ethernet */ #define ETH_P_PAE 0x888E /* Port Access Entity (IEEE 802.1X) */ +#define ETH_P_PROFINET 0x8892 /* PROFINET */ #define ETH_P_REALTEK 0x8899 /* Multiple proprietary protocols */ #define ETH_P_AOE 0x88A2 /* ATA over Ethernet */ #define ETH_P_8021AD 0x88A8 /* 802.1ad Service VLAN */ -- cgit v1.2.3 From cd73cda742fbe1f33ed7306c7a01aa64f4e6ebd5 Mon Sep 17 00:00:00 2001 From: Daniel Braunwarth Date: Mon, 28 Feb 2022 14:30:29 +0100 Subject: if_ether.h: add EtherCAT Ethertype Add the Ethertype for EtherCAT protocol. 
Signed-off-by: Daniel Braunwarth Signed-off-by: Jakub Kicinski --- include/uapi/linux/if_ether.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/if_ether.h b/include/uapi/linux/if_ether.h index 4f4ed35a16db..1d0bccc3fa54 100644 --- a/include/uapi/linux/if_ether.h +++ b/include/uapi/linux/if_ether.h @@ -89,6 +89,7 @@ #define ETH_P_PROFINET 0x8892 /* PROFINET */ #define ETH_P_REALTEK 0x8899 /* Multiple proprietary protocols */ #define ETH_P_AOE 0x88A2 /* ATA over Ethernet */ +#define ETH_P_ETHERCAT 0x88A4 /* EtherCAT */ #define ETH_P_8021AD 0x88A8 /* 802.1ad Service VLAN */ #define ETH_P_802_EX1 0x88B5 /* 802.1 Local Experimental 1. */ #define ETH_P_PREAUTH 0x88C7 /* 802.11 Preauthentication */ -- cgit v1.2.3 From d58b8a99cbb84c1eb3b3613d23c1a328695a9455 Mon Sep 17 00:00:00 2001 From: Philip Yang Date: Fri, 25 Feb 2022 12:33:51 -0500 Subject: drm/amdkfd: Add SMI add event helper To remove duplicate code, unify event message format and simplify new event add in the following patches. Use KFD_SMI_EVENT_MSG_SIZE to define msg size, the same size will be used in user space to alloc the msg receive buffer. 
Signed-off-by: Philip Yang Reviewed-by: Felix Kuehling Signed-off-by: Alex Deucher --- include/uapi/linux/kfd_ioctl.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/kfd_ioctl.h b/include/uapi/linux/kfd_ioctl.h index baec5a41de3e..b40687bf1014 100644 --- a/include/uapi/linux/kfd_ioctl.h +++ b/include/uapi/linux/kfd_ioctl.h @@ -463,6 +463,7 @@ enum kfd_smi_event { }; #define KFD_SMI_EVENT_MASK_FROM_INDEX(i) (1ULL << ((i) - 1)) +#define KFD_SMI_EVENT_MSG_SIZE 96 struct kfd_ioctl_smi_events_args { __u32 gpuid; /* to KFD */ -- cgit v1.2.3 From 46efc97b73060823fdc18103a5e317a8327d44e1 Mon Sep 17 00:00:00 2001 From: Petr Machata Date: Wed, 2 Mar 2022 18:31:17 +0200 Subject: net: rtnetlink: RTM_GETSTATS: Allow filtering inside nests The filter_mask field of RTM_GETSTATS header determines which top-level attributes should be included in the netlink response. This saves processing time by only including the bits that the user cares about instead of always dumping everything. This is doubly important for HW-backed statistics that would typically require a trip to the device to fetch the stats. So far there was only one HW-backed stat suite per attribute. However, IFLA_STATS_LINK_OFFLOAD_XSTATS is a nest, and will gain a new stat suite in the following patches. It would therefore be advantageous to be able to filter within that nest, and select just one or the other HW-backed statistics suite. Extend rtnetlink so that RTM_GETSTATS permits attributes in the payload. The scheme is as follows: - RTM_GETSTATS - struct if_stats_msg - attr nest IFLA_STATS_GET_FILTERS - attr IFLA_STATS_LINK_OFFLOAD_XSTATS - u32 filter_mask This scheme reuses the existing enumerators by nesting them in a dedicated context attribute. This is covered by policies as usual, therefore a gradual opt-in is possible. 
Currently only IFLA_STATS_LINK_OFFLOAD_XSTATS nest has filtering enabled, because for the SW counters the issue does not seem to be that important. rtnl_offload_xstats_get_size() and _fill() are extended to observe the requested filters. Signed-off-by: Petr Machata Signed-off-by: Ido Schimmel Signed-off-by: David S. Miller --- include/uapi/linux/if_link.h | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h index e315e53125f4..4d62ea6e1288 100644 --- a/include/uapi/linux/if_link.h +++ b/include/uapi/linux/if_link.h @@ -1207,6 +1207,16 @@ enum { #define IFLA_STATS_FILTER_BIT(ATTR) (1 << (ATTR - 1)) +enum { + IFLA_STATS_GETSET_UNSPEC, + IFLA_STATS_GET_FILTERS, /* Nest of IFLA_STATS_LINK_xxx, each a u32 with + * a filter mask for the corresponding group. + */ + __IFLA_STATS_GETSET_MAX, +}; + +#define IFLA_STATS_GETSET_MAX (__IFLA_STATS_GETSET_MAX - 1) + /* These are embedded into IFLA_STATS_LINK_XSTATS: * [IFLA_STATS_LINK_XSTATS] * -> [LINK_XSTATS_TYPE_xxx] -- cgit v1.2.3 From 9309f97aef6d8250bb484dabeac925c3a7c57716 Mon Sep 17 00:00:00 2001 From: Petr Machata Date: Wed, 2 Mar 2022 18:31:20 +0200 Subject: net: dev: Add hardware stats support Offloading switch device drivers may be able to collect statistics of the traffic taking place in the HW datapath that pertains to a certain soft netdevice, such as VLAN. Add the necessary infrastructure to allow exposing these statistics to the offloaded netdevice in question. The API was shaped by the following considerations: - Collection of HW statistics is not free: there may be a finite number of counters, and the act of counting may have a performance impact. It is therefore necessary to allow toggling whether HW counting should be done for any particular SW netdevice. - As the drivers are loaded and removed, a particular device may get offloaded and unoffloaded again. 
At the same time, the statistics values need to stay monotonic (modulo the eventual 64-bit wraparound), increasing only to reflect traffic measured in the device. To that end, the netdevice keeps around a lazily-allocated copy of struct rtnl_link_stats64. Device drivers then contribute to the values kept therein at various points. Even as the driver goes away, the struct stays around to maintain the statistics values. - Different HW devices may be able to count different things. The motivation behind this patch in particular is exposure of HW counters on Nvidia Spectrum switches, where the only practical approach to counting traffic on offloaded soft netdevices currently is to use router interface counters, and count L3 traffic. Correspondingly that is the statistics suite added in this patch. Other devices may be able to measure different kinds of traffic, and for that reason, the APIs are built to allow uniform access to different statistics suites. - Because soft netdevices and offloading drivers are only loosely bound, a netdevice uses a notifier chain to communicate with the drivers. Several new notifiers, NETDEV_OFFLOAD_XSTATS_*, have been added to carry messages to the offloading drivers. - Devices can have various conditions for when a particular counter is available. As the device is configured and reconfigured, the device offload may become or cease being suitable for counter binding. A netdevice can use a notifier type NETDEV_OFFLOAD_XSTATS_REPORT_USED to ping offloading drivers and determine whether anyone currently implements a given statistics suite. This information can then be propagated to user space. When the driver decides to unoffload a netdevice, it can use a newly-added function, netdev_offload_xstats_report_delta(), to record outstanding collected statistics, before destroying the HW counter. 
This patch adds a helper, call_netdevice_notifiers_info_robust(), for dispatching a notifier with the possibility of unwind when one of the consumers bails. Given the wish to eventually get rid of the global notifier block altogether, this helper only invokes the per-netns notifier block. Signed-off-by: Petr Machata Signed-off-by: Ido Schimmel Signed-off-by: David S. Miller --- include/uapi/linux/if_link.h | 15 +++++++++++++++ 1 file changed, 15 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h index 4d62ea6e1288..ef6a62a2e15d 100644 --- a/include/uapi/linux/if_link.h +++ b/include/uapi/linux/if_link.h @@ -245,6 +245,21 @@ struct rtnl_link_stats64 { __u64 rx_nohandler; }; +/* Subset of link stats useful for in-HW collection. Meaning of the fields is as + * for struct rtnl_link_stats64. + */ +struct rtnl_hw_stats64 { + __u64 rx_packets; + __u64 tx_packets; + __u64 rx_bytes; + __u64 tx_bytes; + __u64 rx_errors; + __u64 tx_errors; + __u64 rx_dropped; + __u64 tx_dropped; + __u64 multicast; +}; + /* The struct should be in sync with struct ifmap */ struct rtnl_link_ifmap { __u64 mem_start; -- cgit v1.2.3 From 0e7788fd76222dba3229eada9162efab185923fc Mon Sep 17 00:00:00 2001 From: Petr Machata Date: Wed, 2 Mar 2022 18:31:21 +0200 Subject: net: rtnetlink: Add UAPI for obtaining L3 offload xstats Add a new IFLA_STATS_LINK_OFFLOAD_XSTATS child attribute, IFLA_OFFLOAD_XSTATS_L3_STATS, to carry statistics for traffic that takes place in a HW router. The offloaded HW stats are designed to allow per-netdevice enablement and disablement. Additionally, as a netdevice is configured, it may become or cease being suitable for binding of a HW counter. Both of these aspects need to be communicated to the userspace. 
To that end, add another child attribute, IFLA_OFFLOAD_XSTATS_HW_S_INFO: - attr nest IFLA_OFFLOAD_XSTATS_HW_S_INFO - attr nest IFLA_OFFLOAD_XSTATS_L3_STATS - attr IFLA_OFFLOAD_XSTATS_HW_S_INFO_REQUEST - {0,1} as u8 - attr IFLA_OFFLOAD_XSTATS_HW_S_INFO_USED - {0,1} as u8 Thus this one attribute is a nest that can be used to carry information about various types of HW statistics, and indexing is very simply done by wrapping the information for a given statistics suite into the attribute that carries the suite in the RTM_GETSTATS query. At the same time, because _HW_S_INFO is nested directly below IFLA_STATS_LINK_OFFLOAD_XSTATS, it is possible through filtering to request only the metadata about individual statistics suites, without having to hit the HW to get the actual counters. Signed-off-by: Petr Machata Signed-off-by: Ido Schimmel Signed-off-by: David S. Miller --- include/uapi/linux/if_link.h | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h index ef6a62a2e15d..b1031f481d2f 100644 --- a/include/uapi/linux/if_link.h +++ b/include/uapi/linux/if_link.h @@ -1249,10 +1249,21 @@ enum { enum { IFLA_OFFLOAD_XSTATS_UNSPEC, IFLA_OFFLOAD_XSTATS_CPU_HIT, /* struct rtnl_link_stats64 */ + IFLA_OFFLOAD_XSTATS_HW_S_INFO, /* HW stats info.
A nest */ + IFLA_OFFLOAD_XSTATS_L3_STATS, /* struct rtnl_hw_stats64 */ __IFLA_OFFLOAD_XSTATS_MAX }; #define IFLA_OFFLOAD_XSTATS_MAX (__IFLA_OFFLOAD_XSTATS_MAX - 1) +enum { + IFLA_OFFLOAD_XSTATS_HW_S_INFO_UNSPEC, + IFLA_OFFLOAD_XSTATS_HW_S_INFO_REQUEST, /* u8 */ + IFLA_OFFLOAD_XSTATS_HW_S_INFO_USED, /* u8 */ + __IFLA_OFFLOAD_XSTATS_HW_S_INFO_MAX, +}; +#define IFLA_OFFLOAD_XSTATS_HW_S_INFO_MAX \ + (__IFLA_OFFLOAD_XSTATS_HW_S_INFO_MAX - 1) + /* XDP section */ #define XDP_FLAGS_UPDATE_IF_NOEXIST (1U << 0) -- cgit v1.2.3 From 03ba35667091337d8e632cf4b814f1c1b914609b Mon Sep 17 00:00:00 2001 From: Petr Machata Date: Wed, 2 Mar 2022 18:31:22 +0200 Subject: net: rtnetlink: Add RTM_SETSTATS The offloaded HW stats are designed to allow per-netdevice enablement and disablement. These stats are only accessible through RTM_GETSTATS, and therefore should be toggled by a RTM_SETSTATS message. Add it, and the necessary skeleton handler. Signed-off-by: Petr Machata Signed-off-by: Ido Schimmel Signed-off-by: David S. Miller --- include/uapi/linux/rtnetlink.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/rtnetlink.h b/include/uapi/linux/rtnetlink.h index 0970cb4b1b88..14462dc159fd 100644 --- a/include/uapi/linux/rtnetlink.h +++ b/include/uapi/linux/rtnetlink.h @@ -146,6 +146,8 @@ enum { #define RTM_NEWSTATS RTM_NEWSTATS RTM_GETSTATS = 94, #define RTM_GETSTATS RTM_GETSTATS + RTM_SETSTATS, +#define RTM_SETSTATS RTM_SETSTATS RTM_NEWCACHEREPORT = 96, #define RTM_NEWCACHEREPORT RTM_NEWCACHEREPORT -- cgit v1.2.3 From 5fd0b838efac16046509f7fb100455d0463b9687 Mon Sep 17 00:00:00 2001 From: Petr Machata Date: Wed, 2 Mar 2022 18:31:23 +0200 Subject: net: rtnetlink: Add UAPI toggle for IFLA_OFFLOAD_XSTATS_L3_STATS The offloaded HW stats are designed to allow per-netdevice enablement and disablement. 
Add an attribute, IFLA_STATS_SET_OFFLOAD_XSTATS_L3_STATS, which should be carried by the RTM_SETSTATS message, and expresses a desire to toggle L3 offload xstats on or off. As part of the above, add an exported function rtnl_offload_xstats_notify() that drivers can use when they have installed or deinstalled the counters backing the HW stats. At this point, it is possible to enable, disable and query L3 offload xstats on netdevices. (However there is no driver actually implementing these.) Signed-off-by: Petr Machata Signed-off-by: Ido Schimmel Signed-off-by: David S. Miller --- include/uapi/linux/if_link.h | 1 + include/uapi/linux/rtnetlink.h | 2 ++ 2 files changed, 3 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h index b1031f481d2f..ddca20357e7e 100644 --- a/include/uapi/linux/if_link.h +++ b/include/uapi/linux/if_link.h @@ -1227,6 +1227,7 @@ enum { IFLA_STATS_GET_FILTERS, /* Nest of IFLA_STATS_LINK_xxx, each a u32 with * a filter mask for the corresponding group. */ + IFLA_STATS_SET_OFFLOAD_XSTATS_L3_STATS, /* 0 or 1 as u8 */ __IFLA_STATS_GETSET_MAX, }; diff --git a/include/uapi/linux/rtnetlink.h b/include/uapi/linux/rtnetlink.h index 14462dc159fd..51530aade46e 100644 --- a/include/uapi/linux/rtnetlink.h +++ b/include/uapi/linux/rtnetlink.h @@ -767,6 +767,8 @@ enum rtnetlink_groups { #define RTNLGRP_MCTP_IFADDR RTNLGRP_MCTP_IFADDR RTNLGRP_TUNNEL, #define RTNLGRP_TUNNEL RTNLGRP_TUNNEL + RTNLGRP_STATS, +#define RTNLGRP_STATS RTNLGRP_STATS __RTNLGRP_MAX }; #define RTNLGRP_MAX (__RTNLGRP_MAX - 1) -- cgit v1.2.3 From 115dcec65f61d53e25e1bed5e380468b30f98b14 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Thu, 24 Feb 2022 16:20:18 +0200 Subject: vfio: Define device migration protocol v2 Replace the existing region based migration protocol with an ioctl based protocol. The two protocols have the same general semantic behaviors, but the way the data is transported is changed. 
This is the STOP_COPY portion of the new protocol, it defines the 5 states for basic stop and copy migration and the protocol to move the migration data in/out of the kernel. Compared to the clarification of the v1 protocol Alex proposed: https://lore.kernel.org/r/163909282574.728533.7460416142511440919.stgit@omen This has a few deliberate functional differences: - ERROR arcs allow the device function to remain unchanged. - The protocol is not required to return to the original state on transition failure. Instead userspace can execute an unwind back to the original state, reset, or do something else without needing kernel support. This simplifies the kernel design and should userspace choose a policy like always reset, avoids doing useless work in the kernel on error handling paths. - PRE_COPY is made optional, userspace must discover it before using it. This reflects the fact that the majority of drivers we are aware of right now will not implement PRE_COPY. - segmentation is not part of the data stream protocol, the receiver does not have to reproduce the framing boundaries. The hybrid FSM for the device_state is described as a Mealy machine by documenting each of the arcs the driver is required to implement. Defining the remaining set of old/new device_state transitions as 'combination transitions' which are naturally defined as taking multiple FSM arcs along the shortest path within the FSM's digraph allows a complete matrix of transitions. A new VFIO_DEVICE_FEATURE of VFIO_DEVICE_FEATURE_MIG_DEVICE_STATE is defined to replace writing to the device_state field in the region. This allows returning a brand new FD whenever the requested transition opens a data transfer session. The VFIO core code implements the new feature and provides a helper function to the driver. Using the helper the driver only has to implement 6 of the FSM arcs and the other combination transitions are elaborated consistently from those arcs. 
A new VFIO_DEVICE_FEATURE of VFIO_DEVICE_FEATURE_MIGRATION is defined to report the capability for migration and indicate which set of states and arcs are supported by the device. The FSM provides a lot of flexibility to make backwards compatible extensions but the VFIO_DEVICE_FEATURE also allows for future breaking extensions for scenarios that cannot support even the basic STOP_COPY requirements. The VFIO_DEVICE_FEATURE_MIG_DEVICE_STATE with the GET option (i.e. VFIO_DEVICE_FEATURE_GET) can be used to read the current migration state of the VFIO device. Data transfer sessions are now carried over a file descriptor, instead of the region. The FD functions for the lifetime of the data transfer session. read() and write() transfer the data with normal Linux stream FD semantics. This design allows future expansion to support poll(), io_uring, and other performance optimizations. The complicated mmap mode for data transfer is discarded as current qemu doesn't take meaningful advantage of it, and the new qemu implementation avoids substantially all the performance penalty of using a read() on the region. 
Link: https://lore.kernel.org/all/20220224142024.147653-10-yishaih@nvidia.com Signed-off-by: Jason Gunthorpe Tested-by: Shameer Kolothum Reviewed-by: Kevin Tian Reviewed-by: Alex Williamson Reviewed-by: Cornelia Huck Signed-off-by: Yishai Hadas Signed-off-by: Leon Romanovsky --- include/uapi/linux/vfio.h | 174 ++++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 161 insertions(+), 13 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/vfio.h b/include/uapi/linux/vfio.h index ef33ea002b0b..22ed358c04c5 100644 --- a/include/uapi/linux/vfio.h +++ b/include/uapi/linux/vfio.h @@ -605,25 +605,25 @@ struct vfio_region_gfx_edid { struct vfio_device_migration_info { __u32 device_state; /* VFIO device state */ -#define VFIO_DEVICE_STATE_STOP (0) -#define VFIO_DEVICE_STATE_RUNNING (1 << 0) -#define VFIO_DEVICE_STATE_SAVING (1 << 1) -#define VFIO_DEVICE_STATE_RESUMING (1 << 2) -#define VFIO_DEVICE_STATE_MASK (VFIO_DEVICE_STATE_RUNNING | \ - VFIO_DEVICE_STATE_SAVING | \ - VFIO_DEVICE_STATE_RESUMING) +#define VFIO_DEVICE_STATE_V1_STOP (0) +#define VFIO_DEVICE_STATE_V1_RUNNING (1 << 0) +#define VFIO_DEVICE_STATE_V1_SAVING (1 << 1) +#define VFIO_DEVICE_STATE_V1_RESUMING (1 << 2) +#define VFIO_DEVICE_STATE_MASK (VFIO_DEVICE_STATE_V1_RUNNING | \ + VFIO_DEVICE_STATE_V1_SAVING | \ + VFIO_DEVICE_STATE_V1_RESUMING) #define VFIO_DEVICE_STATE_VALID(state) \ - (state & VFIO_DEVICE_STATE_RESUMING ? \ - (state & VFIO_DEVICE_STATE_MASK) == VFIO_DEVICE_STATE_RESUMING : 1) + (state & VFIO_DEVICE_STATE_V1_RESUMING ? 
\ + (state & VFIO_DEVICE_STATE_MASK) == VFIO_DEVICE_STATE_V1_RESUMING : 1) #define VFIO_DEVICE_STATE_IS_ERROR(state) \ - ((state & VFIO_DEVICE_STATE_MASK) == (VFIO_DEVICE_STATE_SAVING | \ - VFIO_DEVICE_STATE_RESUMING)) + ((state & VFIO_DEVICE_STATE_MASK) == (VFIO_DEVICE_STATE_V1_SAVING | \ + VFIO_DEVICE_STATE_V1_RESUMING)) #define VFIO_DEVICE_STATE_SET_ERROR(state) \ - ((state & ~VFIO_DEVICE_STATE_MASK) | VFIO_DEVICE_SATE_SAVING | \ - VFIO_DEVICE_STATE_RESUMING) + ((state & ~VFIO_DEVICE_STATE_MASK) | VFIO_DEVICE_STATE_V1_SAVING | \ + VFIO_DEVICE_STATE_V1_RESUMING) __u32 reserved; __u64 pending_bytes; @@ -1002,6 +1002,154 @@ struct vfio_device_feature { */ #define VFIO_DEVICE_FEATURE_PCI_VF_TOKEN (0) +/* + * Indicates the device can support the migration API through + * VFIO_DEVICE_FEATURE_MIG_DEVICE_STATE. If this GET succeeds, the RUNNING and + * ERROR states are always supported. Support for additional states is + * indicated via the flags field; at least VFIO_MIGRATION_STOP_COPY must be + * set. + * + * VFIO_MIGRATION_STOP_COPY means that STOP, STOP_COPY and + * RESUMING are supported. + */ +struct vfio_device_feature_migration { + __aligned_u64 flags; +#define VFIO_MIGRATION_STOP_COPY (1 << 0) +}; +#define VFIO_DEVICE_FEATURE_MIGRATION 1 + +/* + * Upon VFIO_DEVICE_FEATURE_SET, execute a migration state change on the VFIO + * device. The new state is supplied in device_state, see enum + * vfio_device_mig_state for details + * + * The kernel migration driver must fully transition the device to the new state + * value before the operation returns to the user. + * + * The kernel migration driver must not generate asynchronous device state + * transitions outside of manipulation by the user or the VFIO_DEVICE_RESET + * ioctl as described above. + * + * If this function fails then current device_state may be the original + * operating state or some other state along the combination transition path. 
+ * The user can then decide if it should execute a VFIO_DEVICE_RESET, attempt + * to return to the original state, or attempt to return to some other state + * such as RUNNING or STOP. + * + * If the new_state starts a new data transfer session then the FD associated + * with that session is returned in data_fd. The user is responsible to close + * this FD when it is finished. The user must consider the migration data stream + * carried over the FD to be opaque and must preserve the byte order of the + * stream. The user is not required to preserve buffer segmentation when writing + * the data stream during the RESUMING operation. + * + * Upon VFIO_DEVICE_FEATURE_GET, get the current migration state of the VFIO + * device, data_fd will be -1. + */ +struct vfio_device_feature_mig_state { + __u32 device_state; /* From enum vfio_device_mig_state */ + __s32 data_fd; +}; +#define VFIO_DEVICE_FEATURE_MIG_DEVICE_STATE 2 + +/* + * The device migration Finite State Machine is described by the enum + * vfio_device_mig_state. Some of the FSM arcs will create a migration data + * transfer session by returning a FD, in this case the migration data will + * flow over the FD using read() and write() as discussed below. + * + * There are 5 states to support VFIO_MIGRATION_STOP_COPY: + * RUNNING - The device is running normally + * STOP - The device does not change the internal or external state + * STOP_COPY - The device internal state can be read out + * RESUMING - The device is stopped and is loading a new internal state + * ERROR - The device has failed and must be reset + * + * The FSM takes actions on the arcs between FSM states. The driver implements + * the following behavior for the FSM arcs: + * + * RUNNING -> STOP + * STOP_COPY -> STOP + * While in STOP the device must stop the operation of the device. The device + * must not generate interrupts, DMA, or any other change to external state. + * It must not change its internal state. 
When stopped the device and kernel + * migration driver must accept and respond to interaction to support external + * subsystems in the STOP state, for example PCI MSI-X and PCI config space. + * Failure by the user to restrict device access while in STOP must not result + * in error conditions outside the user context (ex. host system faults). + * + * The STOP_COPY arc will terminate a data transfer session. + * + * RESUMING -> STOP + * Leaving RESUMING terminates a data transfer session and indicates the + * device should complete processing of the data delivered by write(). The + * kernel migration driver should complete the incorporation of data written + * to the data transfer FD into the device internal state and perform + * final validity and consistency checking of the new device state. If the + * user provided data is found to be incomplete, inconsistent, or otherwise + * invalid, the migration driver must fail the SET_STATE ioctl and + * optionally go to the ERROR state as described below. + * + * While in STOP the device has the same behavior as other STOP states + * described above. + * + * To abort a RESUMING session the device must be reset. + * + * STOP -> RUNNING + * While in RUNNING the device is fully operational, the device may generate + * interrupts, DMA, respond to MMIO, all vfio device regions are functional, + * and the device may advance its internal state. + * + * STOP -> STOP_COPY + * This arc begins the process of saving the device state and will return a + * new data_fd. + * + * While in the STOP_COPY state the device has the same behavior as STOP + * with the addition that the data transfer session continues to stream the + * migration state. End of stream on the FD indicates the entire device + * state has been transferred. + * + * The user should take steps to restrict access to vfio device regions while + * the device is in STOP_COPY or risk corruption of the device migration data + * stream.
+ * + * STOP -> RESUMING + * Entering the RESUMING state starts a process of restoring the device state + * and will return a new data_fd. The data stream fed into the data_fd should + * be taken from the data transfer output of a single FD during saving from + * a compatible device. The migration driver may alter/reset the internal + * device state for this arc if required to prepare the device to receive the + * migration data. + * + * any -> ERROR + * ERROR cannot be specified as a device state, however any transition request + * can be failed with an errno return and may then move the device_state into + * ERROR. In this case the device was unable to execute the requested arc and + * was also unable to restore the device to any valid device_state. + * To recover from ERROR VFIO_DEVICE_RESET must be used to return the + * device_state back to RUNNING. + * + * The remaining possible transitions are interpreted as combinations of the + * above FSM arcs. As there are multiple paths through the FSM arcs the path + * should be selected based on the following rules: + * - Select the shortest path. + * Refer to vfio_mig_get_next_state() for the result of the algorithm. + * + * The automatic transit through the FSM arcs that make up the combination + * transition is invisible to the user. When working with combination arcs the + * user may see any step along the path in the device_state if SET_STATE + * fails. When handling these types of errors users should anticipate future + * revisions of this protocol using new states and those states becoming + * visible in this case. 
+ */ +enum vfio_device_mig_state { + VFIO_DEVICE_STATE_ERROR = 0, + VFIO_DEVICE_STATE_STOP = 1, + VFIO_DEVICE_STATE_RUNNING = 2, + VFIO_DEVICE_STATE_STOP_COPY = 3, + VFIO_DEVICE_STATE_RESUMING = 4, +}; + /* -------- API for Type1 VFIO IOMMU -------- */ /** -- cgit v1.2.3 From 8cb3d83b959be0631cd719b995c40c3cda21cd47 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Thu, 24 Feb 2022 16:20:19 +0200 Subject: vfio: Extend the device migration protocol with RUNNING_P2P The RUNNING_P2P state is designed to support multiple devices in the same VM that are doing P2P transactions between themselves. When in RUNNING_P2P the device must be able to accept incoming P2P transactions but should not generate outgoing P2P transactions. As an optional extension to the mandatory states it is defined as in between STOP and RUNNING: STOP -> RUNNING_P2P -> RUNNING -> RUNNING_P2P -> STOP For drivers that are unable to support RUNNING_P2P the core code silently merges RUNNING_P2P and RUNNING together. Unless driver support is present, the new state cannot be used in SET_STATE. Drivers that support this will be required to implement 4 FSM arcs beyond the basic FSM. 2 of the basic FSM arcs become combination transitions. Compared to the v1 clarification, NDMA is redefined into FSM states and is described in terms of the desired P2P quiescent behavior, noting that halting all DMA is an acceptable implementation. 
Link: https://lore.kernel.org/all/20220224142024.147653-11-yishaih@nvidia.com Signed-off-by: Jason Gunthorpe Tested-by: Shameer Kolothum Reviewed-by: Kevin Tian Reviewed-by: Alex Williamson Signed-off-by: Yishai Hadas Signed-off-by: Leon Romanovsky --- include/uapi/linux/vfio.h | 36 ++++++++++++++++++++++++++++++++++-- 1 file changed, 34 insertions(+), 2 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/vfio.h b/include/uapi/linux/vfio.h index 22ed358c04c5..26a66f68371d 100644 --- a/include/uapi/linux/vfio.h +++ b/include/uapi/linux/vfio.h @@ -1011,10 +1011,16 @@ struct vfio_device_feature { * * VFIO_MIGRATION_STOP_COPY means that STOP, STOP_COPY and * RESUMING are supported. + * + * VFIO_MIGRATION_STOP_COPY | VFIO_MIGRATION_P2P means that RUNNING_P2P + * is supported in addition to the STOP_COPY states. + * + * Other combinations of flags have behavior to be defined in the future. */ struct vfio_device_feature_migration { __aligned_u64 flags; #define VFIO_MIGRATION_STOP_COPY (1 << 0) +#define VFIO_MIGRATION_P2P (1 << 1) }; #define VFIO_DEVICE_FEATURE_MIGRATION 1 @@ -1065,10 +1071,13 @@ struct vfio_device_feature_mig_state { * RESUMING - The device is stopped and is loading a new internal state * ERROR - The device has failed and must be reset * + * And 1 optional state to support VFIO_MIGRATION_P2P: + * RUNNING_P2P - RUNNING, except the device cannot do peer to peer DMA + * * The FSM takes actions on the arcs between FSM states. The driver implements * the following behavior for the FSM arcs: * - * RUNNING -> STOP + * RUNNING_P2P -> STOP * STOP_COPY -> STOP * While in STOP the device must stop the operation of the device. The device * must not generate interrupts, DMA, or any other change to external state. @@ -1095,11 +1104,16 @@ struct vfio_device_feature_mig_state { * * To abort a RESUMING session the device must be reset. 
* - * STOP -> RUNNING + * RUNNING_P2P -> RUNNING * While in RUNNING the device is fully operational, the device may generate * interrupts, DMA, respond to MMIO, all vfio device regions are functional, * and the device may advance its internal state. * + * RUNNING -> RUNNING_P2P + * STOP -> RUNNING_P2P + * While in RUNNING_P2P the device is partially running in the P2P quiescent + * state defined below. + * * STOP -> STOP_COPY * This arc begin the process of saving the device state and will return a * new data_fd. @@ -1129,6 +1143,18 @@ struct vfio_device_feature_mig_state { * To recover from ERROR VFIO_DEVICE_RESET must be used to return the * device_state back to RUNNING. * + * The optional peer to peer (P2P) quiescent state is intended to be a quiescent + * state for the device for the purposes of managing multiple devices within a + * user context where peer-to-peer DMA between devices may be active. The + * RUNNING_P2P states must prevent the device from initiating + * any new P2P DMA transactions. If the device can identify P2P transactions + * then it can stop only P2P DMA, otherwise it must stop all DMA. The migration + * driver must complete any such outstanding operations prior to completing the + * FSM arc into a P2P state. For the purpose of specification the states + * behave as though the device was fully running if not supported. Like while in + * STOP or STOP_COPY the user must not touch the device, otherwise the state + * can be exited. + * * The remaining possible transitions are interpreted as combinations of the * above FSM arcs. As there are multiple paths through the FSM arcs the path * should be selected based on the following rules: @@ -1141,6 +1167,11 @@ struct vfio_device_feature_mig_state { * fails. When handling these types of errors users should anticipate future * revisions of this protocol using new states and those states becoming * visible in this case. 
+ * + * The optional states cannot be used with SET_STATE if the device does not + * support them. The user can discover if these states are supported by using + * VFIO_DEVICE_FEATURE_MIGRATION. By using combination transitions the user can + * avoid knowing about these optional states if the kernel driver supports them. */ enum vfio_device_mig_state { VFIO_DEVICE_STATE_ERROR = 0, @@ -1148,6 +1179,7 @@ enum vfio_device_mig_state { VFIO_DEVICE_STATE_RUNNING = 2, VFIO_DEVICE_STATE_STOP_COPY = 3, VFIO_DEVICE_STATE_RESUMING = 4, + VFIO_DEVICE_STATE_RUNNING_P2P = 5, }; /* -------- API for Type1 VFIO IOMMU -------- */ -- cgit v1.2.3 From 0f3f9cd7f752f6e685e378620babc5e34af6fb9f Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Thu, 24 Feb 2022 16:20:20 +0200 Subject: vfio: Remove migration protocol v1 documentation v1 was never implemented and is replaced by v2. The old uAPI documentation is removed from the header file. The old uAPI definitions are still kept in the header file to ease transition for userspace copying these headers. They will be fully removed down the road. 
Link: https://lore.kernel.org/all/20220224142024.147653-12-yishaih@nvidia.com Signed-off-by: Jason Gunthorpe Tested-by: Shameer Kolothum Reviewed-by: Alex Williamson Reviewed-by: Cornelia Huck Signed-off-by: Yishai Hadas Signed-off-by: Leon Romanovsky --- include/uapi/linux/vfio.h | 200 +--------------------------------------------- 1 file changed, 2 insertions(+), 198 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/vfio.h b/include/uapi/linux/vfio.h index 26a66f68371d..fea86061b44e 100644 --- a/include/uapi/linux/vfio.h +++ b/include/uapi/linux/vfio.h @@ -323,7 +323,7 @@ struct vfio_region_info_cap_type { #define VFIO_REGION_TYPE_PCI_VENDOR_MASK (0xffff) #define VFIO_REGION_TYPE_GFX (1) #define VFIO_REGION_TYPE_CCW (2) -#define VFIO_REGION_TYPE_MIGRATION (3) +#define VFIO_REGION_TYPE_MIGRATION_DEPRECATED (3) /* sub-types for VFIO_REGION_TYPE_PCI_* */ @@ -405,203 +405,7 @@ struct vfio_region_gfx_edid { #define VFIO_REGION_SUBTYPE_CCW_CRW (3) /* sub-types for VFIO_REGION_TYPE_MIGRATION */ -#define VFIO_REGION_SUBTYPE_MIGRATION (1) - -/* - * The structure vfio_device_migration_info is placed at the 0th offset of - * the VFIO_REGION_SUBTYPE_MIGRATION region to get and set VFIO device related - * migration information. Field accesses from this structure are only supported - * at their native width and alignment. Otherwise, the result is undefined and - * vendor drivers should return an error. - * - * device_state: (read/write) - * - The user application writes to this field to inform the vendor driver - * about the device state to be transitioned to. - * - The vendor driver should take the necessary actions to change the - * device state. After successful transition to a given state, the - * vendor driver should return success on write(device_state, state) - * system call. If the device state transition fails, the vendor driver - * should return an appropriate -errno for the fault condition. 
- * - On the user application side, if the device state transition fails, - * that is, if write(device_state, state) returns an error, read - * device_state again to determine the current state of the device from - * the vendor driver. - * - The vendor driver should return previous state of the device unless - * the vendor driver has encountered an internal error, in which case - * the vendor driver may report the device_state VFIO_DEVICE_STATE_ERROR. - * - The user application must use the device reset ioctl to recover the - * device from VFIO_DEVICE_STATE_ERROR state. If the device is - * indicated to be in a valid device state by reading device_state, the - * user application may attempt to transition the device to any valid - * state reachable from the current state or terminate itself. - * - * device_state consists of 3 bits: - * - If bit 0 is set, it indicates the _RUNNING state. If bit 0 is clear, - * it indicates the _STOP state. When the device state is changed to - * _STOP, driver should stop the device before write() returns. - * - If bit 1 is set, it indicates the _SAVING state, which means that the - * driver should start gathering device state information that will be - * provided to the VFIO user application to save the device's state. - * - If bit 2 is set, it indicates the _RESUMING state, which means that - * the driver should prepare to resume the device. Data provided through - * the migration region should be used to resume the device. - * Bits 3 - 31 are reserved for future use. To preserve them, the user - * application should perform a read-modify-write operation on this - * field when modifying the specified bits. 
- * - * +------- _RESUMING - * |+------ _SAVING - * ||+----- _RUNNING - * ||| - * 000b => Device Stopped, not saving or resuming - * 001b => Device running, which is the default state - * 010b => Stop the device & save the device state, stop-and-copy state - * 011b => Device running and save the device state, pre-copy state - * 100b => Device stopped and the device state is resuming - * 101b => Invalid state - * 110b => Error state - * 111b => Invalid state - * - * State transitions: - * - * _RESUMING _RUNNING Pre-copy Stop-and-copy _STOP - * (100b) (001b) (011b) (010b) (000b) - * 0. Running or default state - * | - * - * 1. Normal Shutdown (optional) - * |------------------------------------->| - * - * 2. Save the state or suspend - * |------------------------->|---------->| - * - * 3. Save the state during live migration - * |----------->|------------>|---------->| - * - * 4. Resuming - * |<---------| - * - * 5. Resumed - * |--------->| - * - * 0. Default state of VFIO device is _RUNNING when the user application starts. - * 1. During normal shutdown of the user application, the user application may - * optionally change the VFIO device state from _RUNNING to _STOP. This - * transition is optional. The vendor driver must support this transition but - * must not require it. - * 2. When the user application saves state or suspends the application, the - * device state transitions from _RUNNING to stop-and-copy and then to _STOP. - * On state transition from _RUNNING to stop-and-copy, driver must stop the - * device, save the device state and send it to the application through the - * migration region. The sequence to be followed for such transition is given - * below. - * 3. In live migration of user application, the state transitions from _RUNNING - * to pre-copy, to stop-and-copy, and to _STOP. 
- * On state transition from _RUNNING to pre-copy, the driver should start - * gathering the device state while the application is still running and send - * the device state data to application through the migration region. - * On state transition from pre-copy to stop-and-copy, the driver must stop - * the device, save the device state and send it to the user application - * through the migration region. - * Vendor drivers must support the pre-copy state even for implementations - * where no data is provided to the user before the stop-and-copy state. The - * user must not be required to consume all migration data before the device - * transitions to a new state, including the stop-and-copy state. - * The sequence to be followed for above two transitions is given below. - * 4. To start the resuming phase, the device state should be transitioned from - * the _RUNNING to the _RESUMING state. - * In the _RESUMING state, the driver should use the device state data - * received through the migration region to resume the device. - * 5. After providing saved device data to the driver, the application should - * change the state from _RESUMING to _RUNNING. - * - * reserved: - * Reads on this field return zero and writes are ignored. - * - * pending_bytes: (read only) - * The number of pending bytes still to be migrated from the vendor driver. - * - * data_offset: (read only) - * The user application should read data_offset field from the migration - * region. The user application should read the device data from this - * offset within the migration region during the _SAVING state or write - * the device data during the _RESUMING state. See below for details of - * sequence to be followed. - * - * data_size: (read/write) - * The user application should read data_size to get the size in bytes of - * the data copied in the migration region during the _SAVING state and - * write the size in bytes of the data copied in the migration region - * during the _RESUMING state. 
- * - * The format of the migration region is as follows: - * ------------------------------------------------------------------ - * |vfio_device_migration_info| data section | - * | | /////////////////////////////// | - * ------------------------------------------------------------------ - * ^ ^ - * offset 0-trapped part data_offset - * - * The structure vfio_device_migration_info is always followed by the data - * section in the region, so data_offset will always be nonzero. The offset - * from where the data is copied is decided by the kernel driver. The data - * section can be trapped, mmapped, or partitioned, depending on how the kernel - * driver defines the data section. The data section partition can be defined - * as mapped by the sparse mmap capability. If mmapped, data_offset must be - * page aligned, whereas initial section which contains the - * vfio_device_migration_info structure, might not end at the offset, which is - * page aligned. The user is not required to access through mmap regardless - * of the capabilities of the region mmap. - * The vendor driver should determine whether and how to partition the data - * section. The vendor driver should return data_offset accordingly. - * - * The sequence to be followed while in pre-copy state and stop-and-copy state - * is as follows: - * a. Read pending_bytes, indicating the start of a new iteration to get device - * data. Repeated read on pending_bytes at this stage should have no side - * effects. - * If pending_bytes == 0, the user application should not iterate to get data - * for that device. - * If pending_bytes > 0, perform the following steps. - * b. Read data_offset, indicating that the vendor driver should make data - * available through the data section. The vendor driver should return this - * read operation only after data is available from (region + data_offset) - * to (region + data_offset + data_size). - * c. 
Read data_size, which is the amount of data in bytes available through - * the migration region. - * Read on data_offset and data_size should return the offset and size of - * the current buffer if the user application reads data_offset and - * data_size more than once here. - * d. Read data_size bytes of data from (region + data_offset) from the - * migration region. - * e. Process the data. - * f. Read pending_bytes, which indicates that the data from the previous - * iteration has been read. If pending_bytes > 0, go to step b. - * - * The user application can transition from the _SAVING|_RUNNING - * (pre-copy state) to the _SAVING (stop-and-copy) state regardless of the - * number of pending bytes. The user application should iterate in _SAVING - * (stop-and-copy) until pending_bytes is 0. - * - * The sequence to be followed while _RESUMING device state is as follows: - * While data for this device is available, repeat the following steps: - * a. Read data_offset from where the user application should write data. - * b. Write migration data starting at the migration region + data_offset for - * the length determined by data_size from the migration source. - * c. Write data_size, which indicates to the vendor driver that data is - * written in the migration region. Vendor driver must return this write - * operations on consuming data. Vendor driver should apply the - * user-provided migration region data to the device resume state. - * - * If an error occurs during the above sequences, the vendor driver can return - * an error code for next read() or write() operation, which will terminate the - * loop. The user application should then take the next necessary action, for - * example, failing migration or terminating the user application. - * - * For the user application, data is opaque. The user application should write - * data in the same order as the data is received and the data should be of - * same transaction size at the source. 
- */ +#define VFIO_REGION_SUBTYPE_MIGRATION_DEPRECATED (1) struct vfio_device_migration_info { __u32 device_state; /* VFIO device state */ -- cgit v1.2.3 From 8d21ec0e46ed6e39994accff8eb4f2be3d2e76b5 Mon Sep 17 00:00:00 2001 From: Martin KaFai Lau Date: Wed, 2 Mar 2022 11:56:34 -0800 Subject: bpf: Add __sk_buff->delivery_time_type and bpf_skb_set_skb_delivery_time() * __sk_buff->delivery_time_type: This patch adds __sk_buff->delivery_time_type. It tells if the delivery_time is stored in __sk_buff->tstamp or not. It will be most useful for ingress to tell if the __sk_buff->tstamp has the (rcv) timestamp or delivery_time. If delivery_time_type is 0 (BPF_SKB_DELIVERY_TIME_NONE), it has the (rcv) timestamp. Two non-zero types are defined for the delivery_time_type, BPF_SKB_DELIVERY_TIME_MONO and BPF_SKB_DELIVERY_TIME_UNSPEC. For UNSPEC, it can only happen in egress because only mono delivery_time can be forwarded to ingress now. The clock of UNSPEC delivery_time can be deduced from the skb->sk->sk_clockid which is how the sch_etf doing it also. * Provide forwarded delivery_time to tc-bpf@ingress: With the help of the new delivery_time_type, the tc-bpf has a way to tell if the __sk_buff->tstamp has the (rcv) timestamp or the delivery_time. During bpf load time, the verifier will learn if the bpf prog has accessed the new __sk_buff->delivery_time_type. If it does, it means the tc-bpf@ingress is expecting the skb->tstamp could have the delivery_time. The kernel will then read the skb->tstamp as-is during bpf insn rewrite without checking the skb->mono_delivery_time. This is done by adding a new prog->delivery_time_access bit. The same goes for writing skb->tstamp. * bpf_skb_set_delivery_time(): The bpf_skb_set_delivery_time() helper is added to allow setting both delivery_time and the delivery_time_type at the same time. 
If the tc-bpf does not need to change the delivery_time_type, it can directly write to the __sk_buff->tstamp as the existing tc-bpf has already been doing. It will be most useful at ingress to change the __sk_buff->tstamp from the (rcv) timestamp to a mono delivery_time and then bpf_redirect_*(). bpf only has mono clock helper (bpf_ktime_get_ns), and the current known use case is the mono EDT for fq, and only mono delivery time can be kept during forward now, so bpf_skb_set_delivery_time() only supports setting BPF_SKB_DELIVERY_TIME_MONO. It can be extended later when use cases come up and the forwarding path also supports other clock bases. Signed-off-by: Martin KaFai Lau Signed-off-by: David S. Miller --- include/uapi/linux/bpf.h | 41 ++++++++++++++++++++++++++++++++++++++++- 1 file changed, 40 insertions(+), 1 deletion(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index afe3d0d7f5f2..4eebea830613 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -5086,6 +5086,37 @@ union bpf_attr { * Return * 0 on success, or a negative error in case of failure. On error * *dst* buffer is zeroed out. + * + * long bpf_skb_set_delivery_time(struct sk_buff *skb, u64 dtime, u32 dtime_type) + * Description + * Set a *dtime* (delivery time) to the __sk_buff->tstamp and also + * change the __sk_buff->delivery_time_type to *dtime_type*. + * + * When setting a delivery time (non zero *dtime*) to + * __sk_buff->tstamp, only BPF_SKB_DELIVERY_TIME_MONO *dtime_type* + * is supported. It is the only delivery_time_type that will be + * kept after bpf_redirect_*(). + * + * If there is no need to change the __sk_buff->delivery_time_type, + * the delivery time can be directly written to __sk_buff->tstamp + * instead. + * + * *dtime* 0 and *dtime_type* BPF_SKB_DELIVERY_TIME_NONE + * can be used to clear any delivery time stored in + * __sk_buff->tstamp. + * + * Only IPv4 and IPv6 skb->protocol are supported. 
+ * + * This function is most useful when it needs to set a + * mono delivery time to __sk_buff->tstamp and then + * bpf_redirect_*() to the egress of an iface. For example, + * changing the (rcv) timestamp in __sk_buff->tstamp at + * ingress to a mono delivery time and then bpf_redirect_*() + * to sch_fq@phy-dev. + * Return + * 0 on success. + * **-EINVAL** for invalid input + * **-EOPNOTSUPP** for unsupported delivery_time_type and protocol */ #define __BPF_FUNC_MAPPER(FN) \ FN(unspec), \ @@ -5280,6 +5311,7 @@ union bpf_attr { FN(xdp_load_bytes), \ FN(xdp_store_bytes), \ FN(copy_from_user_task), \ + FN(skb_set_delivery_time), \ /* */ /* integer value in 'imm' field of BPF_CALL instruction selects which helper @@ -5469,6 +5501,12 @@ union { \ __u64 :64; \ } __attribute__((aligned(8))) +enum { + BPF_SKB_DELIVERY_TIME_NONE, + BPF_SKB_DELIVERY_TIME_UNSPEC, + BPF_SKB_DELIVERY_TIME_MONO, +}; + /* user accessible mirror of in-kernel sk_buff. * new fields can only be added to the end of this structure */ @@ -5509,7 +5547,8 @@ struct __sk_buff { __u32 gso_segs; __bpf_md_ptr(struct bpf_sock *, sk); __u32 gso_size; - __u32 :32; /* Padding, future use. */ + __u8 delivery_time_type; + __u32 :24; /* Padding, future use. */ __u64 hwtstamp; }; -- cgit v1.2.3 From 51ef2be546e2e480e56fdb59fdeb9a4406e8d52e Mon Sep 17 00:00:00 2001 From: Marek Vasut Date: Thu, 17 Feb 2022 16:44:07 +0100 Subject: media: i2c: isl7998x: Add driver for Intersil ISL7998x Add driver for the Intersil ISL7998x Analog to MIPI CSI-2/BT656 decoder. This chip supports 1/2/4 analog video inputs and converts them into 1/2/4 VCs in MIPI CSI2 stream. This driver currently supports ISL79987 and both 720x480 and 720x576 resolutions, however as per specification, all inputs must use the same resolution and standard. The only supported pixel format is now YUYV/YUV422. The chip should support RGB565 on the CSI2 as well, but this is currently unsupported. 
Signed-off-by: Marek Vasut Cc: Sakari Ailus Cc: Mauro Carvalho Chehab Cc: Rob Herring To: linux-media@vger.kernel.org Signed-off-by: Michael Tretter Acked-by: Hans Verkuil [Sakari Ailus: Always call pm_runtime_get_and_resume in pre_streamon] Signed-off-by: Sakari Ailus --- include/uapi/linux/v4l2-controls.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/v4l2-controls.h b/include/uapi/linux/v4l2-controls.h index c8e0f84d204d..92576ed03fc4 100644 --- a/include/uapi/linux/v4l2-controls.h +++ b/include/uapi/linux/v4l2-controls.h @@ -219,6 +219,12 @@ enum v4l2_colorfx { */ #define V4L2_CID_USER_ALLEGRO_BASE (V4L2_CID_USER_BASE + 0x1170) +/* + * The base for the isl7998x driver controls. + * We reserve 16 controls for this driver. + */ +#define V4L2_CID_USER_ISL7998X_BASE (V4L2_CID_USER_BASE + 0x1180) + /* MPEG-class control IDs */ /* The MPEG controls are applicable to all codec controls * and the 'MPEG' part of the define is historical */ -- cgit v1.2.3 From bfa26ba343c727e055223be04e08f2ebdd43c293 Mon Sep 17 00:00:00 2001 From: William Mahon Date: Thu, 3 Mar 2022 18:23:42 -0800 Subject: HID: add mapping for KEY_DICTATE Numerous keyboards are adding dictate keys which allows for text messages to be dictated by a microphone. This patch adds a new key definition KEY_DICTATE and maps 0x0c/0x0d8 usage code to this new keycode. Additionally hid-debug is adjusted to recognize this new usage code as well. 
Signed-off-by: William Mahon Acked-by: Benjamin Tissoires Link: https://lore.kernel.org/r/20220303021501.1.I5dbf50eb1a7a6734ee727bda4a8573358c6d3ec0@changeid Signed-off-by: Dmitry Torokhov --- include/uapi/linux/input-event-codes.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/input-event-codes.h b/include/uapi/linux/input-event-codes.h index 225ec87d4f22..4db5d41848e4 100644 --- a/include/uapi/linux/input-event-codes.h +++ b/include/uapi/linux/input-event-codes.h @@ -612,6 +612,7 @@ #define KEY_ASSISTANT 0x247 /* AL Context-aware desktop assistant */ #define KEY_KBD_LAYOUT_NEXT 0x248 /* AC Next Keyboard Layout Select */ #define KEY_EMOJI_PICKER 0x249 /* Show/hide emoji picker (HUTRR101) */ +#define KEY_DICTATE 0x24a /* Start or Stop Voice Dictation Session (HUTRR99) */ #define KEY_BRIGHTNESS_MIN 0x250 /* Set Brightness to Minimum */ #define KEY_BRIGHTNESS_MAX 0x251 /* Set Brightness to Maximum */ -- cgit v1.2.3 From 327b89f0acc4c20a06ed59e4d9af7f6d804dc2e2 Mon Sep 17 00:00:00 2001 From: William Mahon Date: Thu, 3 Mar 2022 18:26:22 -0800 Subject: HID: add mapping for KEY_ALL_APPLICATIONS This patch adds a new key definition for KEY_ALL_APPLICATIONS and aliases KEY_DASHBOARD to it. It also maps the 0x0c/0x2a2 usage code to KEY_ALL_APPLICATIONS. 
Signed-off-by: William Mahon Acked-by: Benjamin Tissoires Link: https://lore.kernel.org/r/20220303035618.1.I3a7746ad05d270161a18334ae06e3b6db1a1d339@changeid Signed-off-by: Dmitry Torokhov --- include/uapi/linux/input-event-codes.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/input-event-codes.h b/include/uapi/linux/input-event-codes.h index 4db5d41848e4..7989d9483ea7 100644 --- a/include/uapi/linux/input-event-codes.h +++ b/include/uapi/linux/input-event-codes.h @@ -278,7 +278,8 @@ #define KEY_PAUSECD 201 #define KEY_PROG3 202 #define KEY_PROG4 203 -#define KEY_DASHBOARD 204 /* AL Dashboard */ +#define KEY_ALL_APPLICATIONS 204 /* AC Desktop Show All Applications */ +#define KEY_DASHBOARD KEY_ALL_APPLICATIONS #define KEY_SUSPEND 205 #define KEY_CLOSE 206 /* AC Close */ #define KEY_PLAY 207 -- cgit v1.2.3 From 96ba61ee5331eb6e2f4c2baeb994b9ceb01d8266 Mon Sep 17 00:00:00 2001 From: Dmitry Osipenko Date: Sun, 20 Feb 2022 21:46:16 +0100 Subject: media: v4l2-ctrls: Add new V4L2_H264_DECODE_PARAM_FLAG_P/BFRAME flags Add new V4L2_H264_DECODE_PARAM_FLAG_P/BFRAME flags that are needed by NVIDIA Tegra video decoder. Userspace will have to set these flags in accordance to the type of a decoded frame. 
Reviewed-by: Nicolas Dufresne Signed-off-by: Dmitry Osipenko Signed-off-by: Hans Verkuil Signed-off-by: Mauro Carvalho Chehab --- include/uapi/linux/v4l2-controls.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/v4l2-controls.h b/include/uapi/linux/v4l2-controls.h index c8e0f84d204d..e3d48d571062 100644 --- a/include/uapi/linux/v4l2-controls.h +++ b/include/uapi/linux/v4l2-controls.h @@ -1563,6 +1563,8 @@ struct v4l2_h264_dpb_entry { #define V4L2_H264_DECODE_PARAM_FLAG_IDR_PIC 0x01 #define V4L2_H264_DECODE_PARAM_FLAG_FIELD_PIC 0x02 #define V4L2_H264_DECODE_PARAM_FLAG_BOTTOM_FIELD 0x04 +#define V4L2_H264_DECODE_PARAM_FLAG_PFRAME 0x08 +#define V4L2_H264_DECODE_PARAM_FLAG_BFRAME 0x10 #define V4L2_CID_STATELESS_H264_DECODE_PARAMS (V4L2_CID_CODEC_STATELESS_BASE + 7) /** -- cgit v1.2.3 From 1092347165cf5ed1453c1f211641a859818a2828 Mon Sep 17 00:00:00 2001 From: Sean Young Date: Wed, 9 Feb 2022 17:03:12 +0100 Subject: media: lirc: remove unused feature LIRC_CAN_SET_REC_DUTY_CYCLE There is no hardware which can filter input on the duty cycle, so no driver implements this. On top of that, LIRC_CAN_SET_REC_DUTY_CYCLE has the same value as LIRC_CAN_MEASURE_CARRIER (0x02000000). 
Signed-off-by: Sean Young Signed-off-by: Mauro Carvalho Chehab --- include/uapi/linux/lirc.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/lirc.h b/include/uapi/linux/lirc.h index 21c69a6a100d..23b0f2c8ba81 100644 --- a/include/uapi/linux/lirc.h +++ b/include/uapi/linux/lirc.h @@ -73,7 +73,6 @@ #define LIRC_CAN_REC_MASK LIRC_MODE2REC(LIRC_CAN_SEND_MASK) #define LIRC_CAN_SET_REC_CARRIER (LIRC_CAN_SET_SEND_CARRIER << 16) -#define LIRC_CAN_SET_REC_DUTY_CYCLE (LIRC_CAN_SET_SEND_DUTY_CYCLE << 16) #define LIRC_CAN_SET_REC_CARRIER_RANGE 0x80000000 #define LIRC_CAN_GET_REC_RESOLUTION 0x20000000 -- cgit v1.2.3 From 72a74c8f0a0df12c7d7ea012aa70d95152858dea Mon Sep 17 00:00:00 2001 From: Ming Qian Date: Thu, 24 Feb 2022 11:10:00 +0800 Subject: media: add nv12m_8l128 and nv12m_10be_8l128 video format. nv12m_8l128 is 8-bit tiled nv12 format used by amphion decoder. nv12m_10be_8l128 is 10-bit tiled format used by amphion decoder. The tile size is 8x128 Signed-off-by: Ming Qian Signed-off-by: Shijie Qin Signed-off-by: Zhou Peng Signed-off-by: Hans Verkuil --- include/uapi/linux/videodev2.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/videodev2.h b/include/uapi/linux/videodev2.h index df8b9c486ba1..3768a0a80830 100644 --- a/include/uapi/linux/videodev2.h +++ b/include/uapi/linux/videodev2.h @@ -632,6 +632,8 @@ struct v4l2_pix_format { /* Tiled YUV formats, non contiguous planes */ #define V4L2_PIX_FMT_NV12MT v4l2_fourcc('T', 'M', '1', '2') /* 12 Y/CbCr 4:2:0 64x32 tiles */ #define V4L2_PIX_FMT_NV12MT_16X16 v4l2_fourcc('V', 'M', '1', '2') /* 12 Y/CbCr 4:2:0 16x16 tiles */ +#define V4L2_PIX_FMT_NV12M_8L128 v4l2_fourcc('N', 'A', '1', '2') /* Y/CbCr 4:2:0 8x128 tiles */ +#define V4L2_PIX_FMT_NV12M_10BE_8L128 v4l2_fourcc_be('N', 'T', '1', '2') /* Y/CbCr 4:2:0 10-bit 8x128 tiles */ /* Bayer formats - see http://www.siliconimaging.com/RGB%20Bayer.htm */ #define 
V4L2_PIX_FMT_SBGGR8 v4l2_fourcc('B', 'A', '8', '1') /* 8 BGBG.. GRGR.. */ -- cgit v1.2.3 From d045b9eb95a9b611c483897a69e7285aefdc66d7 Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Mon, 7 Mar 2022 12:44:36 -0800 Subject: mptcp: introduce implicit endpoints In some edge scenarios, an MPTCP subflow can use a local address mapped by an "implicit" endpoint created by the in-kernel path manager. Such endpoints' presence can be confusing, as their creation is hard to track and will prevent the later endpoint creation from the user-space using the same address. Define a new endpoint flag to mark implicit endpoints and allow the user-space to replace them with user-provided data at endpoint creation time. Signed-off-by: Paolo Abeni Signed-off-by: Mat Martineau Signed-off-by: Jakub Kicinski --- include/uapi/linux/mptcp.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/mptcp.h b/include/uapi/linux/mptcp.h index f106a3941cdf..9690efedb5fa 100644 --- a/include/uapi/linux/mptcp.h +++ b/include/uapi/linux/mptcp.h @@ -81,6 +81,7 @@ enum { #define MPTCP_PM_ADDR_FLAG_SUBFLOW (1 << 1) #define MPTCP_PM_ADDR_FLAG_BACKUP (1 << 2) #define MPTCP_PM_ADDR_FLAG_FULLMESH (1 << 3) +#define MPTCP_PM_ADDR_FLAG_IMPLICIT (1 << 4) enum { MPTCP_PM_CMD_UNSPEC, -- cgit v1.2.3 From b530e9e1063ed2b817eae7eec6ed2daa8be11608 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Toke=20H=C3=B8iland-J=C3=B8rgensen?= Date: Wed, 9 Mar 2022 11:53:42 +0100 Subject: bpf: Add "live packet" mode for XDP in BPF_PROG_RUN MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This adds support for running XDP programs through BPF_PROG_RUN in a mode that enables live packet processing of the resulting frames. Previous uses of BPF_PROG_RUN for XDP returned the XDP program return code and the modified packet data to userspace, which is useful for unit testing of XDP programs.
The existing BPF_PROG_RUN for XDP allows userspace to set the ingress ifindex and RXQ number as part of the context object being passed to the kernel. This patch reuses that code, but adds a new mode with different semantics, which can be selected with the new BPF_F_TEST_XDP_LIVE_FRAMES flag. When running BPF_PROG_RUN in this mode, the XDP program return codes will be honoured: returning XDP_PASS will result in the frame being injected into the networking stack as if it came from the selected networking interface, while returning XDP_TX and XDP_REDIRECT will result in the frame being transmitted out that interface. XDP_TX is translated into an XDP_REDIRECT operation to the same interface, since the real XDP_TX action is only possible from within the network drivers themselves, not from the process context where BPF_PROG_RUN is executed. Internally, this new mode of operation creates a page pool instance while setting up the test run, and feeds pages from that into the XDP program. The setup cost of this is amortised over the number of repetitions specified by userspace. To support the performance testing use case, we further optimise the setup step so that all pages in the pool are pre-initialised with the packet data, and pre-computed context and xdp_frame objects stored at the start of each page. This makes it possible to entirely avoid touching the page content on each XDP program invocation, and enables sending up to 9 Mpps/core on my test box. Because the data pages are recycled by the page pool, and the test runner doesn't re-initialise them for each run, subsequent invocations of the XDP program will see the packet data in the state it was after the last time it ran on that particular page. This means that an XDP program that modifies the packet before redirecting it has to be careful about which assumptions it makes about the packet content, but that is only an issue for the most naively written programs. 
Enabling the new flag is only allowed when not setting ctx_out and data_out in the test specification, since using it means frames will be redirected somewhere else, so they can't be returned. Signed-off-by: Toke Høiland-Jørgensen Signed-off-by: Alexei Starovoitov Acked-by: Martin KaFai Lau Link: https://lore.kernel.org/bpf/20220309105346.100053-2-toke@redhat.com --- include/uapi/linux/bpf.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 4eebea830613..bc23020b638d 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -1232,6 +1232,8 @@ enum { /* If set, run the test on the cpu specified by bpf_attr.test.cpu */ #define BPF_F_TEST_RUN_ON_CPU (1U << 0) +/* If set, XDP frames will be transmitted after processing */ +#define BPF_F_TEST_XDP_LIVE_FRAMES (1U << 1) /* type for BPF_ENABLE_STATS */ enum bpf_stats_type { @@ -1393,6 +1395,7 @@ union bpf_attr { __aligned_u64 ctx_out; __u32 flags; __u32 cpu; + __u32 batch_size; } test; struct { /* anonymous struct used by BPF_*_GET_*_ID */ -- cgit v1.2.3 From 530e0d46c61314c59ecfdb8d3bcb87edbc0f85d3 Mon Sep 17 00:00:00 2001 From: Oliver Hartkopp Date: Wed, 9 Mar 2022 13:04:13 +0100 Subject: can: isotp: set default value for N_As to 50 micro seconds The N_As value describes the time a CAN frame needs on the wire when transmitted by the CAN controller. Even very short CAN FD frames need around 100 usecs (bitrate 1Mbit/s, data bitrate 8Mbit/s). Having N_As to be zero (the former default) leads to 'no CAN frame separation' when STmin is set to zero by the receiving node. This 'burst mode' should not be enabled by default as it could potentially dump a high number of CAN frames into the netdev queue from the soft hrtimer context. This does not affect the system stability but is just not nice and cooperative. With this N_As/frame_txtime value the 'burst mode' is disabled by default.
As user space applications usually do not set the frame_txtime element of struct can_isotp_options the new in-kernel default is very likely overwritten with zero when the sockopt() CAN_ISOTP_OPTS is invoked. To make sure that a N_As value of zero is only set intentional the value '0' is now interpreted as 'do not change the current value'. When a frame_txtime of zero is required for testing purposes this CAN_ISOTP_FRAME_TXTIME_ZERO u32 value has to be set in frame_txtime. Link: https://lore.kernel.org/all/20220309120416.83514-2-socketcan@hartkopp.net Signed-off-by: Oliver Hartkopp Signed-off-by: Marc Kleine-Budde --- include/uapi/linux/can/isotp.h | 28 ++++++++++++++++++++++------ 1 file changed, 22 insertions(+), 6 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/can/isotp.h b/include/uapi/linux/can/isotp.h index c55935b64ccc..590f8aea2b6d 100644 --- a/include/uapi/linux/can/isotp.h +++ b/include/uapi/linux/can/isotp.h @@ -137,20 +137,16 @@ struct can_isotp_ll_options { #define CAN_ISOTP_WAIT_TX_DONE 0x400 /* wait for tx completion */ #define CAN_ISOTP_SF_BROADCAST 0x800 /* 1-to-N functional addressing */ -/* default values */ +/* protocol machine default values */ #define CAN_ISOTP_DEFAULT_FLAGS 0 #define CAN_ISOTP_DEFAULT_EXT_ADDRESS 0x00 #define CAN_ISOTP_DEFAULT_PAD_CONTENT 0xCC /* prevent bit-stuffing */ -#define CAN_ISOTP_DEFAULT_FRAME_TXTIME 0 +#define CAN_ISOTP_DEFAULT_FRAME_TXTIME 50000 /* 50 micro seconds */ #define CAN_ISOTP_DEFAULT_RECV_BS 0 #define CAN_ISOTP_DEFAULT_RECV_STMIN 0x00 #define CAN_ISOTP_DEFAULT_RECV_WFTMAX 0 -#define CAN_ISOTP_DEFAULT_LL_MTU CAN_MTU -#define CAN_ISOTP_DEFAULT_LL_TX_DL CAN_MAX_DLEN -#define CAN_ISOTP_DEFAULT_LL_TX_FLAGS 0 - /* * Remark on CAN_ISOTP_DEFAULT_RECV_* values: * @@ -162,4 +158,24 @@ struct can_isotp_ll_options { * consistency and copied directly into the flow control (FC) frame. 
*/ +/* link layer default values => make use of Classical CAN frames */ + +#define CAN_ISOTP_DEFAULT_LL_MTU CAN_MTU +#define CAN_ISOTP_DEFAULT_LL_TX_DL CAN_MAX_DLEN +#define CAN_ISOTP_DEFAULT_LL_TX_FLAGS 0 + +/* + * The CAN_ISOTP_DEFAULT_FRAME_TXTIME has become a non-zero value as + * it only makes sense for isotp implementation tests to run without + * a N_As value. As user space applications usually do not set the + * frame_txtime element of struct can_isotp_options the new in-kernel + * default is very likely overwritten with zero when the sockopt() + * CAN_ISOTP_OPTS is invoked. + * To make sure that a N_As value of zero is only set intentional the + * value '0' is now interpreted as 'do not change the current value'. + * When a frame_txtime of zero is required for testing purposes this + * CAN_ISOTP_FRAME_TXTIME_ZERO u32 value has to be set in frame_txtime. + */ +#define CAN_ISOTP_FRAME_TXTIME_ZERO 0xFFFFFFFF + #endif /* !_UAPI_CAN_ISOTP_H */ -- cgit v1.2.3 From e7a6c00dc77aedf27a601738ea509f1caea6d673 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Fri, 4 Mar 2022 08:22:22 -0700 Subject: io_uring: add support for registering ring file descriptors Lots of workloads use multiple threads, in which case the file table is shared between them. This makes getting and putting the ring file descriptor for each io_uring_enter(2) system call more expensive, as it involves an atomic get and put for each call. Similarly to how we allow registering normal file descriptors to avoid this overhead, add support for an io_uring_register(2) API that allows registering the ring fds themselves: 1) IORING_REGISTER_RING_FDS - takes an array of io_uring_rsrc_update structs, and registers them with the task. 2) IORING_UNREGISTER_RING_FDS - takes an array of io_uring_rsrc_update structs, and unregisters them. When a ring fd is registered, it is internally represented by an offset.
This offset is returned to the application, and the application then uses this offset and sets IORING_ENTER_REGISTERED_RING for the io_uring_enter(2) system call. This works just like using a registered file descriptor, rather than a real one, in an SQE, where IOSQE_FIXED_FILE gets set to tell io_uring that we're using an internal offset/descriptor rather than a real file descriptor. In initial testing, this provides a nice bump in performance for threaded applications in real world cases where the batch count (eg number of requests submitted per io_uring_enter(2) invocation) is low. In a microbenchmark, submitting NOP requests, we see the following increases in performance: Requests per syscall Baseline Registered Increase ---------------------------------------------------------------- 1 ~7030K ~8080K +15% 2 ~13120K ~14800K +13% 4 ~22740K ~25300K +11% Co-developed-by: Xiaoguang Wang Signed-off-by: Jens Axboe --- include/uapi/linux/io_uring.h | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h index 787f491f0d2a..42b2fe84dbcd 100644 --- a/include/uapi/linux/io_uring.h +++ b/include/uapi/linux/io_uring.h @@ -257,10 +257,11 @@ struct io_cqring_offsets { /* * io_uring_enter(2) flags */ -#define IORING_ENTER_GETEVENTS (1U << 0) -#define IORING_ENTER_SQ_WAKEUP (1U << 1) -#define IORING_ENTER_SQ_WAIT (1U << 2) -#define IORING_ENTER_EXT_ARG (1U << 3) +#define IORING_ENTER_GETEVENTS (1U << 0) +#define IORING_ENTER_SQ_WAKEUP (1U << 1) +#define IORING_ENTER_SQ_WAIT (1U << 2) +#define IORING_ENTER_EXT_ARG (1U << 3) +#define IORING_ENTER_REGISTERED_RING (1U << 4) /* * Passed in for io_uring_setup(2). 
Copied back with updated info on success @@ -325,6 +326,10 @@ enum { /* set/get max number of io-wq workers */ IORING_REGISTER_IOWQ_MAX_WORKERS = 19, + /* register/unregister io_uring fd with the ring */ + IORING_REGISTER_RING_FDS = 20, + IORING_UNREGISTER_RING_FDS = 21, + /* this goes last */ IORING_REGISTER_LAST }; -- cgit v1.2.3 From 4f57f06ce2186c31c3da52386125dc57b1cd6f96 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Thu, 10 Mar 2022 06:27:26 -0700 Subject: io_uring: add support for IORING_OP_MSG_RING command This adds support for IORING_OP_MSG_RING, which allows an SQE to signal another ring. That allows either waking up someone waiting on the ring, or even passing a 64-bit value via the user_data field in the CQE. sqe->fd must contain the fd of a ring that should receive the CQE. sqe->off will be propagated to the cqe->user_data on the target ring, and sqe->len will be propagated to cqe->res. The resulting CQE will have IORING_CQE_F_MSG set in its flags, to indicate that this CQE was generated from a messaging request rather than a SQE issued locally on that ring. This effectively allows passing a 64-bit and a 32-bit quantity between the two rings. This request type has the following request specific error cases: - -EBADFD. Set if the sqe->fd doesn't point to a file descriptor that is of the io_uring type. - -EOVERFLOW. Set if we were not able to deliver a request to the target ring.
Signed-off-by: Jens Axboe --- include/uapi/linux/io_uring.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h index 42b2fe84dbcd..8bd4bfdd9a89 100644 --- a/include/uapi/linux/io_uring.h +++ b/include/uapi/linux/io_uring.h @@ -143,6 +143,7 @@ enum { IORING_OP_MKDIRAT, IORING_OP_SYMLINKAT, IORING_OP_LINKAT, + IORING_OP_MSG_RING, /* this goes last, obviously */ IORING_OP_LAST, @@ -199,9 +200,11 @@ struct io_uring_cqe { * * IORING_CQE_F_BUFFER If set, the upper 16 bits are the buffer ID * IORING_CQE_F_MORE If set, parent SQE will generate more CQE entries + * IORING_CQE_F_MSG If set, CQE was generated with IORING_OP_MSG_RING */ #define IORING_CQE_F_BUFFER (1U << 0) #define IORING_CQE_F_MORE (1U << 1) +#define IORING_CQE_F_MSG (1U << 2) enum { IORING_CQE_BUFFER_SHIFT = 16, -- cgit v1.2.3 From 153474ba1a4aed0a7b797b4c2be8c35c7a4e57bd Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Thu, 27 Jan 2022 11:46:37 -0600 Subject: ptrace: Create ptrace_report_syscall_{entry,exit} in ptrace.h Rename tracehook_report_syscall_{entry,exit} to ptrace_report_syscall_{entry,exit} and place them in ptrace.h. There is no longer any generic tracehook infrastructure so make these ptrace specific functions ptrace specific. Reviewed-by: Kees Cook Link: https://lkml.kernel.org/r/20220309162454.123006-3-ebiederm@xmission.com Signed-off-by: "Eric W. Biederman" --- include/uapi/linux/ptrace.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/ptrace.h b/include/uapi/linux/ptrace.h index 3747bf816f9a..b7af92e07d1f 100644 --- a/include/uapi/linux/ptrace.h +++ b/include/uapi/linux/ptrace.h @@ -114,7 +114,7 @@ struct ptrace_rseq_configuration { /* * These values are stored in task->ptrace_message - * by tracehook_report_syscall_* to describe the current syscall-stop.
+ * by ptrace_report_syscall_* to describe the current syscall-stop. */ #define PTRACE_EVENTMSG_SYSCALL_ENTRY 1 #define PTRACE_EVENTMSG_SYSCALL_EXIT 2 -- cgit v1.2.3 From bcbb7bf6ccde7cb969a5642879832bc84ebf06a3 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Thu, 10 Mar 2022 12:59:35 -0700 Subject: io_uring: allow submissions to continue on error By default, io_uring will stop submitting a batch of requests if we run into an error submitting a request. This isn't strictly necessary, as the error result is passed out-of-band via a CQE anyway. And it can be a bit confusing for some applications. Provide a way to setup a ring that will continue submitting on error, when the error CQE has been posted. There's still one case that will break out of submission. If we fail allocating a request, then we'll still return -ENOMEM. We could in theory post a CQE for that condition too even if we never got a request. Leave that for a potential followup. Reported-by: Dylan Yudaken Signed-off-by: Jens Axboe --- include/uapi/linux/io_uring.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h index 8bd4bfdd9a89..d2be4eb22008 100644 --- a/include/uapi/linux/io_uring.h +++ b/include/uapi/linux/io_uring.h @@ -101,6 +101,7 @@ enum { #define IORING_SETUP_CLAMP (1U << 4) /* clamp SQ/CQ ring sizes */ #define IORING_SETUP_ATTACH_WQ (1U << 5) /* attach to existing wq */ #define IORING_SETUP_R_DISABLED (1U << 6) /* start with ring disabled */ +#define IORING_SETUP_SUBMIT_ALL (1U << 7) /* continue submit on error */ enum { IORING_OP_NOP, -- cgit v1.2.3 From 9bb984f28d5bcb917d35d930fcfb89f90f9449fd Mon Sep 17 00:00:00 2001 From: Martin KaFai Lau Date: Wed, 9 Mar 2022 01:05:09 -0800 Subject: bpf: Remove BPF_SKB_DELIVERY_TIME_NONE and rename s/delivery_time_/tstamp_/ This patch is to simplify the uapi bpf.h regarding to the tstamp type and use a similar way as the kernel to describe the value stored in 
__sk_buff->tstamp. My earlier thought was to avoid describing the semantic and clock base for the rcv timestamp until there is more clarity on the use case, so the __sk_buff->delivery_time_type naming instead of __sk_buff->tstamp_type. With some thoughts, it can reuse the UNSPEC naming. This patch first removes BPF_SKB_DELIVERY_TIME_NONE and also rename BPF_SKB_DELIVERY_TIME_UNSPEC to BPF_SKB_TSTAMP_UNSPEC and BPF_SKB_DELIVERY_TIME_MONO to BPF_SKB_TSTAMP_DELIVERY_MONO. The semantic of BPF_SKB_TSTAMP_DELIVERY_MONO is the same: __sk_buff->tstamp has delivery time in mono clock base. BPF_SKB_TSTAMP_UNSPEC means __sk_buff->tstamp has the (rcv) tstamp at ingress and the delivery time at egress. At egress, the clock base could be found from skb->sk->sk_clockid. __sk_buff->tstamp == 0 naturally means NONE, so NONE is not needed. With BPF_SKB_TSTAMP_UNSPEC for the rcv tstamp at ingress, the __sk_buff->delivery_time_type is also renamed to __sk_buff->tstamp_type which was also suggested in the earlier discussion: https://lore.kernel.org/bpf/b181acbe-caf8-502d-4b7b-7d96b9fc5d55@iogearbox.net/ The above will then make __sk_buff->tstamp and __sk_buff->tstamp_type the same as its kernel skb->tstamp and skb->mono_delivery_time counter part. The internal kernel function bpf_skb_convert_dtime_type_read() is then renamed to bpf_skb_convert_tstamp_type_read() and it can be simplified with the BPF_SKB_DELIVERY_TIME_NONE gone. A BPF_ALU32_IMM(BPF_AND) insn is also saved by using BPF_JMP32_IMM(BPF_JSET). The bpf helper bpf_skb_set_delivery_time() is also renamed to bpf_skb_set_tstamp(). The arg name is changed from dtime to tstamp also. It only allows setting tstamp 0 for BPF_SKB_TSTAMP_UNSPEC and it could be relaxed later if there is use case to change mono delivery time to non mono. prog->delivery_time_access is also renamed to prog->tstamp_type_access. 
Signed-off-by: Martin KaFai Lau Signed-off-by: Daniel Borkmann Link: https://lore.kernel.org/bpf/20220309090509.3712315-1-kafai@fb.com --- include/uapi/linux/bpf.h | 40 +++++++++++++++++++++------------------- 1 file changed, 21 insertions(+), 19 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index bc23020b638d..d288a0a9f797 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -5090,23 +5090,22 @@ union bpf_attr { * 0 on success, or a negative error in case of failure. On error * *dst* buffer is zeroed out. * - * long bpf_skb_set_delivery_time(struct sk_buff *skb, u64 dtime, u32 dtime_type) + * long bpf_skb_set_tstamp(struct sk_buff *skb, u64 tstamp, u32 tstamp_type) * Description - * Set a *dtime* (delivery time) to the __sk_buff->tstamp and also - * change the __sk_buff->delivery_time_type to *dtime_type*. + * Change the __sk_buff->tstamp_type to *tstamp_type* + * and set *tstamp* to the __sk_buff->tstamp together. * - * When setting a delivery time (non zero *dtime*) to - * __sk_buff->tstamp, only BPF_SKB_DELIVERY_TIME_MONO *dtime_type* - * is supported. It is the only delivery_time_type that will be - * kept after bpf_redirect_*(). - * - * If there is no need to change the __sk_buff->delivery_time_type, - * the delivery time can be directly written to __sk_buff->tstamp + * If there is no need to change the __sk_buff->tstamp_type, + * the tstamp value can be directly written to __sk_buff->tstamp * instead. * - * *dtime* 0 and *dtime_type* BPF_SKB_DELIVERY_TIME_NONE - * can be used to clear any delivery time stored in - * __sk_buff->tstamp. + * BPF_SKB_TSTAMP_DELIVERY_MONO is the only tstamp that + * will be kept during bpf_redirect_*(). A non zero + * *tstamp* must be used with the BPF_SKB_TSTAMP_DELIVERY_MONO + * *tstamp_type*. + * + * A BPF_SKB_TSTAMP_UNSPEC *tstamp_type* can only be used + * with a zero *tstamp*. * * Only IPv4 and IPv6 skb->protocol are supported. 
* @@ -5119,7 +5118,7 @@ union bpf_attr { * Return * 0 on success. * **-EINVAL** for invalid input - * **-EOPNOTSUPP** for unsupported delivery_time_type and protocol + * **-EOPNOTSUPP** for unsupported protocol */ #define __BPF_FUNC_MAPPER(FN) \ FN(unspec), \ @@ -5314,7 +5313,7 @@ union bpf_attr { FN(xdp_load_bytes), \ FN(xdp_store_bytes), \ FN(copy_from_user_task), \ - FN(skb_set_delivery_time), \ + FN(skb_set_tstamp), \ /* */ /* integer value in 'imm' field of BPF_CALL instruction selects which helper @@ -5505,9 +5504,12 @@ union { \ } __attribute__((aligned(8))) enum { - BPF_SKB_DELIVERY_TIME_NONE, - BPF_SKB_DELIVERY_TIME_UNSPEC, - BPF_SKB_DELIVERY_TIME_MONO, + BPF_SKB_TSTAMP_UNSPEC, + BPF_SKB_TSTAMP_DELIVERY_MONO, /* tstamp has mono delivery time */ + /* For any BPF_SKB_TSTAMP_* that the bpf prog cannot handle, + * the bpf prog should handle it like BPF_SKB_TSTAMP_UNSPEC + * and try to deduce it by ingress, egress or skb->sk->sk_clockid. + */ }; /* user accessible mirror of in-kernel sk_buff. @@ -5550,7 +5552,7 @@ struct __sk_buff { __u32 gso_segs; __bpf_md_ptr(struct bpf_sock *, sk); __u32 gso_size; - __u8 delivery_time_type; + __u8 tstamp_type; __u32 :24; /* Padding, future use. */ __u64 hwtstamp; }; -- cgit v1.2.3 From 58617014405ad5c9f94f464444f4972dabb71ca7 Mon Sep 17 00:00:00 2001 From: Hengqi Chen Date: Thu, 10 Mar 2022 23:53:35 +0800 Subject: bpf: Fix comment for helper bpf_current_task_under_cgroup() Fix the descriptions of the return values of helper bpf_current_task_under_cgroup(). 
Fixes: c6b5fb8690fa ("bpf: add documentation for eBPF helpers (42-50)") Signed-off-by: Hengqi Chen Signed-off-by: Daniel Borkmann Acked-by: Yonghong Song Link: https://lore.kernel.org/bpf/20220310155335.1278783-1-hengqi.chen@gmail.com --- include/uapi/linux/bpf.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index d288a0a9f797..e9978a916c3e 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -2302,8 +2302,8 @@ union bpf_attr { * Return * The return value depends on the result of the test, and can be: * - * * 0, if current task belongs to the cgroup2. - * * 1, if current task does not belong to the cgroup2. + * * 1, if current task belongs to the cgroup2. + * * 0, if current task does not belong to the cgroup2. * * A negative error code, if an error occurred. * * long bpf_skb_change_tail(struct sk_buff *skb, u32 len, u64 flags) -- cgit v1.2.3 From 174b16946e39ebd369097e0f773536c91a8c1a4c Mon Sep 17 00:00:00 2001 From: Roberto Sassu Date: Wed, 2 Mar 2022 12:13:58 +0100 Subject: bpf-lsm: Introduce new helper bpf_ima_file_hash() ima_file_hash() has been modified to calculate the measurement of a file on demand, if it has not been already performed by IMA or the measurement is not fresh. For compatibility reasons, ima_inode_hash() remains unchanged. Keep the same approach in eBPF and introduce the new helper bpf_ima_file_hash() to take advantage of the modified behavior of ima_file_hash(). 
Signed-off-by: Roberto Sassu Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20220302111404.193900-4-roberto.sassu@huawei.com --- include/uapi/linux/bpf.h | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index e9978a916c3e..99fab54ae9c0 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -5119,6 +5119,16 @@ union bpf_attr { * 0 on success. * **-EINVAL** for invalid input * **-EOPNOTSUPP** for unsupported protocol + * + * long bpf_ima_file_hash(struct file *file, void *dst, u32 size) + * Description + * Returns a calculated IMA hash of the *file*. + * If the hash is larger than *size*, then only *size* + * bytes will be copied to *dst* + * Return + * The **hash_algo** is returned on success, + * **-EOPNOTSUP** if the hash calculation failed or **-EINVAL** if + * invalid arguments are passed. */ #define __BPF_FUNC_MAPPER(FN) \ FN(unspec), \ @@ -5314,6 +5324,7 @@ union bpf_attr { FN(xdp_store_bytes), \ FN(copy_from_user_task), \ FN(skb_set_tstamp), \ + FN(ima_file_hash), \ /* */ /* integer value in 'imm' field of BPF_CALL instruction selects which helper -- cgit v1.2.3 From 1926407a4ab0e59d5a27bed7b82029b356d80fa0 Mon Sep 17 00:00:00 2001 From: Ilya Maximets Date: Wed, 9 Mar 2022 23:20:33 +0100 Subject: net: openvswitch: fix uAPI incompatibility with existing user space Few years ago OVS user space made a strange choice in the commit [1] to define types only valid for the user space inside the copy of a kernel uAPI header. '#ifndef __KERNEL__' and another attribute was added later. This leads to the inevitable clash between user space and kernel types when the kernel uAPI is extended. The issue was unveiled with the addition of a new type for IPv6 extension header in kernel uAPI. 
When kernel provides the OVS_KEY_ATTR_IPV6_EXTHDRS attribute to the older user space application, application tries to parse it as OVS_KEY_ATTR_PACKET_TYPE and discards the whole netlink message as malformed. Since OVS_KEY_ATTR_IPV6_EXTHDRS is supplied along with every IPv6 packet that goes to the user space, IPv6 support is fully broken. Fixing that by bringing these user space attributes to the kernel uAPI to avoid the clash. Strictly speaking this is not the problem of the kernel uAPI, but changing it is the only way to avoid breakage of the older user space applications at this point. These 2 types are explicitly rejected now since they should not be passed to the kernel. Additionally, OVS_KEY_ATTR_TUNNEL_INFO moved out from the '#ifdef __KERNEL__' as there is no good reason to hide it from the userspace. And it's also explicitly rejected now, because it's for in-kernel use only. Comments with warnings were added to avoid the problem coming back. (1 << type) converted to (1ULL << type) to avoid integer overflow on OVS_KEY_ATTR_IPV6_EXTHDRS, since it equals 32 now. 
[1] beb75a40fdc2 ("userspace: Switching of L3 packets in L2 pipeline") Fixes: 28a3f0601727 ("net: openvswitch: IPv6: Add IPv6 extension header support") Link: https://lore.kernel.org/netdev/3adf00c7-fe65-3ef4-b6d7-6d8a0cad8a5f@nvidia.com Link: https://github.com/openvswitch/ovs/commit/beb75a40fdc295bfd6521b0068b4cd12f6de507c Reported-by: Roi Dayan Signed-off-by: Ilya Maximets Acked-by: Nicolas Dichtel Acked-by: Aaron Conole Link: https://lore.kernel.org/r/20220309222033.3018976-1-i.maximets@ovn.org Signed-off-by: Jakub Kicinski --- include/uapi/linux/openvswitch.h | 18 ++++++++++++++---- 1 file changed, 14 insertions(+), 4 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/openvswitch.h b/include/uapi/linux/openvswitch.h index 9d1710f20505..ce3e1738d427 100644 --- a/include/uapi/linux/openvswitch.h +++ b/include/uapi/linux/openvswitch.h @@ -351,11 +351,21 @@ enum ovs_key_attr { OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV4, /* struct ovs_key_ct_tuple_ipv4 */ OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV6, /* struct ovs_key_ct_tuple_ipv6 */ OVS_KEY_ATTR_NSH, /* Nested set of ovs_nsh_key_* */ - OVS_KEY_ATTR_IPV6_EXTHDRS, /* struct ovs_key_ipv6_exthdr */ -#ifdef __KERNEL__ - OVS_KEY_ATTR_TUNNEL_INFO, /* struct ip_tunnel_info */ -#endif + /* User space decided to squat on types 29 and 30. They are defined + * below, but should not be sent to the kernel. + * + * WARNING: No new types should be added unless they are defined + * for both kernel and user space (no 'ifdef's). It's hard + * to keep compatibility otherwise. + */ + OVS_KEY_ATTR_PACKET_TYPE, /* be32 packet type */ + OVS_KEY_ATTR_ND_EXTENSIONS, /* IPv6 Neighbor Discovery extensions */ + + OVS_KEY_ATTR_TUNNEL_INFO, /* struct ip_tunnel_info. + * For in-kernel use only. 
+ */ + OVS_KEY_ATTR_IPV6_EXTHDRS, /* struct ovs_key_ipv6_exthdr */ __OVS_KEY_ATTR_MAX }; -- cgit v1.2.3 From 2916b7a9c7c25ecf9be2f37e567a277e861f8e3f Mon Sep 17 00:00:00 2001 From: Veerendranath Jakkam Date: Tue, 22 Feb 2022 20:36:39 +0530 Subject: nl80211: fix typo of NL80211_IF_TYPE_OCB in documentation It should be NL80211_IFTYPE_OCB instead. Signed-off-by: Veerendranath Jakkam Link: https://lore.kernel.org/r/1645542399-4680-1-git-send-email-quic_vjakkam@quicinc.com Signed-off-by: Johannes Berg --- include/uapi/linux/nl80211.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index 98ed52663d6b..0568a79097b8 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -3259,7 +3259,7 @@ enum nl80211_attrs { * and therefore can't be created in the normal ways, use the * %NL80211_CMD_START_P2P_DEVICE and %NL80211_CMD_STOP_P2P_DEVICE * commands to create and destroy one - * @NL80211_IF_TYPE_OCB: Outside Context of a BSS + * @NL80211_IFTYPE_OCB: Outside Context of a BSS * This mode corresponds to the MIB variable dot11OCBActivated=true * @NL80211_IFTYPE_NAN: NAN device interface type (not a netdev) * @NL80211_IFTYPE_MAX: highest interface type number currently defined -- cgit v1.2.3 From b20dc3c684580ddc07eb48ee3c3dc7597cd5eebf Mon Sep 17 00:00:00 2001 From: Wojciech Drewek Date: Fri, 4 Mar 2022 17:40:42 +0100 Subject: gtp: Allow to create GTP device without FDs Currently, when the user wants to create GTP device, he has to provide file handles to the sockets created in userspace (IFLA_GTP_FD0, IFLA_GTP_FD1). This behaviour is not ideal, considering the option of adding support for GTP device creation through ip link. Ip link application is not a good place to create such sockets. This patch allows to create GTP device without providing IFLA_GTP_FD0 and IFLA_GTP_FD1 arguments. 
If the user sets IFLA_GTP_CREATE_SOCKETS attribute, then GTP module takes care of creating UDP sockets by itself. Sockets are created with the commonly known UDP ports used for GTP protocol (GTP0_PORT and GTP1U_PORT). In this case we don't have to provide encap_destroy because no extra deinitialization is needed, everything is covered by udp_tunnel_sock_release. Note: GTP instance created with only this change applied, does not handle GTP Echo Requests. This is implemented in the following patch. Signed-off-by: Wojciech Drewek Signed-off-by: Tony Nguyen --- include/uapi/linux/if_link.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h index ddca20357e7e..ebd2aa3ef809 100644 --- a/include/uapi/linux/if_link.h +++ b/include/uapi/linux/if_link.h @@ -887,6 +887,7 @@ enum { IFLA_GTP_FD1, IFLA_GTP_PDP_HASHSIZE, IFLA_GTP_ROLE, + IFLA_GTP_CREATE_SOCKETS, __IFLA_GTP_MAX, }; #define IFLA_GTP_MAX (__IFLA_GTP_MAX - 1) -- cgit v1.2.3 From 9af41cc33471ea1efa6f77e188f055cc77d0a5c5 Mon Sep 17 00:00:00 2001 From: Wojciech Drewek Date: Fri, 4 Mar 2022 17:40:43 +0100 Subject: gtp: Implement GTP echo response Adding GTP device through ip link creates the situation where there is no userspace daemon which would handle GTP messages (Echo Request for example). GTP-U instance which would not respond to echo requests would violate GTP specification. When GTP packet arrives with GTP_ECHO_REQ message type, GTP_ECHO_RSP is sent to the sender. GTP_ECHO_RSP message should contain information element with GTPIE_RECOVERY tag and restart counter value. For GTPv1 restart counter is not used and should be equal to 0, for GTPv0 restart counter contains information provided from userspace (IFLA_GTP_RESTART_COUNT).
Signed-off-by: Wojciech Drewek Suggested-by: Harald Welte Reviewed-by: Harald Welte Tested-by: Harald Welte Signed-off-by: Tony Nguyen --- include/uapi/linux/if_link.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h index ebd2aa3ef809..bd24c7dc10a2 100644 --- a/include/uapi/linux/if_link.h +++ b/include/uapi/linux/if_link.h @@ -888,6 +888,7 @@ enum { IFLA_GTP_PDP_HASHSIZE, IFLA_GTP_ROLE, IFLA_GTP_CREATE_SOCKETS, + IFLA_GTP_RESTART_COUNT, __IFLA_GTP_MAX, }; #define IFLA_GTP_MAX (__IFLA_GTP_MAX - 1) -- cgit v1.2.3 From d33bd757d362699cfce3c68b53cd12b947d196f4 Mon Sep 17 00:00:00 2001 From: Wojciech Drewek Date: Fri, 4 Mar 2022 17:40:44 +0100 Subject: gtp: Implement GTP echo request Adding GTP device through ip link creates the situation where GTP instance is not able to send GTP echo requests. Echo requests are used to check if GTP peer is still alive. With this patch, gtp_genl_ops are extended by new cmd (GTP_CMD_ECHOREQ) which allows to send echo request in the given version of GTP protocol (v0 or v1), from the given ms address to the given peer. TID is not included because in all path management messages it should be equal to 0. When GTP echo response is detected, multicast message is sent to everyone in the gtp_genl_family. Message contains GTP version, ms address and peer address.
Suggested-by: Harald Welte Signed-off-by: Wojciech Drewek Reviewed-by: Harald Welte Signed-off-by: Tony Nguyen --- include/uapi/linux/gtp.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/gtp.h b/include/uapi/linux/gtp.h index 79f9191bbb24..2f61298a7b77 100644 --- a/include/uapi/linux/gtp.h +++ b/include/uapi/linux/gtp.h @@ -8,6 +8,7 @@ enum gtp_genl_cmds { GTP_CMD_NEWPDP, GTP_CMD_DELPDP, GTP_CMD_GETPDP, + GTP_CMD_ECHOREQ, GTP_CMD_MAX, }; -- cgit v1.2.3 From e3acda7ade0a36c5cbebc2b54d30b7f08a4ba29b Mon Sep 17 00:00:00 2001 From: Wojciech Drewek Date: Fri, 4 Mar 2022 17:40:45 +0100 Subject: net/sched: Allow flower to match on GTP options Options are as follows: PDU_TYPE:QFI and they refer to the fields from the PDU Session Protocol. PDU Session data is conveyed in GTP-U Extension Header. GTP-U Extension Header is described in 3GPP TS 29.281. PDU Session Protocol is described in 3GPP TS 38.415. PDU_TYPE - indicates the type of the PDU Session Information (4 bits) QFI - QoS Flow Identifier (6 bits) # ip link add gtp_dev type gtp role sgsn # tc qdisc add dev gtp_dev ingress # tc filter add dev gtp_dev protocol ip parent ffff: \ flower \ enc_key_id 11 \ gtp_opts 1:8/ff:ff \ action mirred egress redirect dev eth0 Signed-off-by: Wojciech Drewek Signed-off-by: Tony Nguyen --- include/uapi/linux/if_tunnel.h | 4 +++- include/uapi/linux/pkt_cls.h | 15 +++++++++++++++ 2 files changed, 18 insertions(+), 1 deletion(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/if_tunnel.h b/include/uapi/linux/if_tunnel.h index 7d9105533c7b..102119628ff5 100644 --- a/include/uapi/linux/if_tunnel.h +++ b/include/uapi/linux/if_tunnel.h @@ -176,8 +176,10 @@ enum { #define TUNNEL_VXLAN_OPT __cpu_to_be16(0x1000) #define TUNNEL_NOCACHE __cpu_to_be16(0x2000) #define TUNNEL_ERSPAN_OPT __cpu_to_be16(0x4000) +#define TUNNEL_GTP_OPT __cpu_to_be16(0x8000) #define TUNNEL_OPTIONS_PRESENT \ - (TUNNEL_GENEVE_OPT | TUNNEL_VXLAN_OPT | 
TUNNEL_ERSPAN_OPT) + (TUNNEL_GENEVE_OPT | TUNNEL_VXLAN_OPT | TUNNEL_ERSPAN_OPT | \ + TUNNEL_GTP_OPT) #endif /* _UAPI_IF_TUNNEL_H_ */ diff --git a/include/uapi/linux/pkt_cls.h b/include/uapi/linux/pkt_cls.h index ee38b35c3f57..404f97fb239c 100644 --- a/include/uapi/linux/pkt_cls.h +++ b/include/uapi/linux/pkt_cls.h @@ -616,6 +616,10 @@ enum { * TCA_FLOWER_KEY_ENC_OPT_ERSPAN_ * attributes */ + TCA_FLOWER_KEY_ENC_OPTS_GTP, /* Nested + * TCA_FLOWER_KEY_ENC_OPT_GTP_ + * attributes + */ __TCA_FLOWER_KEY_ENC_OPTS_MAX, }; @@ -654,6 +658,17 @@ enum { #define TCA_FLOWER_KEY_ENC_OPT_ERSPAN_MAX \ (__TCA_FLOWER_KEY_ENC_OPT_ERSPAN_MAX - 1) +enum { + TCA_FLOWER_KEY_ENC_OPT_GTP_UNSPEC, + TCA_FLOWER_KEY_ENC_OPT_GTP_PDU_TYPE, /* u8 */ + TCA_FLOWER_KEY_ENC_OPT_GTP_QFI, /* u8 */ + + __TCA_FLOWER_KEY_ENC_OPT_GTP_MAX, +}; + +#define TCA_FLOWER_KEY_ENC_OPT_GTP_MAX \ + (__TCA_FLOWER_KEY_ENC_OPT_GTP_MAX - 1) + enum { TCA_FLOWER_KEY_MPLS_OPTS_UNSPEC, TCA_FLOWER_KEY_MPLS_OPTS_LSE, -- cgit v1.2.3 From 3b6c6c039707f6bb7c64af2aa82a437fabb93aee Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Wed, 9 Mar 2022 19:49:48 -0800 Subject: nvdimm/region: Delete nd_blk_region infrastructure Now that the nd_namespace_blk infrastructure is removed, delete all the region machinery to coordinate provisioning aliased capacity between PMEM and BLK. 
Reviewed-by: Christoph Hellwig Link: https://lore.kernel.org/r/164688418803.2879318.1302315202397235855.stgit@dwillia2-desk3.amr.corp.intel.com Signed-off-by: Dan Williams --- include/uapi/linux/ndctl.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/ndctl.h b/include/uapi/linux/ndctl.h index 8cf1e4884fd5..17e02b64ea2e 100644 --- a/include/uapi/linux/ndctl.h +++ b/include/uapi/linux/ndctl.h @@ -189,7 +189,6 @@ static inline const char *nvdimm_cmd_name(unsigned cmd) #define ND_DEVICE_REGION_BLK 3 /* nd_region: (parent of BLK namespaces) */ #define ND_DEVICE_NAMESPACE_IO 4 /* legacy persistent memory */ #define ND_DEVICE_NAMESPACE_PMEM 5 /* PMEM namespace (may alias with BLK) */ -#define ND_DEVICE_NAMESPACE_BLK 6 /* BLK namespace (may alias with PMEM) */ #define ND_DEVICE_DAX_PMEM 7 /* Device DAX interface to pmem */ enum nd_driver_flags { @@ -198,7 +197,6 @@ enum nd_driver_flags { ND_DRIVER_REGION_BLK = 1 << ND_DEVICE_REGION_BLK, ND_DRIVER_NAMESPACE_IO = 1 << ND_DEVICE_NAMESPACE_IO, ND_DRIVER_NAMESPACE_PMEM = 1 << ND_DEVICE_NAMESPACE_PMEM, - ND_DRIVER_NAMESPACE_BLK = 1 << ND_DEVICE_NAMESPACE_BLK, ND_DRIVER_DAX_PMEM = 1 << ND_DEVICE_DAX_PMEM, }; -- cgit v1.2.3 From 8109517b394e6deab5fd21cc5460e82ffed229c6 Mon Sep 17 00:00:00 2001 From: Arnaud Pouliquen Date: Mon, 24 Jan 2022 11:25:24 +0100 Subject: rpmsg: ctrl: Introduce new RPMSG_CREATE/RELEASE_DEV_IOCTL controls Allow the user space application to create and release an rpmsg device by adding RPMSG_CREATE_DEV_IOCTL and RPMSG_RELEASE_DEV_IOCTL ioctrls to the /dev/rpmsg_ctrl interface The RPMSG_CREATE_DEV_IOCTL Ioctl can be used to instantiate a local rpmsg device. Depending on the back-end implementation, the associated rpmsg driver is probed and a NS announcement can be sent to the remote processor. The RPMSG_RELEASE_DEV_IOCTL allows the user application to release a rpmsg device created either by the remote processor or with the RPMSG_CREATE_DEV_IOCTL call. 
Depending on the back-end implementation, the associated rpmsg driver is removed and a NS destroy rpmsg can be sent to the remote processor. Suggested-by: Mathieu Poirier Signed-off-by: Arnaud Pouliquen Reviewed-by: Mathieu Poirier Signed-off-by: Bjorn Andersson Link: https://lore.kernel.org/r/20220124102524.295783-12-arnaud.pouliquen@foss.st.com --- include/uapi/linux/rpmsg.h | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/rpmsg.h b/include/uapi/linux/rpmsg.h index f5ca8740f3fb..1637e68177d9 100644 --- a/include/uapi/linux/rpmsg.h +++ b/include/uapi/linux/rpmsg.h @@ -33,4 +33,14 @@ struct rpmsg_endpoint_info { */ #define RPMSG_DESTROY_EPT_IOCTL _IO(0xb5, 0x2) +/** + * Instantiate a new local rpmsg service device. + */ +#define RPMSG_CREATE_DEV_IOCTL _IOW(0xb5, 0x3, struct rpmsg_endpoint_info) + +/** + * Release a local rpmsg device. + */ +#define RPMSG_RELEASE_DEV_IOCTL _IOW(0xb5, 0x4, struct rpmsg_endpoint_info) + #endif -- cgit v1.2.3 From b5fdf66f6eb2560784c6f60131dc567de06267dc Mon Sep 17 00:00:00 2001 From: Dave Wysochanski Date: Tue, 1 Mar 2022 14:37:27 -0500 Subject: NFS: Remove remaining dfprintks related to fscache and remove NFSDBG_FSCACHE The fscache cookie APIs including fscache_acquire_cookie() and fscache_relinquish_cookie() now have very good tracing. Thus, there is no real need for dfprintks in the NFS fscache interface. The NFS fscache interface has removed all dfprintks so remove the NFSDBG_FSCACHE defines. 
Signed-off-by: Dave Wysochanski Signed-off-by: Trond Myklebust --- include/uapi/linux/nfs_fs.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/nfs_fs.h b/include/uapi/linux/nfs_fs.h index 3afe3767c55d..ae0de165c014 100644 --- a/include/uapi/linux/nfs_fs.h +++ b/include/uapi/linux/nfs_fs.h @@ -52,7 +52,7 @@ #define NFSDBG_CALLBACK 0x0100 #define NFSDBG_CLIENT 0x0200 #define NFSDBG_MOUNT 0x0400 -#define NFSDBG_FSCACHE 0x0800 +#define NFSDBG_FSCACHE 0x0800 /* unused */ #define NFSDBG_PNFS 0x1000 #define NFSDBG_PNFS_LD 0x2000 #define NFSDBG_STATE 0x4000 -- cgit v1.2.3 From 2c7d2a230237e7c43fa067d695937b7e484bb92a Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Wed, 15 Dec 2021 15:39:58 -0500 Subject: btrfs: add definition for EXTENT_TREE_V2 This adds the initial definition of the EXTENT_TREE_V2 incompat feature flag. This also hides the support behind CONFIG_BTRFS_DEBUG. THIS IS A IN DEVELOPMENT FORMAT CHANGE, DO NOT USE UNLESS YOU ARE A DEVELOPER OR A TESTER. The format is in flux and will be added in stages, any fs will need to be re-made between updates to the format. 
Signed-off-by: Josef Bacik Signed-off-by: David Sterba --- include/uapi/linux/btrfs.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/btrfs.h b/include/uapi/linux/btrfs.h index 738619994e26..1cb1a3860f1d 100644 --- a/include/uapi/linux/btrfs.h +++ b/include/uapi/linux/btrfs.h @@ -309,6 +309,7 @@ struct btrfs_ioctl_fs_info_args { #define BTRFS_FEATURE_INCOMPAT_METADATA_UUID (1ULL << 10) #define BTRFS_FEATURE_INCOMPAT_RAID1C34 (1ULL << 11) #define BTRFS_FEATURE_INCOMPAT_ZONED (1ULL << 12) +#define BTRFS_FEATURE_INCOMPAT_EXTENT_TREE_V2 (1ULL << 13) struct btrfs_ioctl_feature_flags { __u64 compat_flags; -- cgit v1.2.3 From 9c54e80ddc6bd89596a4046d451908700476fd14 Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Wed, 15 Dec 2021 15:40:07 -0500 Subject: btrfs: add code to support the block group root This code adds the on disk structures for the block group root, which will hold the block group items for extent tree v2. Signed-off-by: Josef Bacik Signed-off-by: David Sterba --- include/uapi/linux/btrfs_tree.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/btrfs_tree.h b/include/uapi/linux/btrfs_tree.h index 5416f1f1a77a..b069752a8ecf 100644 --- a/include/uapi/linux/btrfs_tree.h +++ b/include/uapi/linux/btrfs_tree.h @@ -53,6 +53,9 @@ /* tracks free space in block groups. */ #define BTRFS_FREE_SPACE_TREE_OBJECTID 10ULL +/* Holds the block group items for extent tree v2. 
*/ +#define BTRFS_BLOCK_GROUP_TREE_OBJECTID 11ULL + /* device stats in the device tree */ #define BTRFS_DEV_STATS_OBJECTID 0ULL -- cgit v1.2.3 From dcb77a9ae87dc1ae2c54ea2e629da357e694b664 Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Mon, 16 Aug 2021 15:58:29 -0700 Subject: btrfs: add definitions and documentation for encoded I/O ioctls In order to allow sending and receiving compressed data without decompressing it, we need an interface to write pre-compressed data directly to the filesystem and the matching interface to read compressed data without decompressing it. This adds the definitions for ioctls to do that and detailed explanations of how to use them. Reviewed-by: Nikolay Borisov Signed-off-by: Omar Sandoval Signed-off-by: David Sterba --- include/uapi/linux/btrfs.h | 132 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 132 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/btrfs.h b/include/uapi/linux/btrfs.h index 1cb1a3860f1d..d956b2993970 100644 --- a/include/uapi/linux/btrfs.h +++ b/include/uapi/linux/btrfs.h @@ -869,6 +869,134 @@ struct btrfs_ioctl_get_subvol_rootref_args { __u8 align[7]; }; +/* + * Data and metadata for an encoded read or write. + * + * Encoded I/O bypasses any encoding automatically done by the filesystem (e.g., + * compression). This can be used to read the compressed contents of a file or + * write pre-compressed data directly to a file. + * + * BTRFS_IOC_ENCODED_READ and BTRFS_IOC_ENCODED_WRITE are essentially + * preadv/pwritev with additional metadata about how the data is encoded and the + * size of the unencoded data. + * + * BTRFS_IOC_ENCODED_READ fills the given iovecs with the encoded data, fills + * the metadata fields, and returns the size of the encoded data. It reads one + * extent per call. It can also read data which is not encoded. 
+ * + * BTRFS_IOC_ENCODED_WRITE uses the metadata fields, writes the encoded data + * from the iovecs, and returns the size of the encoded data. Note that the + * encoded data is not validated when it is written; if it is not valid (e.g., + * it cannot be decompressed), then a subsequent read may return an error. + * + * Since the filesystem page cache contains decoded data, encoded I/O bypasses + * the page cache. Encoded I/O requires CAP_SYS_ADMIN. + */ +struct btrfs_ioctl_encoded_io_args { + /* Input parameters for both reads and writes. */ + + /* + * iovecs containing encoded data. + * + * For reads, if the size of the encoded data is larger than the sum of + * iov[n].iov_len for 0 <= n < iovcnt, then the ioctl fails with + * ENOBUFS. + * + * For writes, the size of the encoded data is the sum of iov[n].iov_len + * for 0 <= n < iovcnt. This must be less than 128 KiB (this limit may + * increase in the future). This must also be less than or equal to + * unencoded_len. + */ + const struct iovec __user *iov; + /* Number of iovecs. */ + unsigned long iovcnt; + /* + * Offset in file. + * + * For writes, must be aligned to the sector size of the filesystem. + */ + __s64 offset; + /* Currently must be zero. */ + __u64 flags; + + /* + * For reads, the following members are output parameters that will + * contain the returned metadata for the encoded data. + * For writes, the following members must be set to the metadata for the + * encoded data. + */ + + /* + * Length of the data in the file. + * + * Must be less than or equal to unencoded_len - unencoded_offset. For + * writes, must be aligned to the sector size of the filesystem unless + * the data ends at or beyond the current end of the file. + */ + __u64 len; + /* + * Length of the unencoded (i.e., decrypted and decompressed) data. + * + * For writes, must be no more than 128 KiB (this limit may increase in + * the future). 
If the unencoded data is actually longer than + * unencoded_len, then it is truncated; if it is shorter, then it is + * extended with zeroes. + */ + __u64 unencoded_len; + /* + * Offset from the first byte of the unencoded data to the first byte of + * logical data in the file. + * + * Must be less than unencoded_len. + */ + __u64 unencoded_offset; + /* + * BTRFS_ENCODED_IO_COMPRESSION_* type. + * + * For writes, must not be BTRFS_ENCODED_IO_COMPRESSION_NONE. + */ + __u32 compression; + /* Currently always BTRFS_ENCODED_IO_ENCRYPTION_NONE. */ + __u32 encryption; + /* + * Reserved for future expansion. + * + * For reads, always returned as zero. Users should check for non-zero + * bytes. If there are any, then the kernel has a newer version of this + * structure with additional information that the user definition is + * missing. + * + * For writes, must be zeroed. + */ + __u8 reserved[64]; +}; + +/* Data is not compressed. */ +#define BTRFS_ENCODED_IO_COMPRESSION_NONE 0 +/* Data is compressed as a single zlib stream. */ +#define BTRFS_ENCODED_IO_COMPRESSION_ZLIB 1 +/* + * Data is compressed as a single zstd frame with the windowLog compression + * parameter set to no more than 17. + */ +#define BTRFS_ENCODED_IO_COMPRESSION_ZSTD 2 +/* + * Data is compressed sector by sector (using the sector size indicated by the + * name of the constant) with LZO1X and wrapped in the format documented in + * fs/btrfs/lzo.c. For writes, the compression sector size must match the + * filesystem sector size. + */ +#define BTRFS_ENCODED_IO_COMPRESSION_LZO_4K 3 +#define BTRFS_ENCODED_IO_COMPRESSION_LZO_8K 4 +#define BTRFS_ENCODED_IO_COMPRESSION_LZO_16K 5 +#define BTRFS_ENCODED_IO_COMPRESSION_LZO_32K 6 +#define BTRFS_ENCODED_IO_COMPRESSION_LZO_64K 7 +#define BTRFS_ENCODED_IO_COMPRESSION_TYPES 8 + +/* Data is not encrypted. 
*/ +#define BTRFS_ENCODED_IO_ENCRYPTION_NONE 0 +#define BTRFS_ENCODED_IO_ENCRYPTION_TYPES 1 + /* Error codes as returned by the kernel */ enum btrfs_err_code { BTRFS_ERROR_DEV_RAID1_MIN_NOT_MET = 1, @@ -997,5 +1125,9 @@ enum btrfs_err_code { struct btrfs_ioctl_ino_lookup_user_args) #define BTRFS_IOC_SNAP_DESTROY_V2 _IOW(BTRFS_IOCTL_MAGIC, 63, \ struct btrfs_ioctl_vol_args_v2) +#define BTRFS_IOC_ENCODED_READ _IOR(BTRFS_IOCTL_MAGIC, 64, \ + struct btrfs_ioctl_encoded_io_args) +#define BTRFS_IOC_ENCODED_WRITE _IOW(BTRFS_IOCTL_MAGIC, 64, \ + struct btrfs_ioctl_encoded_io_args) #endif /* _UAPI_LINUX_BTRFS_H */ -- cgit v1.2.3 From 65722ff6181aa52c3d5b0929004af22a3a63e148 Mon Sep 17 00:00:00 2001 From: David Yat Sin Date: Tue, 8 Mar 2022 14:00:50 -0500 Subject: drm/amdkfd: CRIU export dmabuf handles for GTT BOs Export dmabuf handles for GTT BOs so that their contents can be accessed using SDMA during checkpoint/restore. v2: Squash in fix from David to set dmabuf handle to invalid for BOs that cannot be accessed using SDMA during checkpoint/restore. 
Signed-off-by: David Yat Sin Reviewed-by : Rajneesh Bhardwaj Signed-off-by: Alex Deucher --- include/uapi/linux/kfd_ioctl.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/kfd_ioctl.h b/include/uapi/linux/kfd_ioctl.h index b40687bf1014..42975e940758 100644 --- a/include/uapi/linux/kfd_ioctl.h +++ b/include/uapi/linux/kfd_ioctl.h @@ -33,9 +33,10 @@ * - 1.5 - Add SVM API * - 1.6 - Query clear flags in SVM get_attr API * - 1.7 - Checkpoint Restore (CRIU) API + * - 1.8 - CRIU - Support for SDMA transfers with GTT BOs */ #define KFD_IOCTL_MAJOR_VERSION 1 -#define KFD_IOCTL_MINOR_VERSION 7 +#define KFD_IOCTL_MINOR_VERSION 8 struct kfd_ioctl_get_version_args { __u32 major_version; /* from KFD */ @@ -195,6 +196,8 @@ struct kfd_ioctl_dbg_wave_control_args { __u32 buf_size_in_bytes; /*including gpu_id and buf_size */ }; +#define KFD_INVALID_FD 0xffffffff + /* Matching HSA_EVENTTYPE */ #define KFD_IOC_EVENT_SIGNAL 0 #define KFD_IOC_EVENT_NODECHANGE 1 -- cgit v1.2.3 From 435fe1c0c1f74b682dba85641406abf4337aade6 Mon Sep 17 00:00:00 2001 From: Eyal Birger Date: Wed, 16 Mar 2022 08:15:57 +0200 Subject: net: geneve: support IPv4/IPv6 as inner protocol This patch adds support for encapsulating IPv4/IPv6 within GENEVE. In order to use this, a new IFLA_GENEVE_INNER_PROTO_INHERIT flag needs to be provided at device creation. This property cannot be changed for the time being. In case IP traffic is received on a non-tun device the drop count is increased. 
Signed-off-by: Eyal Birger Link: https://lore.kernel.org/r/20220316061557.431872-1-eyal.birger@gmail.com Signed-off-by: Paolo Abeni --- include/uapi/linux/if_link.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h index bd24c7dc10a2..cc284c048e69 100644 --- a/include/uapi/linux/if_link.h +++ b/include/uapi/linux/if_link.h @@ -842,6 +842,7 @@ enum { IFLA_GENEVE_LABEL, IFLA_GENEVE_TTL_INHERIT, IFLA_GENEVE_DF, + IFLA_GENEVE_INNER_PROTO_INHERIT, __IFLA_GENEVE_MAX }; #define IFLA_GENEVE_MAX (__IFLA_GENEVE_MAX - 1) -- cgit v1.2.3 From ec7328b59176227216c461601c6bd0e922232a9b Mon Sep 17 00:00:00 2001 From: Tobias Waldekranz Date: Wed, 16 Mar 2022 16:08:43 +0100 Subject: net: bridge: mst: Multiple Spanning Tree (MST) mode Allow the user to switch from the current per-VLAN STP mode to an MST mode. Up to this point, per-VLAN STP states were always isolated from each other. This is in contrast to the MSTP standard (802.1Q-2018, Clause 13.5), where VLANs are grouped into MST instances (MSTIs), and the state is managed on a per-MSTI level, rather than at the per-VLAN level. Perhaps due to the prevalence of the standard, many switching ASICs are built after the same model. Therefore, add a corresponding MST mode to the bridge, which we can later add offloading support for in a straight-forward way. For now, all VLANs are fixed to MSTI 0, also called the Common Spanning Tree (CST). That is, all VLANs will follow the port-global state. Upcoming changes will make this actually useful by allowing VLANs to be mapped to arbitrary MSTIs and allow individual MSTI states to be changed.
Signed-off-by: Tobias Waldekranz Acked-by: Nikolay Aleksandrov Signed-off-by: Jakub Kicinski --- include/uapi/linux/if_bridge.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/if_bridge.h b/include/uapi/linux/if_bridge.h index 2711c3522010..30a242195ced 100644 --- a/include/uapi/linux/if_bridge.h +++ b/include/uapi/linux/if_bridge.h @@ -759,6 +759,7 @@ struct br_mcast_stats { enum br_boolopt_id { BR_BOOLOPT_NO_LL_LEARN, BR_BOOLOPT_MCAST_VLAN_SNOOPING, + BR_BOOLOPT_MST_ENABLE, BR_BOOLOPT_MAX }; -- cgit v1.2.3 From 8c678d60562f3e5f6d0a5f5465e27930ffedb8ca Mon Sep 17 00:00:00 2001 From: Tobias Waldekranz Date: Wed, 16 Mar 2022 16:08:44 +0100 Subject: net: bridge: mst: Allow changing a VLAN's MSTI Allow a VLAN to move out of the CST (MSTI 0), to an independent tree. The user manages the VID to MSTI mappings via a global VLAN setting. The proposed iproute2 interface would be: bridge vlan global set dev br0 vid msti Changing the state in non-zero MSTIs is still not supported, but will be addressed in upcoming changes. 
Signed-off-by: Tobias Waldekranz Acked-by: Nikolay Aleksandrov Signed-off-by: Jakub Kicinski --- include/uapi/linux/if_bridge.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/if_bridge.h b/include/uapi/linux/if_bridge.h index 30a242195ced..f60244b747ae 100644 --- a/include/uapi/linux/if_bridge.h +++ b/include/uapi/linux/if_bridge.h @@ -564,6 +564,7 @@ enum { BRIDGE_VLANDB_GOPTS_MCAST_QUERIER, BRIDGE_VLANDB_GOPTS_MCAST_ROUTER_PORTS, BRIDGE_VLANDB_GOPTS_MCAST_QUERIER_STATE, + BRIDGE_VLANDB_GOPTS_MSTI, __BRIDGE_VLANDB_GOPTS_MAX }; #define BRIDGE_VLANDB_GOPTS_MAX (__BRIDGE_VLANDB_GOPTS_MAX - 1) -- cgit v1.2.3 From 122c29486e1ff78033c45d0d31c710e7dc8945a5 Mon Sep 17 00:00:00 2001 From: Tobias Waldekranz Date: Wed, 16 Mar 2022 16:08:45 +0100 Subject: net: bridge: mst: Support setting and reporting MST port states Make it possible to change the port state in a given MSTI by extending the bridge port netlink interface (RTM_SETLINK on PF_BRIDGE). The proposed iproute2 interface would be: bridge mst set dev msti state Current states in all applicable MSTIs can also be dumped via a corresponding RTM_GETLINK.
The proposed iproute interface looks like this: $ bridge mst port msti vb1 0 state forwarding 100 state disabled vb2 0 state forwarding 100 state forwarding The preexisting per-VLAN states are still valid in the MST mode (although they are read-only), and can be queried as usual if one is interested in knowing a particular VLAN's state without having to care about the VID to MSTI mapping (in this example VLAN 20 and 30 are bound to MSTI 100): $ bridge -d vlan port vlan-id vb1 10 state forwarding mcast_router 1 20 state disabled mcast_router 1 30 state disabled mcast_router 1 40 state forwarding mcast_router 1 vb2 10 state forwarding mcast_router 1 20 state forwarding mcast_router 1 30 state forwarding mcast_router 1 40 state forwarding mcast_router 1 Signed-off-by: Tobias Waldekranz Acked-by: Nikolay Aleksandrov Signed-off-by: Jakub Kicinski --- include/uapi/linux/if_bridge.h | 16 ++++++++++++++++ include/uapi/linux/rtnetlink.h | 1 + 2 files changed, 17 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/if_bridge.h b/include/uapi/linux/if_bridge.h index f60244b747ae..a86a7e7b811f 100644 --- a/include/uapi/linux/if_bridge.h +++ b/include/uapi/linux/if_bridge.h @@ -122,6 +122,7 @@ enum { IFLA_BRIDGE_VLAN_TUNNEL_INFO, IFLA_BRIDGE_MRP, IFLA_BRIDGE_CFM, + IFLA_BRIDGE_MST, __IFLA_BRIDGE_MAX, }; #define IFLA_BRIDGE_MAX (__IFLA_BRIDGE_MAX - 1) @@ -453,6 +454,21 @@ enum { #define IFLA_BRIDGE_CFM_CC_PEER_STATUS_MAX (__IFLA_BRIDGE_CFM_CC_PEER_STATUS_MAX - 1) +enum { + IFLA_BRIDGE_MST_UNSPEC, + IFLA_BRIDGE_MST_ENTRY, + __IFLA_BRIDGE_MST_MAX, +}; +#define IFLA_BRIDGE_MST_MAX (__IFLA_BRIDGE_MST_MAX - 1) + +enum { + IFLA_BRIDGE_MST_ENTRY_UNSPEC, + IFLA_BRIDGE_MST_ENTRY_MSTI, + IFLA_BRIDGE_MST_ENTRY_STATE, + __IFLA_BRIDGE_MST_ENTRY_MAX, +}; +#define IFLA_BRIDGE_MST_ENTRY_MAX (__IFLA_BRIDGE_MST_ENTRY_MAX - 1) + struct bridge_stp_xstats { __u64 transition_blk; __u64 transition_fwd; diff --git a/include/uapi/linux/rtnetlink.h 
b/include/uapi/linux/rtnetlink.h index 51530aade46e..83849a37db5b 100644 --- a/include/uapi/linux/rtnetlink.h +++ b/include/uapi/linux/rtnetlink.h @@ -817,6 +817,7 @@ enum { #define RTEXT_FILTER_MRP (1 << 4) #define RTEXT_FILTER_CFM_CONFIG (1 << 5) #define RTEXT_FILTER_CFM_STATUS (1 << 6) +#define RTEXT_FILTER_MST (1 << 7) /* End of information exported to user level */ -- cgit v1.2.3 From 0dcac272540613d41c05e89679e4ddb978b612f1 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Wed, 16 Mar 2022 13:24:09 +0100 Subject: bpf: Add multi kprobe link Adding new link type BPF_LINK_TYPE_KPROBE_MULTI that attaches kprobe program through fprobe API. The fprobe API allows to attach probe on multiple functions at once very fast, because it works on top of ftrace. On the other hand this limits the probe point to the function entry or return. The kprobe program gets the same pt_regs input ctx as when it's attached through the perf API. Adding new attach type BPF_TRACE_KPROBE_MULTI that allows attachment kprobe to multiple function with new link. User provides array of addresses or symbols with count to attach the kprobe program to. The new link_create uapi interface looks like: struct { __u32 flags; __u32 cnt; __aligned_u64 syms; __aligned_u64 addrs; } kprobe_multi; The flags field allows single BPF_TRACE_KPROBE_MULTI bit to create return multi kprobe. 
Signed-off-by: Masami Hiramatsu Signed-off-by: Jiri Olsa Signed-off-by: Alexei Starovoitov Acked-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20220316122419.933957-4-jolsa@kernel.org --- include/uapi/linux/bpf.h | 13 +++++++++++++ 1 file changed, 13 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 99fab54ae9c0..d77f47af7752 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -997,6 +997,7 @@ enum bpf_attach_type { BPF_SK_REUSEPORT_SELECT, BPF_SK_REUSEPORT_SELECT_OR_MIGRATE, BPF_PERF_EVENT, + BPF_TRACE_KPROBE_MULTI, __MAX_BPF_ATTACH_TYPE }; @@ -1011,6 +1012,7 @@ enum bpf_link_type { BPF_LINK_TYPE_NETNS = 5, BPF_LINK_TYPE_XDP = 6, BPF_LINK_TYPE_PERF_EVENT = 7, + BPF_LINK_TYPE_KPROBE_MULTI = 8, MAX_BPF_LINK_TYPE, }; @@ -1118,6 +1120,11 @@ enum bpf_link_type { */ #define BPF_F_XDP_HAS_FRAGS (1U << 5) +/* link_create.kprobe_multi.flags used in LINK_CREATE command for + * BPF_TRACE_KPROBE_MULTI attach type to create return probe. + */ +#define BPF_F_KPROBE_MULTI_RETURN (1U << 0) + /* When BPF ldimm64's insn[0].src_reg != 0 then this can have * the following extensions: * @@ -1475,6 +1482,12 @@ union bpf_attr { */ __u64 bpf_cookie; } perf_event; + struct { + __u32 flags; + __u32 cnt; + __aligned_u64 syms; + __aligned_u64 addrs; + } kprobe_multi; }; } link_create; -- cgit v1.2.3 From ca74823c6e16dd42b7cf60d9fdde80e2a81a67bb Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Wed, 16 Mar 2022 13:24:12 +0100 Subject: bpf: Add cookie support to programs attached with kprobe multi link Adding support to call bpf_get_attach_cookie helper from kprobe programs attached with kprobe multi link. The cookie is provided by array of u64 values, where each value is paired with provided function address or symbol with the same array index. When cookie array is provided it's sorted together with addresses (check bpf_kprobe_multi_cookie_swap). 
This way we can find cookie based on the address in bpf_get_attach_cookie helper. Suggested-by: Andrii Nakryiko Signed-off-by: Jiri Olsa Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20220316122419.933957-7-jolsa@kernel.org --- include/uapi/linux/bpf.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index d77f47af7752..7604e7d5438f 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -1487,6 +1487,7 @@ union bpf_attr { __u32 cnt; __aligned_u64 syms; __aligned_u64 addrs; + __aligned_u64 cookies; } kprobe_multi; }; } link_create; -- cgit v1.2.3 From 54f586a9153201c6cff55e1f561990c78bd99aa7 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 16 Mar 2022 21:27:51 +0100 Subject: rfkill: make new event layout opt-in Again new complaints surfaced that we had broken the ABI here, although previously all the userspace tools had agreed that it was their mistake and fixed it. Yet now there are cases (e.g. RHEL) that want to run old userspace with newer kernels, and thus are broken. Since this is a bit of a whack-a-mole thing, change the whole extensibility scheme of rfkill to no longer just rely on the message lengths, but instead require userspace to opt in via a new ioctl to a given maximum event size that it is willing to understand. By default, set that to RFKILL_EVENT_SIZE_V1 (8), so that the behaviour for userspace not calling the ioctl will look as if it's just running on an older kernel. 
Fixes: 14486c82612a ("rfkill: add a reason to the HW rfkill state") Cc: stable@vger.kernel.org # 5.11+ Signed-off-by: Johannes Berg Signed-off-by: Kalle Valo Link: https://lore.kernel.org/r/20220316212749.16491491b270.Ifcb1950998330a596f29a2a162e00b7546a1d6d0@changeid --- include/uapi/linux/rfkill.h | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/rfkill.h b/include/uapi/linux/rfkill.h index 9b77cfc42efa..283c5a7b3f2c 100644 --- a/include/uapi/linux/rfkill.h +++ b/include/uapi/linux/rfkill.h @@ -159,8 +159,16 @@ struct rfkill_event_ext { * old behaviour for all userspace, unless it explicitly opts in to the * rules outlined here by using the new &struct rfkill_event_ext. * - * Userspace using &struct rfkill_event_ext must adhere to the following - * rules + * Additionally, some other userspace (bluez, g-s-d) was reading with a + * large size but as streaming reads rather than message-based, or with + * too strict checks for the returned size. So eventually, we completely + * reverted this, and extended messages need to be opted in to by using + * an ioctl: + * + * ioctl(fd, RFKILL_IOCTL_MAX_SIZE, sizeof(struct rfkill_event_ext)); + * + * Userspace using &struct rfkill_event_ext and the ioctl must adhere to + * the following rules: * * 1. 
accept short writes, optionally using them to detect that it's * running on an older kernel; @@ -175,6 +183,8 @@ struct rfkill_event_ext { #define RFKILL_IOC_MAGIC 'R' #define RFKILL_IOC_NOINPUT 1 #define RFKILL_IOCTL_NOINPUT _IO(RFKILL_IOC_MAGIC, RFKILL_IOC_NOINPUT) +#define RFKILL_IOC_MAX_SIZE 2 +#define RFKILL_IOCTL_MAX_SIZE _IOW(RFKILL_IOC_MAGIC, RFKILL_IOC_MAX_SIZE, __u32) /* and that's all userspace gets */ -- cgit v1.2.3 From 73799a889262b4675799bec20a2765be6d6a3f98 Mon Sep 17 00:00:00 2001 From: Oleksij Rempel Date: Tue, 15 Mar 2022 15:38:54 -0400 Subject: counter: add new COUNTER_EVENT_CHANGE_OF_STATE Add new counter event to notify user space about every new counter pulse. Link: https://lore.kernel.org/r/20220203135727.2374052-2-o.rempel@pengutronix.de Signed-off-by: Oleksij Rempel Signed-off-by: William Breathitt Gray Link: https://lore.kernel.org/r/486a5de67414470449efb84d06a2f2214f4bb31d.1647373009.git.vilhelm.gray@gmail.com Signed-off-by: Greg Kroah-Hartman --- include/uapi/linux/counter.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/counter.h b/include/uapi/linux/counter.h index d0aa95aeff7b..96c5ffd368ad 100644 --- a/include/uapi/linux/counter.h +++ b/include/uapi/linux/counter.h @@ -61,6 +61,8 @@ enum counter_event_type { COUNTER_EVENT_THRESHOLD, /* Index signal detected */ COUNTER_EVENT_INDEX, + /* State of counter is changed */ + COUNTER_EVENT_CHANGE_OF_STATE, }; /** -- cgit v1.2.3 From 336d4b814bf078fa698488632c19beca47308896 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Thu, 27 Jan 2022 12:15:32 -0600 Subject: ptrace: Move setting/clearing ptrace_message into ptrace_stop Today ptrace_message is easy to overlook as it is not a core part of ptrace_stop. It has been overlooked so much that there are places that set ptrace_message and don't clear it, and places that never set it.
So if you get an unlucky sequence of events the ptracer may be able to read a ptrace_message that does not apply to the current ptrace stop. Move setting of ptrace_message into ptrace_stop so that it always gets set before the stop, and always gets cleared after the stop. This prevents non-sense from being reported to userspace and makes ptrace_message more visible in the ptrace helper functions so that kernel developers can see it. Link: https://lkml.kernel.org/r/87bky67qfv.fsf_-_@email.froward.int.ebiederm.org Acked-by: Oleg Nesterov Reviewed-by: Kees Cook Signed-off-by: "Eric W. Biederman" --- include/uapi/linux/ptrace.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/ptrace.h b/include/uapi/linux/ptrace.h index b7af92e07d1f..195ae64a8c87 100644 --- a/include/uapi/linux/ptrace.h +++ b/include/uapi/linux/ptrace.h @@ -114,7 +114,7 @@ struct ptrace_rseq_configuration { /* * These values are stored in task->ptrace_message - * by ptrace_report_syscall_* to describe the current syscall-stop. + * by ptrace_stop to describe the current syscall-stop. */ #define PTRACE_EVENTMSG_SYSCALL_ENTRY 1 #define PTRACE_EVENTMSG_SYSCALL_EXIT 2 -- cgit v1.2.3 From ee2a098851bfbe8bcdd964c0121f4246f00ff41e Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Mon, 14 Mar 2022 11:20:41 -0700 Subject: bpf: Adjust BPF stack helper functions to accommodate skip > 0 Let's say that the caller has storage for num_elem stack frames. Then, the BPF stack helper functions walk the stack for only num_elem frames. This means that if skip > 0, one keeps only 'num_elem - skip' frames. This is because it sets init_nr in the perf_callchain_entry to the end of the buffer to save num_elem entries only. I believe it was because the perf callchain code unwound the stack frames until it reached the global max size (sysctl_perf_event_max_stack). However it now has perf_callchain_entry_ctx.max_stack to limit the iteration locally. 
This simplifies the code to handle init_nr in the BPF callstack entries and removes the confusion with the perf_event's __PERF_SAMPLE_CALLCHAIN_EARLY which sets init_nr to 0. Also change the comment on bpf_get_stack() in the header file to be more explicit what the return value means. Fixes: c195651e565a ("bpf: add bpf_get_stack helper") Signed-off-by: Namhyung Kim Signed-off-by: Alexei Starovoitov Acked-by: Yonghong Song Link: https://lore.kernel.org/bpf/30a7b5d5-6726-1cc2-eaee-8da2828a9a9c@oracle.com Link: https://lore.kernel.org/bpf/20220314182042.71025-1-namhyung@kernel.org Based-on-patch-by: Eugene Loh --- include/uapi/linux/bpf.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 7604e7d5438f..d14b10b85e51 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -3009,8 +3009,8 @@ union bpf_attr { * * # sysctl kernel.perf_event_max_stack= * Return - * A non-negative value equal to or less than *size* on success, - * or a negative error in case of failure. + * The non-negative copied *buf* length equal to or less than + * *size* on success, or a negative error in case of failure. * * long bpf_skb_load_bytes_relative(const void *skb, u32 offset, void *to, u32 len, u32 start_header) * Description @@ -4316,8 +4316,8 @@ union bpf_attr { * * # sysctl kernel.perf_event_max_stack= * Return - * A non-negative value equal to or less than *size* on success, - * or a negative error in case of failure. + * The non-negative copied *buf* length equal to or less than + * *size* on success, or a negative error in case of failure. 
* * long bpf_load_hdr_opt(struct bpf_sock_ops *skops, void *searchby_res, u32 len, u64 flags) * Description -- cgit v1.2.3 From 6d8491910fcd3324d0f0ece3bd68e85ead3a04d7 Mon Sep 17 00:00:00 2001 From: Oliver Upton Date: Tue, 1 Mar 2022 06:03:47 +0000 Subject: KVM: x86: Introduce KVM_CAP_DISABLE_QUIRKS2 KVM_CAP_DISABLE_QUIRKS is irrevocably broken. The capability does not advertise the set of quirks which may be disabled to userspace, so it is impossible to predict the behavior of KVM. Worse yet, KVM_CAP_DISABLE_QUIRKS will tolerate any value for cap->args[0], meaning it fails to reject attempts to set invalid quirk bits. The only valid workaround for the quirky quirks API is to add a new CAP. Actually advertise the set of quirks that can be disabled to userspace so it can predict KVM's behavior. Reject values for cap->args[0] that contain invalid bits. Finally, add documentation for the new capability and describe the existing quirks. Signed-off-by: Oliver Upton Message-Id: <20220301060351.442881-5-oupton@google.com> Signed-off-by: Paolo Bonzini --- include/uapi/linux/kvm.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index d2f1efc3aa35..91a6fe4e02c0 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -1143,6 +1143,7 @@ struct kvm_ppc_resize_hpt { #define KVM_CAP_PPC_AIL_MODE_3 210 #define KVM_CAP_S390_MEM_OP_EXTENSION 211 #define KVM_CAP_PMU_CAPABILITY 212 +#define KVM_CAP_DISABLE_QUIRKS2 213 #ifdef KVM_CAP_IRQ_ROUTING -- cgit v1.2.3 From 824ddc601adc2cc48efb7f58b57997986c1c1276 Mon Sep 17 00:00:00 2001 From: Nadav Amit Date: Tue, 22 Mar 2022 14:45:32 -0700 Subject: userfaultfd: provide unmasked address on page-fault Userfaultfd is supposed to provide the full address (i.e., unmasked) of the faulting access back to userspace. However, that is not the case for quite some time. 
Even running "userfaultfd_demo" from the userfaultfd man page provides the wrong output (and contradicts the man page). Notice that "UFFD_EVENT_PAGEFAULT event" shows the masked address (7fc5e30b3000) and not the first read address (0x7fc5e30b300f). Address returned by mmap() = 0x7fc5e30b3000 fault_handler_thread(): poll() returns: nready = 1; POLLIN = 1; POLLERR = 0 UFFD_EVENT_PAGEFAULT event: flags = 0; address = 7fc5e30b3000 (uffdio_copy.copy returned 4096) Read address 0x7fc5e30b300f in main(): A Read address 0x7fc5e30b340f in main(): A Read address 0x7fc5e30b380f in main(): A Read address 0x7fc5e30b3c0f in main(): A The exact address is useful for various reasons and specifically for prefetching decisions. If it is known that the memory is populated by certain objects whose size is not page-aligned, then based on the faulting address, the uffd-monitor can decide whether to prefetch and prefault the adjacent page. This bug has been for quite some time in the kernel: since commit 1a29d85eb0f1 ("mm: use vmf->address instead of of vmf->virtual_address"), which dates back to 2016. A concern has been raised that existing userspace application might rely on the old/wrong behavior in which the address is masked. Therefore, it was suggested to provide the masked address unless the user explicitly asks for the exact address. Add a new userfaultfd feature UFFD_FEATURE_EXACT_ADDRESS to direct userfaultfd to provide the exact address. Add a new "real_address" field to vmf to hold the unmasked address. Provide the address to userspace accordingly. Initialize real_address in various code-paths to be consistent with address, even when it is not used, to be on the safe side.
[namit@vmware.com: initialize real_address on all code paths, per Jan] Link: https://lkml.kernel.org/r/20220226022655.350562-1-namit@vmware.com [akpm@linux-foundation.org: fix typo in comment, per Jan] Link: https://lkml.kernel.org/r/20220218041003.3508-1-namit@vmware.com Signed-off-by: Nadav Amit Acked-by: Peter Xu Reviewed-by: David Hildenbrand Acked-by: Mike Rapoport Reviewed-by: Jan Kara Cc: Andrea Arcangeli Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/uapi/linux/userfaultfd.h | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/userfaultfd.h b/include/uapi/linux/userfaultfd.h index 05b31d60acf6..ef739054cb1c 100644 --- a/include/uapi/linux/userfaultfd.h +++ b/include/uapi/linux/userfaultfd.h @@ -32,7 +32,8 @@ UFFD_FEATURE_SIGBUS | \ UFFD_FEATURE_THREAD_ID | \ UFFD_FEATURE_MINOR_HUGETLBFS | \ - UFFD_FEATURE_MINOR_SHMEM) + UFFD_FEATURE_MINOR_SHMEM | \ + UFFD_FEATURE_EXACT_ADDRESS) #define UFFD_API_IOCTLS \ ((__u64)1 << _UFFDIO_REGISTER | \ (__u64)1 << _UFFDIO_UNREGISTER | \ @@ -189,6 +190,10 @@ struct uffdio_api { * * UFFD_FEATURE_MINOR_SHMEM indicates the same support as * UFFD_FEATURE_MINOR_HUGETLBFS, but for shmem-backed pages instead. + * + * UFFD_FEATURE_EXACT_ADDRESS indicates that the exact address of page + * faults would be provided and the offset within the page would not be + * masked. 
*/ #define UFFD_FEATURE_PAGEFAULT_FLAG_WP (1<<0) #define UFFD_FEATURE_EVENT_FORK (1<<1) @@ -201,6 +206,7 @@ struct uffdio_api { #define UFFD_FEATURE_THREAD_ID (1<<8) #define UFFD_FEATURE_MINOR_HUGETLBFS (1<<9) #define UFFD_FEATURE_MINOR_SHMEM (1<<10) +#define UFFD_FEATURE_EXACT_ADDRESS (1<<11) __u64 features; __u64 ioctls; -- cgit v1.2.3 From e99653afeb9585350644c5ae4b0ca987cbe8d053 Mon Sep 17 00:00:00 2001 From: Alexandre Belloni Date: Wed, 9 Mar 2022 17:22:53 +0100 Subject: rtc: add new RTC_FEATURE_ALARM_WAKEUP_ONLY feature Some RTCs have an IRQ pin that is not connected to a CPU interrupt but rather directly to a PMIC or power supply. In that case, it is still useful to be able to set alarms but we shouldn't expect interrupts. Signed-off-by: Alexandre Belloni Link: https://lore.kernel.org/r/20220309162301.61679-22-alexandre.belloni@bootlin.com --- include/uapi/linux/rtc.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/rtc.h b/include/uapi/linux/rtc.h index 03e5b776e597..97aca4503a6a 100644 --- a/include/uapi/linux/rtc.h +++ b/include/uapi/linux/rtc.h @@ -133,7 +133,8 @@ struct rtc_param { #define RTC_FEATURE_UPDATE_INTERRUPT 4 #define RTC_FEATURE_CORRECTION 5 #define RTC_FEATURE_BACKUP_SWITCH_MODE 6 -#define RTC_FEATURE_CNT 7 +#define RTC_FEATURE_ALARM_WAKEUP_ONLY 7 +#define RTC_FEATURE_CNT 8 /* parameter list */ #define RTC_PARAM_FEATURES 0 -- cgit v1.2.3 From c724c866bb70cb8c607081a26823a1f0ebde4387 Mon Sep 17 00:00:00 2001 From: Bjorn Helgaas Date: Wed, 23 Mar 2022 16:05:29 -0700 Subject: linux/types.h: remove unnecessary __bitwise__ There are no users of "__bitwise__" except the definition of "__bitwise". Remove __bitwise__ and define __bitwise directly. This is a follow-up to 05de97003c77 ("linux/types.h: enable endian checks for all sparse builds"). 
[akpm@linux-foundation.org: change the tools/include/linux/types.h definition also] Link: https://lkml.kernel.org/r/20220310220927.245704-2-helgaas@kernel.org Signed-off-by: Bjorn Helgaas Cc: Michael S. Tsirkin Cc: Jonathan Corbet Cc: Nathan Chancellor Cc: Nick Desaulniers Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/uapi/linux/types.h | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/types.h b/include/uapi/linux/types.h index f6d2f83cbe29..71696f424ac8 100644 --- a/include/uapi/linux/types.h +++ b/include/uapi/linux/types.h @@ -20,11 +20,10 @@ */ #ifdef __CHECKER__ -#define __bitwise__ __attribute__((bitwise)) +#define __bitwise __attribute__((bitwise)) #else -#define __bitwise__ +#define __bitwise #endif -#define __bitwise __bitwise__ typedef __u16 __bitwise __le16; typedef __u16 __bitwise __be16; -- cgit v1.2.3 From 179fd6ba3bacbf7b19cbdf9b14be109d54318394 Mon Sep 17 00:00:00 2001 From: Bjorn Helgaas Date: Wed, 23 Mar 2022 16:05:32 -0700 Subject: Documentation/sparse: add hints about __CHECKER__ Several attributes depend on __CHECKER__, but previously there was no clue in the tree about when __CHECKER__ might be defined. Add hints at the most common places (__kernel, __user, __iomem, __bitwise) and in the sparse documentation. Link: https://lkml.kernel.org/r/20220310220927.245704-3-helgaas@kernel.org Signed-off-by: Bjorn Helgaas Cc: Jonathan Corbet Cc: Nathan Chancellor Cc: Nick Desaulniers Cc: "Michael S . Tsirkin" Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/uapi/linux/types.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/types.h b/include/uapi/linux/types.h index 71696f424ac8..c4dc597f3dcf 100644 --- a/include/uapi/linux/types.h +++ b/include/uapi/linux/types.h @@ -19,6 +19,7 @@ * any application/library that wants linux/types.h. 
*/ +/* sparse defines __CHECKER__; see Documentation/dev-tools/sparse.rst */ #ifdef __CHECKER__ #define __bitwise __attribute__((bitwise)) #else -- cgit v1.2.3 From 7ef66d186eb95f987a97fb3329b65c840e2dc9bf Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Thu, 24 Mar 2022 06:53:18 -0600 Subject: io_uring: remove IORING_CQE_F_MSG This was introduced with the message ring opcode, but isn't strictly required for the request itself. The sender can encode what is needed in user_data, which is passed to the receiver. It's unclear if having a separate flag that essentially says "This CQE did not originate from an SQE on this ring" provides any real utility to applications. While we can always re-introduce a flag to provide this information, we cannot take it away at a later point in time. Remove the flag while we still can, before it's in a released kernel. Signed-off-by: Jens Axboe --- include/uapi/linux/io_uring.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h index d2be4eb22008..784adc6f6ed2 100644 --- a/include/uapi/linux/io_uring.h +++ b/include/uapi/linux/io_uring.h @@ -201,11 +201,9 @@ struct io_uring_cqe { * * IORING_CQE_F_BUFFER If set, the upper 16 bits are the buffer ID * IORING_CQE_F_MORE If set, parent SQE will generate more CQE entries - * IORING_CQE_F_MSG If set, CQE was generated with IORING_OP_MSG_RING */ #define IORING_CQE_F_BUFFER (1U << 0) #define IORING_CQE_F_MORE (1U << 1) -#define IORING_CQE_F_MSG (1U << 2) enum { IORING_CQE_BUFFER_SHIFT = 16, -- cgit v1.2.3 From 90a6951b58e935124eeb7ecd9fbc2426f841ac0c Mon Sep 17 00:00:00 2001 From: Gautam Dawar Date: Tue, 15 Feb 2022 11:04:29 +0530 Subject: Add definition of VIRTIO_F_IN_ORDER feature bit This patch adds the definition of VIRTIO_F_IN_ORDER feature bit in the relevant header file to make it available in QEMU's linux standard header file virtio_config.h, which is updated using 
scripts/update-linux-headers.sh Signed-off-by: Gautam Dawar Link: https://lore.kernel.org/r/20220215053430.24650-1-gdawar@xilinx.com Signed-off-by: Michael S. Tsirkin Acked-by: Jason Wang --- include/uapi/linux/virtio_config.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/virtio_config.h b/include/uapi/linux/virtio_config.h index b5eda06f0d57..f0fb0ae021c0 100644 --- a/include/uapi/linux/virtio_config.h +++ b/include/uapi/linux/virtio_config.h @@ -82,6 +82,12 @@ /* This feature indicates support for the packed virtqueue layout. */ #define VIRTIO_F_RING_PACKED 34 +/* + * Inorder feature indicates that all buffers are used by the device + * in the same order in which they have been made available. + */ +#define VIRTIO_F_IN_ORDER 35 + /* * This feature indicates that memory accesses by the driver and the * device are ordered in a way described by the platform. -- cgit v1.2.3 From 13d640a3e9a3ac7ec694843d3d3b785e85fb8cb8 Mon Sep 17 00:00:00 2001 From: zhenwei pi Date: Wed, 2 Mar 2022 11:39:14 +0800 Subject: virtio_crypto: Introduce VIRTIO_CRYPTO_NOSPC Based on the latest virtio crypto spec, define VIRTIO_CRYPTO_NOSPC. Reviewed-by: Gonglei Signed-off-by: zhenwei pi Link: https://lore.kernel.org/r/20220302033917.1295334-2-pizhenwei@bytedance.com Signed-off-by: Michael S.
Tsirkin --- include/uapi/linux/virtio_crypto.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/virtio_crypto.h b/include/uapi/linux/virtio_crypto.h index a03932f10565..1166a49084b0 100644 --- a/include/uapi/linux/virtio_crypto.h +++ b/include/uapi/linux/virtio_crypto.h @@ -408,6 +408,7 @@ struct virtio_crypto_op_data_req { #define VIRTIO_CRYPTO_BADMSG 2 #define VIRTIO_CRYPTO_NOTSUPP 3 #define VIRTIO_CRYPTO_INVSESS 4 /* Invalid session id */ +#define VIRTIO_CRYPTO_NOSPC 5 /* no free session ID */ /* The accelerator hardware is ready */ #define VIRTIO_CRYPTO_S_HW_READY (1 << 0) -- cgit v1.2.3 From 24e19590628b58578748eeaec8140bf9c9dc00d9 Mon Sep 17 00:00:00 2001 From: zhenwei pi Date: Wed, 2 Mar 2022 11:39:15 +0800 Subject: virtio-crypto: introduce akcipher service Introduce asymmetric service definition, asymmetric operations and several well known algorithms. Co-developed-by: lei he Signed-off-by: lei he Signed-off-by: zhenwei pi Link: https://lore.kernel.org/r/20220302033917.1295334-3-pizhenwei@bytedance.com Signed-off-by: Michael S. 
Tsirkin Reviewed-by: Gonglei --- include/uapi/linux/virtio_crypto.h | 81 +++++++++++++++++++++++++++++++++++++- 1 file changed, 80 insertions(+), 1 deletion(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/virtio_crypto.h b/include/uapi/linux/virtio_crypto.h index 1166a49084b0..71a54a6849ca 100644 --- a/include/uapi/linux/virtio_crypto.h +++ b/include/uapi/linux/virtio_crypto.h @@ -37,6 +37,7 @@ #define VIRTIO_CRYPTO_SERVICE_HASH 1 #define VIRTIO_CRYPTO_SERVICE_MAC 2 #define VIRTIO_CRYPTO_SERVICE_AEAD 3 +#define VIRTIO_CRYPTO_SERVICE_AKCIPHER 4 #define VIRTIO_CRYPTO_OPCODE(service, op) (((service) << 8) | (op)) @@ -57,6 +58,10 @@ struct virtio_crypto_ctrl_header { VIRTIO_CRYPTO_OPCODE(VIRTIO_CRYPTO_SERVICE_AEAD, 0x02) #define VIRTIO_CRYPTO_AEAD_DESTROY_SESSION \ VIRTIO_CRYPTO_OPCODE(VIRTIO_CRYPTO_SERVICE_AEAD, 0x03) +#define VIRTIO_CRYPTO_AKCIPHER_CREATE_SESSION \ + VIRTIO_CRYPTO_OPCODE(VIRTIO_CRYPTO_SERVICE_AKCIPHER, 0x04) +#define VIRTIO_CRYPTO_AKCIPHER_DESTROY_SESSION \ + VIRTIO_CRYPTO_OPCODE(VIRTIO_CRYPTO_SERVICE_AKCIPHER, 0x05) __le32 opcode; __le32 algo; __le32 flag; @@ -180,6 +185,58 @@ struct virtio_crypto_aead_create_session_req { __u8 padding[32]; }; +struct virtio_crypto_rsa_session_para { +#define VIRTIO_CRYPTO_RSA_RAW_PADDING 0 +#define VIRTIO_CRYPTO_RSA_PKCS1_PADDING 1 + __le32 padding_algo; + +#define VIRTIO_CRYPTO_RSA_NO_HASH 0 +#define VIRTIO_CRYPTO_RSA_MD2 1 +#define VIRTIO_CRYPTO_RSA_MD3 2 +#define VIRTIO_CRYPTO_RSA_MD4 3 +#define VIRTIO_CRYPTO_RSA_MD5 4 +#define VIRTIO_CRYPTO_RSA_SHA1 5 +#define VIRTIO_CRYPTO_RSA_SHA256 6 +#define VIRTIO_CRYPTO_RSA_SHA384 7 +#define VIRTIO_CRYPTO_RSA_SHA512 8 +#define VIRTIO_CRYPTO_RSA_SHA224 9 + __le32 hash_algo; +}; + +struct virtio_crypto_ecdsa_session_para { +#define VIRTIO_CRYPTO_CURVE_UNKNOWN 0 +#define VIRTIO_CRYPTO_CURVE_NIST_P192 1 +#define VIRTIO_CRYPTO_CURVE_NIST_P224 2 +#define VIRTIO_CRYPTO_CURVE_NIST_P256 3 +#define VIRTIO_CRYPTO_CURVE_NIST_P384 4 +#define 
VIRTIO_CRYPTO_CURVE_NIST_P521 5 + __le32 curve_id; + __le32 padding; +}; + +struct virtio_crypto_akcipher_session_para { +#define VIRTIO_CRYPTO_NO_AKCIPHER 0 +#define VIRTIO_CRYPTO_AKCIPHER_RSA 1 +#define VIRTIO_CRYPTO_AKCIPHER_DSA 2 +#define VIRTIO_CRYPTO_AKCIPHER_ECDSA 3 + __le32 algo; + +#define VIRTIO_CRYPTO_AKCIPHER_KEY_TYPE_PUBLIC 1 +#define VIRTIO_CRYPTO_AKCIPHER_KEY_TYPE_PRIVATE 2 + __le32 keytype; + __le32 keylen; + + union { + struct virtio_crypto_rsa_session_para rsa; + struct virtio_crypto_ecdsa_session_para ecdsa; + } u; +}; + +struct virtio_crypto_akcipher_create_session_req { + struct virtio_crypto_akcipher_session_para para; + __u8 padding[36]; +}; + struct virtio_crypto_alg_chain_session_para { #define VIRTIO_CRYPTO_SYM_ALG_CHAIN_ORDER_HASH_THEN_CIPHER 1 #define VIRTIO_CRYPTO_SYM_ALG_CHAIN_ORDER_CIPHER_THEN_HASH 2 @@ -247,6 +304,8 @@ struct virtio_crypto_op_ctrl_req { mac_create_session; struct virtio_crypto_aead_create_session_req aead_create_session; + struct virtio_crypto_akcipher_create_session_req + akcipher_create_session; struct virtio_crypto_destroy_session_req destroy_session; __u8 padding[56]; @@ -266,6 +325,14 @@ struct virtio_crypto_op_header { VIRTIO_CRYPTO_OPCODE(VIRTIO_CRYPTO_SERVICE_AEAD, 0x00) #define VIRTIO_CRYPTO_AEAD_DECRYPT \ VIRTIO_CRYPTO_OPCODE(VIRTIO_CRYPTO_SERVICE_AEAD, 0x01) +#define VIRTIO_CRYPTO_AKCIPHER_ENCRYPT \ + VIRTIO_CRYPTO_OPCODE(VIRTIO_CRYPTO_SERVICE_AKCIPHER, 0x00) +#define VIRTIO_CRYPTO_AKCIPHER_DECRYPT \ + VIRTIO_CRYPTO_OPCODE(VIRTIO_CRYPTO_SERVICE_AKCIPHER, 0x01) +#define VIRTIO_CRYPTO_AKCIPHER_SIGN \ + VIRTIO_CRYPTO_OPCODE(VIRTIO_CRYPTO_SERVICE_AKCIPHER, 0x02) +#define VIRTIO_CRYPTO_AKCIPHER_VERIFY \ + VIRTIO_CRYPTO_OPCODE(VIRTIO_CRYPTO_SERVICE_AKCIPHER, 0x03) __le32 opcode; /* algo should be service-specific algorithms */ __le32 algo; @@ -390,6 +457,16 @@ struct virtio_crypto_aead_data_req { __u8 padding[32]; }; +struct virtio_crypto_akcipher_para { + __le32 src_data_len; + __le32 dst_data_len; +}; + 
+struct virtio_crypto_akcipher_data_req { + struct virtio_crypto_akcipher_para para; + __u8 padding[40]; +}; + /* The request of the data virtqueue's packet */ struct virtio_crypto_op_data_req { struct virtio_crypto_op_header header; @@ -399,6 +476,7 @@ struct virtio_crypto_op_data_req { struct virtio_crypto_hash_data_req hash_req; struct virtio_crypto_mac_data_req mac_req; struct virtio_crypto_aead_data_req aead_req; + struct virtio_crypto_akcipher_data_req akcipher_req; __u8 padding[48]; } u; }; @@ -409,6 +487,7 @@ struct virtio_crypto_op_data_req { #define VIRTIO_CRYPTO_NOTSUPP 3 #define VIRTIO_CRYPTO_INVSESS 4 /* Invalid session id */ #define VIRTIO_CRYPTO_NOSPC 5 /* no free session ID */ +#define VIRTIO_CRYPTO_KEY_REJECTED 6 /* Signature verification failed */ /* The accelerator hardware is ready */ #define VIRTIO_CRYPTO_S_HW_READY (1 << 0) @@ -439,7 +518,7 @@ struct virtio_crypto_config { __le32 max_cipher_key_len; /* Maximum length of authenticated key */ __le32 max_auth_key_len; - __le32 reserve; + __le32 akcipher_algo; /* Maximum size of each crypto request's content */ __le64 max_size; }; -- cgit v1.2.3 From a61280ddddaa45f95b60dd54c05f8e0e5b6810b7 Mon Sep 17 00:00:00 2001 From: Longpeng Date: Tue, 15 Mar 2022 11:25:51 +0800 Subject: vdpa: support exposing the config size to userspace - GET_CONFIG_SIZE: return the size of the virtio config space. The size contains the fields which are conditional on feature bits. Acked-by: Jason Wang Signed-off-by: Longpeng Link: https://lore.kernel.org/r/20220315032553.455-2-longpeng2@huawei.com Signed-off-by: Michael S. 
Tsirkin Reviewed-by: Stefano Garzarella --- include/uapi/linux/vhost.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/vhost.h b/include/uapi/linux/vhost.h index c998860d7bbc..bc74e95a273a 100644 --- a/include/uapi/linux/vhost.h +++ b/include/uapi/linux/vhost.h @@ -150,4 +150,8 @@ /* Get the valid iova range */ #define VHOST_VDPA_GET_IOVA_RANGE _IOR(VHOST_VIRTIO, 0x78, \ struct vhost_vdpa_iova_range) + +/* Get the config size */ +#define VHOST_VDPA_GET_CONFIG_SIZE _IOR(VHOST_VIRTIO, 0x79, __u32) + #endif -- cgit v1.2.3 From b04d910af330b55e1d5d6eb9ecd53a375a9cf81c Mon Sep 17 00:00:00 2001 From: Longpeng Date: Tue, 15 Mar 2022 11:25:53 +0800 Subject: vdpa: support exposing the count of vqs to userspace - GET_VQS_COUNT: the count of virtqueues that exposed Signed-off-by: Longpeng Link: https://lore.kernel.org/r/20220315032553.455-4-longpeng2@huawei.com Signed-off-by: Michael S. Tsirkin Acked-by: Jason Wang Signed-off-by: Longpeng <longpeng2@huawei.com>
Reviewed-by: Stefano Garzarella --- include/uapi/linux/vhost.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/vhost.h b/include/uapi/linux/vhost.h index bc74e95a273a..5d99e7c242a2 100644 --- a/include/uapi/linux/vhost.h +++ b/include/uapi/linux/vhost.h @@ -154,4 +154,7 @@ /* Get the config size */ #define VHOST_VDPA_GET_CONFIG_SIZE _IOR(VHOST_VIRTIO, 0x79, __u32) +/* Get the count of all virtqueues */ +#define VHOST_VDPA_GET_VQS_COUNT _IOR(VHOST_VIRTIO, 0x80, __u32) + #endif -- cgit v1.2.3 From f941c51eeac7ebe0f8ec30943bf78e7f60aad039 Mon Sep 17 00:00:00 2001 From: Carlos Llamas Date: Tue, 29 Mar 2022 20:18:15 +0000 Subject: loop: fix ioctl calls using compat_loop_info Support for cryptoloop was deleted in commit 47e9624616c8 ("block: remove support for cryptoloop and the xor transfer"), making the usage of loop_info->lo_encrypt_type obsolete. However, this member was also removed from the compat_loop_info definition and this breaks userspace ioctl calls for 32-bit binaries and CONFIG_COMPAT=y. This patch restores the compat_loop_info->lo_encrypt_type member and marks it obsolete as well as in the uapi header definitions. 
Fixes: 47e9624616c8 ("block: remove support for cryptoloop and the xor transfer") Signed-off-by: Carlos Llamas Reviewed-by: Christoph Hellwig Link: https://lore.kernel.org/r/20220329201815.1347500-1-cmllamas@google.com Signed-off-by: Jens Axboe --- include/uapi/linux/loop.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/loop.h b/include/uapi/linux/loop.h index 24a1c45bd1ae..98e60801195e 100644 --- a/include/uapi/linux/loop.h +++ b/include/uapi/linux/loop.h @@ -45,7 +45,7 @@ struct loop_info { unsigned long lo_inode; /* ioctl r/o */ __kernel_old_dev_t lo_rdevice; /* ioctl r/o */ int lo_offset; - int lo_encrypt_type; + int lo_encrypt_type; /* obsolete, ignored */ int lo_encrypt_key_size; /* ioctl w/o */ int lo_flags; char lo_name[LO_NAME_SIZE]; @@ -61,7 +61,7 @@ struct loop_info64 { __u64 lo_offset; __u64 lo_sizelimit;/* bytes, 0 == max available */ __u32 lo_number; /* ioctl r/o */ - __u32 lo_encrypt_type; + __u32 lo_encrypt_type; /* obsolete, ignored */ __u32 lo_encrypt_key_size; /* ioctl w/o */ __u32 lo_flags; __u8 lo_file_name[LO_NAME_SIZE]; -- cgit v1.2.3 From 55037ed7bdc62151a726f5685f88afa6a82959b1 Mon Sep 17 00:00:00 2001 From: Tadeusz Struk Date: Tue, 29 Mar 2022 10:12:52 -0700 Subject: uapi/linux/stddef.h: Add include guards Add include guard wrapper define to uapi/linux/stddef.h to prevent macro redefinition errors when stddef.h is included more than once. This was not needed before since the only contents already used a redefinition test. 
Signed-off-by: Tadeusz Struk Link: https://lore.kernel.org/r/20220329171252.57279-1-tadeusz.struk@linaro.org Fixes: 50d7bd38c3aa ("stddef: Introduce struct_group() helper macro") Cc: stable@vger.kernel.org Signed-off-by: Kees Cook --- include/uapi/linux/stddef.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/stddef.h b/include/uapi/linux/stddef.h index 3021ea25a284..7837ba4fe728 100644 --- a/include/uapi/linux/stddef.h +++ b/include/uapi/linux/stddef.h @@ -1,4 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +#ifndef _UAPI_LINUX_STDDEF_H +#define _UAPI_LINUX_STDDEF_H + #include #ifndef __always_inline @@ -41,3 +44,4 @@ struct { } __empty_ ## NAME; \ TYPE NAME[]; \ } +#endif -- cgit v1.2.3 From 35025735a79eaa894c43837b94fd33c9d6b122df Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Thu, 3 Mar 2022 15:41:18 +0000 Subject: KVM: x86/xen: Support direct injection of event channel events This adds a KVM_XEN_HVM_EVTCHN_SEND ioctl which allows direct injection of events given an explicit { vcpu, port, priority } in precisely the same form that those fields are given in the IRQ routing table. Userspace is currently able to inject 2-level events purely by setting the bits in the shared_info and vcpu_info, but FIFO event channels are harder to deal with; we will need the kernel to take sole ownership of delivery when we support those. A patch advertising this feature with a new bit in the KVM_CAP_XEN_HVM ioctl will be added in a subsequent patch. 
Signed-off-by: David Woodhouse Signed-off-by: Paolo Bonzini Message-Id: <20220303154127.202856-9-dwmw2@infradead.org> Signed-off-by: Paolo Bonzini --- include/uapi/linux/kvm.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index 91a6fe4e02c0..49cd2e9e0f6a 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -1699,6 +1699,9 @@ struct kvm_xen_hvm_attr { #define KVM_XEN_VCPU_GET_ATTR _IOWR(KVMIO, 0xca, struct kvm_xen_vcpu_attr) #define KVM_XEN_VCPU_SET_ATTR _IOW(KVMIO, 0xcb, struct kvm_xen_vcpu_attr) +/* Available with KVM_CAP_XEN_HVM / KVM_XEN_HVM_CONFIG_EVTCHN_SEND */ +#define KVM_XEN_HVM_EVTCHN_SEND _IOW(KVMIO, 0xd0, struct kvm_irq_routing_xen_evtchn) + #define KVM_GET_SREGS2 _IOR(KVMIO, 0xcc, struct kvm_sregs2) #define KVM_SET_SREGS2 _IOW(KVMIO, 0xcd, struct kvm_sregs2) -- cgit v1.2.3 From 2fd6df2f2b47d4301b1ee0fe9d627d1c061a5988 Mon Sep 17 00:00:00 2001 From: Joao Martins Date: Thu, 3 Mar 2022 15:41:19 +0000 Subject: KVM: x86/xen: intercept EVTCHNOP_send from guests Userspace registers a sending @port to either deliver to an @eventfd or directly back to a local event channel port. After binding events the guest or host may wish to bind those events to a particular vcpu. This is usually done for unbound and interdomain events. Update requests are handled via the KVM_XEN_EVTCHN_UPDATE flag. Unregistered ports are handled by the emulator.
Co-developed-by: Ankur Arora Co-developed-By: David Woodhouse Signed-off-by: Joao Martins Signed-off-by: Ankur Arora Signed-off-by: David Woodhouse Signed-off-by: Paolo Bonzini Message-Id: <20220303154127.202856-10-dwmw2@infradead.org> Signed-off-by: Paolo Bonzini --- include/uapi/linux/kvm.h | 28 ++++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index 49cd2e9e0f6a..623ed2cb228f 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -1686,6 +1686,32 @@ struct kvm_xen_hvm_attr { struct { __u64 gfn; } shared_info; + struct { + __u32 send_port; + __u32 type; /* EVTCHNSTAT_ipi / EVTCHNSTAT_interdomain */ + __u32 flags; +#define KVM_XEN_EVTCHN_DEASSIGN (1 << 0) +#define KVM_XEN_EVTCHN_UPDATE (1 << 1) +#define KVM_XEN_EVTCHN_RESET (1 << 2) + /* + * Events sent by the guest are either looped back to + * the guest itself (potentially on a different port#) + * or signalled via an eventfd. + */ + union { + struct { + __u32 port; + __u32 vcpu; + __u32 priority; + } port; + struct { + __u32 port; /* Zero for eventfd */ + __s32 fd; + } eventfd; + __u32 padding[4]; + } deliver; + } evtchn; + __u64 pad[8]; } u; }; @@ -1694,6 +1720,8 @@ struct kvm_xen_hvm_attr { #define KVM_XEN_ATTR_TYPE_LONG_MODE 0x0 #define KVM_XEN_ATTR_TYPE_SHARED_INFO 0x1 #define KVM_XEN_ATTR_TYPE_UPCALL_VECTOR 0x2 +/* Available with KVM_CAP_XEN_HVM / KVM_XEN_HVM_CONFIG_EVTCHN_SEND */ +#define KVM_XEN_ATTR_TYPE_EVTCHN 0x3 /* Per-vCPU Xen attributes */ #define KVM_XEN_VCPU_GET_ATTR _IOWR(KVMIO, 0xca, struct kvm_xen_vcpu_attr) -- cgit v1.2.3 From 942c2490c23f2800ad8143f5eb84a79b859aa743 Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Thu, 3 Mar 2022 15:41:21 +0000 Subject: KVM: x86/xen: Add KVM_XEN_VCPU_ATTR_TYPE_VCPU_ID In order to intercept hypercalls such as VCPUOP_set_singleshot_timer, we need to be aware of the Xen CPU numbering. 
This looks a lot like the Hyper-V handling of vpidx, for obvious reasons. Signed-off-by: David Woodhouse Signed-off-by: Paolo Bonzini Message-Id: <20220303154127.202856-12-dwmw2@infradead.org> Signed-off-by: Paolo Bonzini --- include/uapi/linux/kvm.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index 623ed2cb228f..4b65e9f0a4d9 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -1747,6 +1747,7 @@ struct kvm_xen_vcpu_attr { __u64 time_blocked; __u64 time_offline; } runstate; + __u32 vcpu_id; } u; }; @@ -1757,6 +1758,8 @@ struct kvm_xen_vcpu_attr { #define KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_CURRENT 0x3 #define KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_DATA 0x4 #define KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_ADJUST 0x5 +/* Available with KVM_CAP_XEN_HVM / KVM_XEN_HVM_CONFIG_EVTCHN_SEND */ +#define KVM_XEN_VCPU_ATTR_TYPE_VCPU_ID 0x6 /* Secure Encrypted Virtualization command */ enum sev_cmd_id { -- cgit v1.2.3 From 536395260582be7443b0b35b0bbb89ffe3947f62 Mon Sep 17 00:00:00 2001 From: Joao Martins Date: Thu, 3 Mar 2022 15:41:22 +0000 Subject: KVM: x86/xen: handle PV timers oneshot mode If the guest has offloaded the timer virq, handle the following hypercalls for programming the timer: VCPUOP_set_singleshot_timer VCPUOP_stop_singleshot_timer set_timer_op(timestamp_ns) The event channel corresponding to the timer virq is then used to inject events once timer deadlines are met. For now we back the PV timer with hrtimer. 
[ dwmw2: Add save/restore, 32-bit compat mode, immediate delivery, don't check timer in kvm_vcpu_has_event() ] Signed-off-by: Joao Martins Signed-off-by: David Woodhouse Signed-off-by: Paolo Bonzini Message-Id: <20220303154127.202856-13-dwmw2@infradead.org> Signed-off-by: Paolo Bonzini --- include/uapi/linux/kvm.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index 4b65e9f0a4d9..cb223e425223 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -1748,6 +1748,11 @@ struct kvm_xen_vcpu_attr { __u64 time_offline; } runstate; __u32 vcpu_id; + struct { + __u32 port; + __u32 priority; + __u64 expires_ns; + } timer; } u; }; @@ -1760,6 +1765,7 @@ struct kvm_xen_vcpu_attr { #define KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_ADJUST 0x5 /* Available with KVM_CAP_XEN_HVM / KVM_XEN_HVM_CONFIG_EVTCHN_SEND */ #define KVM_XEN_VCPU_ATTR_TYPE_VCPU_ID 0x6 +#define KVM_XEN_VCPU_ATTR_TYPE_TIMER 0x7 /* Secure Encrypted Virtualization command */ enum sev_cmd_id { -- cgit v1.2.3 From 28d1629f751c4a5f9437fbaa0ee4ed81d1a8e587 Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Thu, 3 Mar 2022 15:41:23 +0000 Subject: KVM: x86/xen: Kernel acceleration for XENVER_version Turns out this is a fast path for PV guests because they use it to trigger the event channel upcall. So letting it bounce all the way up to userspace is not great. 
Signed-off-by: David Woodhouse Signed-off-by: Paolo Bonzini Message-Id: <20220303154127.202856-14-dwmw2@infradead.org> Signed-off-by: Paolo Bonzini --- include/uapi/linux/kvm.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index cb223e425223..4dda3896ed71 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -1711,7 +1711,7 @@ struct kvm_xen_hvm_attr { __u32 padding[4]; } deliver; } evtchn; - + __u32 xen_version; __u64 pad[8]; } u; }; @@ -1722,6 +1722,7 @@ struct kvm_xen_hvm_attr { #define KVM_XEN_ATTR_TYPE_UPCALL_VECTOR 0x2 /* Available with KVM_CAP_XEN_HVM / KVM_XEN_HVM_CONFIG_EVTCHN_SEND */ #define KVM_XEN_ATTR_TYPE_EVTCHN 0x3 +#define KVM_XEN_ATTR_TYPE_XEN_VERSION 0x4 /* Per-vCPU Xen attributes */ #define KVM_XEN_VCPU_GET_ATTR _IOWR(KVMIO, 0xca, struct kvm_xen_vcpu_attr) -- cgit v1.2.3 From fde0451be8fb3208d4d146b8602d99ee8139e515 Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Thu, 3 Mar 2022 15:41:24 +0000 Subject: KVM: x86/xen: Support per-vCPU event channel upcall via local APIC Windows uses a per-vCPU vector, and it's delivered via the local APIC basically like an MSI (with associated EOI) unlike the traditional guest-wide vector which is just magically asserted by Xen (and in the KVM case by kvm_xen_has_interrupt() / kvm_cpu_get_extint()). Now that the kernel is able to raise event channel events for itself, being able to do so for Windows guests is also going to be useful. 
Signed-off-by: David Woodhouse Signed-off-by: Paolo Bonzini Message-Id: <20220303154127.202856-15-dwmw2@infradead.org> Signed-off-by: Paolo Bonzini --- include/uapi/linux/kvm.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index 4dda3896ed71..a9ba690c4f37 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -1754,6 +1754,7 @@ struct kvm_xen_vcpu_attr { __u32 priority; __u64 expires_ns; } timer; + __u8 vector; } u; }; @@ -1767,6 +1768,7 @@ struct kvm_xen_vcpu_attr { /* Available with KVM_CAP_XEN_HVM / KVM_XEN_HVM_CONFIG_EVTCHN_SEND */ #define KVM_XEN_VCPU_ATTR_TYPE_VCPU_ID 0x6 #define KVM_XEN_VCPU_ATTR_TYPE_TIMER 0x7 +#define KVM_XEN_VCPU_ATTR_TYPE_UPCALL_VECTOR 0x8 /* Secure Encrypted Virtualization command */ enum sev_cmd_id { -- cgit v1.2.3 From 661a20fab7d156cf6b9a407c946a1e558a633151 Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Thu, 3 Mar 2022 15:41:25 +0000 Subject: KVM: x86/xen: Advertise and document KVM_XEN_HVM_CONFIG_EVTCHN_SEND MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit At the end of the patch series adding this batch of event channel acceleration features, finally add the feature bit which advertises them and document it all. For SCHEDOP_poll we need to wake a polling vCPU when a given port is triggered, even when it's masked — and we want to implement that in the kernel, for efficiency. So we want the kernel to know that it has sole ownership of event channel delivery. Thus, we allow userspace to make the 'promise' by setting the corresponding feature bit in its KVM_XEN_HVM_CONFIG call. As we implement SCHEDOP_poll bypass later, we will do so only if that promise has been made by userspace. 
Signed-off-by: David Woodhouse Signed-off-by: Paolo Bonzini Message-Id: <20220303154127.202856-16-dwmw2@infradead.org> Signed-off-by: Paolo Bonzini --- include/uapi/linux/kvm.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index a9ba690c4f37..ee5cc9e2a837 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -1232,6 +1232,7 @@ struct kvm_x86_mce { #define KVM_XEN_HVM_CONFIG_SHARED_INFO (1 << 2) #define KVM_XEN_HVM_CONFIG_RUNSTATE (1 << 3) #define KVM_XEN_HVM_CONFIG_EVTCHN_2LEVEL (1 << 4) +#define KVM_XEN_HVM_CONFIG_EVTCHN_SEND (1 << 5) struct kvm_xen_hvm_config { __u32 flags; -- cgit v1.2.3 From ffbb61d09fc56c85e28b110494f3788d0ed4d1f8 Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Fri, 25 Feb 2022 14:53:02 +0000 Subject: KVM: x86: Accept KVM_[GS]ET_TSC_KHZ as a VM ioctl. This sets the default TSC frequency for subsequently created vCPUs. Signed-off-by: David Woodhouse Message-Id: <20220225145304.36166-2-dwmw2@infradead.org> Signed-off-by: Paolo Bonzini --- include/uapi/linux/kvm.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index ee5cc9e2a837..8616af85dc5d 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -1144,6 +1144,7 @@ struct kvm_ppc_resize_hpt { #define KVM_CAP_S390_MEM_OP_EXTENSION 211 #define KVM_CAP_PMU_CAPABILITY 212 #define KVM_CAP_DISABLE_QUIRKS2 213 +#define KVM_CAP_VM_TSC_CONTROL 214 #ifdef KVM_CAP_IRQ_ROUTING @@ -1471,7 +1472,8 @@ struct kvm_s390_ucas_mapping { #define KVM_SET_PIT2 _IOW(KVMIO, 0xa0, struct kvm_pit_state2) /* Available with KVM_CAP_PPC_GET_PVINFO */ #define KVM_PPC_GET_PVINFO _IOW(KVMIO, 0xa1, struct kvm_ppc_pvinfo) -/* Available with KVM_CAP_TSC_CONTROL */ +/* Available with KVM_CAP_TSC_CONTROL for a vCPU, or with +* KVM_CAP_VM_TSC_CONTROL to set defaults for a VM */ #define KVM_SET_TSC_KHZ 
_IO(KVMIO, 0xa2) #define KVM_GET_TSC_KHZ _IO(KVMIO, 0xa3) /* Available with KVM_CAP_PCI_2_3 */ -- cgit v1.2.3 From 1cd927ad6f62f27d8908498dcbf61395c5dd5fe2 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Google)" Date: Fri, 1 Apr 2022 14:39:03 -0400 Subject: tracing: mark user_events as BROKEN After being merged, user_events become more visible to a wider audience that have concerns with the current API. It is too late to fix this for this release, but instead of a full revert, just mark it as BROKEN (which prevents it from being selected in make config). Then we can work finding a better API. If that fails, then it will need to be completely reverted. To not have the code silently bitrot, still allow building it with COMPILE_TEST. And to prevent the uapi header from being installed, then later changed, and then have an old distro user space see the old version, move the header file out of the uapi directory. Surround the include with CONFIG_COMPILE_TEST to the current location, but when the BROKEN tag is taken off, it will use the uapi directory, and fail to compile. This is a good way to remind us to move the header back. Link: https://lore.kernel.org/all/20220330155835.5e1f6669@gandalf.local.home Link: https://lkml.kernel.org/r/20220330201755.29319-1-mathieu.desnoyers@efficios.com Suggested-by: Mathieu Desnoyers Signed-off-by: Steven Rostedt (Google) Signed-off-by: Linus Torvalds --- include/uapi/linux/user_events.h | 116 --------------------------------------- 1 file changed, 116 deletions(-) delete mode 100644 include/uapi/linux/user_events.h (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/user_events.h b/include/uapi/linux/user_events.h deleted file mode 100644 index e570840571e1..000000000000 --- a/include/uapi/linux/user_events.h +++ /dev/null @@ -1,116 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ -/* - * Copyright (c) 2021, Microsoft Corporation. 
- * - * Authors: - * Beau Belgrave - */ -#ifndef _UAPI_LINUX_USER_EVENTS_H -#define _UAPI_LINUX_USER_EVENTS_H - -#include -#include - -#ifdef __KERNEL__ -#include -#else -#include -#endif - -#define USER_EVENTS_SYSTEM "user_events" -#define USER_EVENTS_PREFIX "u:" - -/* Bits 0-6 are for known probe types, Bit 7 is for unknown probes */ -#define EVENT_BIT_FTRACE 0 -#define EVENT_BIT_PERF 1 -#define EVENT_BIT_OTHER 7 - -#define EVENT_STATUS_FTRACE (1 << EVENT_BIT_FTRACE) -#define EVENT_STATUS_PERF (1 << EVENT_BIT_PERF) -#define EVENT_STATUS_OTHER (1 << EVENT_BIT_OTHER) - -/* Create dynamic location entry within a 32-bit value */ -#define DYN_LOC(offset, size) ((size) << 16 | (offset)) - -/* Use raw iterator for attached BPF program(s), no affect on ftrace/perf */ -#define FLAG_BPF_ITER (1 << 0) - -/* - * Describes an event registration and stores the results of the registration. - * This structure is passed to the DIAG_IOCSREG ioctl, callers at a minimum - * must set the size and name_args before invocation. 
- */ -struct user_reg { - - /* Input: Size of the user_reg structure being used */ - __u32 size; - - /* Input: Pointer to string with event name, description and flags */ - __u64 name_args; - - /* Output: Byte index of the event within the status page */ - __u32 status_index; - - /* Output: Index of the event to use when writing data */ - __u32 write_index; -}; - -#define DIAG_IOC_MAGIC '*' - -/* Requests to register a user_event */ -#define DIAG_IOCSREG _IOWR(DIAG_IOC_MAGIC, 0, struct user_reg*) - -/* Requests to delete a user_event */ -#define DIAG_IOCSDEL _IOW(DIAG_IOC_MAGIC, 1, char*) - -/* Data type that was passed to the BPF program */ -enum { - /* Data resides in kernel space */ - USER_BPF_DATA_KERNEL, - - /* Data resides in user space */ - USER_BPF_DATA_USER, - - /* Data is a pointer to a user_bpf_iter structure */ - USER_BPF_DATA_ITER, -}; - -/* - * Describes an iovec iterator that BPF programs can use to access data for - * a given user_event write() / writev() call. - */ -struct user_bpf_iter { - - /* Offset of the data within the first iovec */ - __u32 iov_offset; - - /* Number of iovec structures */ - __u32 nr_segs; - - /* Pointer to iovec structures */ - const struct iovec *iov; -}; - -/* Context that BPF programs receive when attached to a user_event */ -struct user_bpf_context { - - /* Data type being passed (see union below) */ - __u32 data_type; - - /* Length of the data */ - __u32 data_len; - - /* Pointer to data, varies by data type */ - union { - /* Kernel data (data_type == USER_BPF_DATA_KERNEL) */ - void *kdata; - - /* User data (data_type == USER_BPF_DATA_USER) */ - void *udata; - - /* Direct iovec (data_type == USER_BPF_DATA_ITER) */ - struct user_bpf_iter *iter; - }; -}; - -#endif /* _UAPI_LINUX_USER_EVENTS_H */ -- cgit v1.2.3 From 66df0fdb5981052f3ad97c9879eda93712bdefc2 Mon Sep 17 00:00:00 2001 From: Haiyue Wang Date: Sun, 3 Apr 2022 19:53:26 +0800 Subject: bpf: Correct the comment for BTF kind bitfield The commit 8fd886911a6a ("bpf: Add 
BTF_KIND_FLOAT to uapi") has extended the BTF kind bitfield from 4 to 5 bits, correct the comment. Signed-off-by: Haiyue Wang Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20220403115327.205964-1-haiyue.wang@intel.com --- include/uapi/linux/btf.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/btf.h b/include/uapi/linux/btf.h index b0d8fea1951d..a9162a6c0284 100644 --- a/include/uapi/linux/btf.h +++ b/include/uapi/linux/btf.h @@ -33,8 +33,8 @@ struct btf_type { /* "info" bits arrangement * bits 0-15: vlen (e.g. # of struct's members) * bits 16-23: unused - * bits 24-27: kind (e.g. int, ptr, array...etc) - * bits 28-30: unused + * bits 24-28: kind (e.g. int, ptr, array...etc) + * bits 29-30: unused * bit 31: kind_flag, currently used by * struct, union and fwd */ -- cgit v1.2.3 From f56b919fa4f1b27c589e71f7d90e9785f9196bf1 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Wed, 16 Feb 2022 09:39:22 +0100 Subject: linux/fb.h: Spelling s/pallette/palette/ Fix a misspelling of "palette" in a comment. Signed-off-by: Geert Uytterhoeven Reviewed-by: Pekka Paalanen Signed-off-by: Helge Deller --- include/uapi/linux/fb.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/fb.h b/include/uapi/linux/fb.h index 4c14e8be7267..3a49913d006c 100644 --- a/include/uapi/linux/fb.h +++ b/include/uapi/linux/fb.h @@ -182,7 +182,7 @@ struct fb_fix_screeninfo { * * For pseudocolor: offset and length should be the same for all color * components. Offset specifies the position of the least significant bit - * of the pallette index in a pixel value. Length indicates the number + * of the palette index in a pixel value. Length indicates the number * of available palette entries (i.e. # of entries = 1 << length).
*/ struct fb_bitfield { -- cgit v1.2.3 From 1ee375d77bb944321c969b456aa73994566cecf6 Mon Sep 17 00:00:00 2001 From: Nick Desaulniers Date: Mon, 4 Apr 2022 10:54:47 -0700 Subject: net, uapi: remove inclusion of arpa/inet.h In include/uapi/linux/tipc_config.h, there's a comment that it includes arpa/inet.h for ntohs; but ntohs is not defined in any UAPI header. For now, reuse the definitions from include/linux/byteorder/generic.h, since the various conversion functions do exist in UAPI headers: include/uapi/linux/byteorder/big_endian.h include/uapi/linux/byteorder/little_endian.h We would like to get to the point where we can build UAPI header tests with -nostdinc, meaning that kernel UAPI headers should not have a circular dependency on libc headers. Link: https://android-review.googlesource.com/c/platform/bionic/+/2048127 Suggested-by: Jakub Kicinski Signed-off-by: Nick Desaulniers Signed-off-by: David S. Miller --- include/uapi/linux/tipc_config.h | 28 ++++++++++++---------------- 1 file changed, 12 insertions(+), 16 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/tipc_config.h b/include/uapi/linux/tipc_config.h index 4dfc05651c98..c00adf2fe868 100644 --- a/include/uapi/linux/tipc_config.h +++ b/include/uapi/linux/tipc_config.h @@ -43,10 +43,6 @@ #include #include -#ifndef __KERNEL__ -#include /* for ntohs etc. 
*/ -#endif - /* * Configuration * @@ -269,33 +265,33 @@ static inline int TLV_OK(const void *tlv, __u16 space) */ return (space >= TLV_SPACE(0)) && - (ntohs(((struct tlv_desc *)tlv)->tlv_len) <= space); + (__be16_to_cpu(((struct tlv_desc *)tlv)->tlv_len) <= space); } static inline int TLV_CHECK(const void *tlv, __u16 space, __u16 exp_type) { return TLV_OK(tlv, space) && - (ntohs(((struct tlv_desc *)tlv)->tlv_type) == exp_type); + (__be16_to_cpu(((struct tlv_desc *)tlv)->tlv_type) == exp_type); } static inline int TLV_GET_LEN(struct tlv_desc *tlv) { - return ntohs(tlv->tlv_len); + return __be16_to_cpu(tlv->tlv_len); } static inline void TLV_SET_LEN(struct tlv_desc *tlv, __u16 len) { - tlv->tlv_len = htons(len); + tlv->tlv_len = __cpu_to_be16(len); } static inline int TLV_CHECK_TYPE(struct tlv_desc *tlv, __u16 type) { - return (ntohs(tlv->tlv_type) == type); + return (__be16_to_cpu(tlv->tlv_type) == type); } static inline void TLV_SET_TYPE(struct tlv_desc *tlv, __u16 type) { - tlv->tlv_type = htons(type); + tlv->tlv_type = __cpu_to_be16(type); } static inline int TLV_SET(void *tlv, __u16 type, void *data, __u16 len) @@ -305,8 +301,8 @@ static inline int TLV_SET(void *tlv, __u16 type, void *data, __u16 len) tlv_len = TLV_LENGTH(len); tlv_ptr = (struct tlv_desc *)tlv; - tlv_ptr->tlv_type = htons(type); - tlv_ptr->tlv_len = htons(tlv_len); + tlv_ptr->tlv_type = __cpu_to_be16(type); + tlv_ptr->tlv_len = __cpu_to_be16(tlv_len); if (len && data) { memcpy(TLV_DATA(tlv_ptr), data, len); memset((char *)TLV_DATA(tlv_ptr) + len, 0, TLV_SPACE(len) - tlv_len); @@ -348,7 +344,7 @@ static inline void *TLV_LIST_DATA(struct tlv_list_desc *list) static inline void TLV_LIST_STEP(struct tlv_list_desc *list) { - __u16 tlv_space = TLV_ALIGN(ntohs(list->tlv_ptr->tlv_len)); + __u16 tlv_space = TLV_ALIGN(__be16_to_cpu(list->tlv_ptr->tlv_len)); list->tlv_ptr = (struct tlv_desc *)((char *)list->tlv_ptr + tlv_space); list->tlv_space -= tlv_space; @@ -404,9 +400,9 @@ static inline int 
TCM_SET(void *msg, __u16 cmd, __u16 flags, msg_len = TCM_LENGTH(data_len); tcm_hdr = (struct tipc_cfg_msg_hdr *)msg; - tcm_hdr->tcm_len = htonl(msg_len); - tcm_hdr->tcm_type = htons(cmd); - tcm_hdr->tcm_flags = htons(flags); + tcm_hdr->tcm_len = __cpu_to_be32(msg_len); + tcm_hdr->tcm_type = __cpu_to_be16(cmd); + tcm_hdr->tcm_flags = __cpu_to_be16(flags); if (data_len && data) { memcpy(TCM_DATA(msg), data, data_len); memset((char *)TCM_DATA(msg) + data_len, 0, TCM_SPACE(data_len) - msg_len); -- cgit v1.2.3 From fce96cf0443083e37455eff8f78fd240c621dae3 Mon Sep 17 00:00:00 2001 From: Brijesh Singh Date: Mon, 7 Mar 2022 15:33:53 -0600 Subject: virt: Add SEV-SNP guest driver The SEV-SNP specification provides the guest a mechanism to communicate with the PSP without risk from a malicious hypervisor who wishes to read, alter, drop or replay the messages sent. The driver uses snp_issue_guest_request() to issue GHCB SNP_GUEST_REQUEST or SNP_EXT_GUEST_REQUEST NAE events to submit the request to PSP. The PSP requires that all communication should be encrypted using key specified through a struct snp_guest_platform_data descriptor. Userspace can use SNP_GET_REPORT ioctl() to query the guest attestation report. See SEV-SNP spec section Guest Messages for more details. [ bp: Remove the "what" from the commit message, massage. 
] Signed-off-by: Brijesh Singh Signed-off-by: Borislav Petkov Link: https://lore.kernel.org/r/20220307213356.2797205-44-brijesh.singh@amd.com --- include/uapi/linux/sev-guest.h | 50 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 50 insertions(+) create mode 100644 include/uapi/linux/sev-guest.h (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/sev-guest.h b/include/uapi/linux/sev-guest.h new file mode 100644 index 000000000000..38f11d723c68 --- /dev/null +++ b/include/uapi/linux/sev-guest.h @@ -0,0 +1,50 @@ +/* SPDX-License-Identifier: GPL-2.0-only WITH Linux-syscall-note */ +/* + * Userspace interface for AMD SEV and SNP guest driver. + * + * Copyright (C) 2021 Advanced Micro Devices, Inc. + * + * Author: Brijesh Singh + * + * SEV API specification is available at: https://developer.amd.com/sev/ + */ + +#ifndef __UAPI_LINUX_SEV_GUEST_H_ +#define __UAPI_LINUX_SEV_GUEST_H_ + +#include + +struct snp_report_req { + /* user data that should be included in the report */ + __u8 user_data[64]; + + /* The vmpl level to be included in the report */ + __u32 vmpl; + + /* Must be zero filled */ + __u8 rsvd[28]; +}; + +struct snp_report_resp { + /* response data, see SEV-SNP spec for the format */ + __u8 data[4000]; +}; + +struct snp_guest_request_ioctl { + /* message version number (must be non-zero) */ + __u8 msg_version; + + /* Request and response structure address */ + __u64 req_data; + __u64 resp_data; + + /* firmware error code on failure (see psp-sev.h) */ + __u64 fw_err; +}; + +#define SNP_GUEST_REQ_IOC_TYPE 'S' + +/* Get SNP attestation report */ +#define SNP_GET_REPORT _IOWR(SNP_GUEST_REQ_IOC_TYPE, 0x0, struct snp_guest_request_ioctl) + +#endif /* __UAPI_LINUX_SEV_GUEST_H_ */ -- cgit v1.2.3 From 68de0b2f938642079c0c853b219bdb88c4dc4d13 Mon Sep 17 00:00:00 2001 From: Brijesh Singh Date: Thu, 24 Feb 2022 10:56:23 -0600 Subject: virt: sevguest: Add support to derive key The SNP_GET_DERIVED_KEY ioctl interface can be used by the SNP guest to 
ask the firmware to provide a key derived from a root key. The derived key may be used by the guest for any purposes it chooses, such as a sealing key or communicating with the external entities. See SEV-SNP firmware spec for more information. [ bp: No need to memset "req" - it will get overwritten. ] Signed-off-by: Brijesh Singh Signed-off-by: Borislav Petkov Reviewed-by: Liam Merwick Link: https://lore.kernel.org/r/20220307213356.2797205-45-brijesh.singh@amd.com --- include/uapi/linux/sev-guest.h | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/sev-guest.h b/include/uapi/linux/sev-guest.h index 38f11d723c68..598367f12064 100644 --- a/include/uapi/linux/sev-guest.h +++ b/include/uapi/linux/sev-guest.h @@ -30,6 +30,20 @@ struct snp_report_resp { __u8 data[4000]; }; +struct snp_derived_key_req { + __u32 root_key_select; + __u32 rsvd; + __u64 guest_field_select; + __u32 vmpl; + __u32 guest_svn; + __u64 tcb_version; +}; + +struct snp_derived_key_resp { + /* response data, see SEV-SNP spec for the format */ + __u8 data[64]; +}; + struct snp_guest_request_ioctl { /* message version number (must be non-zero) */ __u8 msg_version; @@ -47,4 +61,7 @@ struct snp_guest_request_ioctl { /* Get SNP attestation report */ #define SNP_GET_REPORT _IOWR(SNP_GUEST_REQ_IOC_TYPE, 0x0, struct snp_guest_request_ioctl) +/* Get a derived key from the root */ +#define SNP_GET_DERIVED_KEY _IOWR(SNP_GUEST_REQ_IOC_TYPE, 0x1, struct snp_guest_request_ioctl) + #endif /* __UAPI_LINUX_SEV_GUEST_H_ */ -- cgit v1.2.3 From d80b494f712317493d464a55652698c4d1b7bb0f Mon Sep 17 00:00:00 2001 From: Brijesh Singh Date: Mon, 7 Mar 2022 15:33:55 -0600 Subject: virt: sevguest: Add support to get extended report Version 2 of GHCB specification defines Non-Automatic-Exit (NAE) to get extended guest report which is similar to the SNP_GET_REPORT ioctl. The main difference is related to the additional data that will be returned. 
That additional data returned is a certificate blob that can be used by the SNP guest user. The certificate blob layout is defined in the GHCB specification. The driver simply treats the blob as opaque data and copies it to userspace. [ bp: Massage commit message, cast 1st arg of access_ok() ] Signed-off-by: Brijesh Singh Signed-off-by: Borislav Petkov Link: https://lore.kernel.org/r/20220307213356.2797205-46-brijesh.singh@amd.com --- include/uapi/linux/sev-guest.h | 13 +++++++++++++ 1 file changed, 13 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/sev-guest.h b/include/uapi/linux/sev-guest.h index 598367f12064..256aaeff7e65 100644 --- a/include/uapi/linux/sev-guest.h +++ b/include/uapi/linux/sev-guest.h @@ -56,6 +56,16 @@ struct snp_guest_request_ioctl { __u64 fw_err; }; +struct snp_ext_report_req { + struct snp_report_req data; + + /* where to copy the certificate blob */ + __u64 certs_address; + + /* length of the certificate blob */ + __u32 certs_len; +}; + #define SNP_GUEST_REQ_IOC_TYPE 'S' /* Get SNP attestation report */ @@ -64,4 +74,7 @@ struct snp_guest_request_ioctl { /* Get a derived key from the root */ #define SNP_GET_DERIVED_KEY _IOWR(SNP_GUEST_REQ_IOC_TYPE, 0x1, struct snp_guest_request_ioctl) +/* Get SNP extended report as defined in the GHCB specification version 2. */ +#define SNP_GET_EXT_REPORT _IOWR(SNP_GUEST_REQ_IOC_TYPE, 0x2, struct snp_guest_request_ioctl) + #endif /* __UAPI_LINUX_SEV_GUEST_H_ */ -- cgit v1.2.3 From 794c24e9921f32ded4422833a990ccf11dc3c00e Mon Sep 17 00:00:00 2001 From: Jeffrey Ji Date: Wed, 6 Apr 2022 17:26:00 +0000 Subject: net-core: rx_otherhost_dropped to core_stats Increment rx_otherhost_dropped counter when packet dropped due to mismatched dest MAC addr. An example when this drop can occur is when manually crafting raw packets that will be consumed by a user space application via a tap device.
For testing purposes local traffic was generated using trafgen for the client and netcat to start a server Tested: Created 2 netns, sent 1 packet using trafgen from 1 to the other with "{eth(daddr=$INCORRECT_MAC...}", verified that iproute2 showed the counter was incremented. (Also had to modify iproute2 to show the stat, additional patch for that coming next.) Signed-off-by: Jeffrey Ji Reviewed-by: Brian Vazquez Reviewed-by: Eric Dumazet Link: https://lore.kernel.org/r/20220406172600.1141083-1-jeffreyjilinux@gmail.com Signed-off-by: Jakub Kicinski --- include/uapi/linux/if_link.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h index cc284c048e69..d1e600816b82 100644 --- a/include/uapi/linux/if_link.h +++ b/include/uapi/linux/if_link.h @@ -211,6 +211,9 @@ struct rtnl_link_stats { * @rx_nohandler: Number of packets received on the interface * but dropped by the networking stack because the device is * not designated to receive packets (e.g. backup link in a bond). + * + * @rx_otherhost_dropped: Number of packets dropped due to mismatch + * in destination MAC address. */ struct rtnl_link_stats64 { __u64 rx_packets; @@ -243,6 +246,8 @@ struct rtnl_link_stats64 { __u64 rx_compressed; __u64 tx_compressed; __u64 rx_nohandler; + + __u64 rx_otherhost_dropped; }; /* Subset of link stats useful for in-HW collection. Meaning of the fields is as -- cgit v1.2.3 From c4212f3eb89fd5654f0a6ed2ee1d13fcb86cb664 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Sun, 10 Apr 2022 15:13:24 -0600 Subject: io_uring: flag the fact that linked file assignment is sane Give applications a way to tell if the kernel supports sane linked files, as in files being assigned at the right time to be able to reliably do <open file direct into slot X><read file from slot X> while using IOSQE_IO_LINK to order them. Not really a bug fix, but flag it as such so that it gets pulled in with backports of the deferred file assignment.
Fixes: 6bf9c47a3989 ("io_uring: defer file assignment") Signed-off-by: Jens Axboe --- include/uapi/linux/io_uring.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h index 784adc6f6ed2..1845cf7c80ba 100644 --- a/include/uapi/linux/io_uring.h +++ b/include/uapi/linux/io_uring.h @@ -296,6 +296,7 @@ struct io_uring_params { #define IORING_FEAT_NATIVE_WORKERS (1U << 9) #define IORING_FEAT_RSRC_TAGS (1U << 10) #define IORING_FEAT_CQE_SKIP (1U << 11) +#define IORING_FEAT_LINKED_FILE (1U << 12) /* * io_uring_register(2) opcodes and arguments -- cgit v1.2.3 From 2d7991fe867974a8e5065ee9691451a406b9320d Mon Sep 17 00:00:00 2001 From: Dave Jiang Date: Fri, 11 Mar 2022 16:23:22 -0700 Subject: dmaengine: idxd: update IAA definitions for user header Add additional structure definitions for Intel In-memory Analytics Accelerator (IAA/IAX). See specification (1) for more details. 1: https://cdrdv2.intel.com/v1/dl/getContent/721858 Signed-off-by: Dave Jiang Link: https://lore.kernel.org/r/164704100212.1373038.18362680016033557757.stgit@djiang5-desk3.ch.intel.com Signed-off-by: Vinod Koul --- include/uapi/linux/idxd.h | 31 ++++++++++++++++++++++++++++--- 1 file changed, 28 insertions(+), 3 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/idxd.h b/include/uapi/linux/idxd.h index a8f0ff75c430..bce7c43657d5 100644 --- a/include/uapi/linux/idxd.h +++ b/include/uapi/linux/idxd.h @@ -53,6 +53,11 @@ enum idxd_scmd_stat { /* IAX */ #define IDXD_OP_FLAG_RD_SRC2_AECS 0x010000 +#define IDXD_OP_FLAG_RD_SRC2_2ND 0x020000 +#define IDXD_OP_FLAG_WR_SRC2_AECS_COMP 0x040000 +#define IDXD_OP_FLAG_WR_SRC2_AECS_OVFL 0x080000 +#define IDXD_OP_FLAG_SRC2_STS 0x100000 +#define IDXD_OP_FLAG_CRC_RFC3720 0x200000 /* Opcode */ enum dsa_opcode { @@ -81,6 +86,18 @@ enum iax_opcode { IAX_OPCODE_MEMMOVE, IAX_OPCODE_DECOMPRESS = 0x42, IAX_OPCODE_COMPRESS, + IAX_OPCODE_CRC64, + 
IAX_OPCODE_ZERO_DECOMP_32 = 0x48, + IAX_OPCODE_ZERO_DECOMP_16, + IAX_OPCODE_DECOMP_32 = 0x4c, + IAX_OPCODE_DECOMP_16, + IAX_OPCODE_SCAN = 0x50, + IAX_OPCODE_SET_MEMBER, + IAX_OPCODE_EXTRACT, + IAX_OPCODE_SELECT, + IAX_OPCODE_RLE_BURST, + IAX_OPCDE_FIND_UNIQUE, + IAX_OPCODE_EXPAND, }; /* Completion record status */ @@ -120,6 +137,7 @@ enum iax_completion_status { IAX_COMP_NONE = 0, IAX_COMP_SUCCESS, IAX_COMP_PAGE_FAULT_IR = 0x04, + IAX_COMP_ANALYTICS_ERROR = 0x0a, IAX_COMP_OUTBUF_OVERFLOW, IAX_COMP_BAD_OPCODE = 0x10, IAX_COMP_INVALID_FLAGS, @@ -140,7 +158,10 @@ enum iax_completion_status { IAX_COMP_WATCHDOG, IAX_COMP_INVALID_COMP_FLAG = 0x30, IAX_COMP_INVALID_FILTER_FLAG, - IAX_COMP_INVALID_NUM_ELEMS = 0x33, + IAX_COMP_INVALID_INPUT_SIZE, + IAX_COMP_INVALID_NUM_ELEMS, + IAX_COMP_INVALID_SRC1_WIDTH, + IAX_COMP_INVALID_INVERT_OUT, }; #define DSA_COMP_STATUS_MASK 0x7f @@ -319,8 +340,12 @@ struct iax_completion_record { uint32_t output_size; uint8_t output_bits; uint8_t rsvd3; - uint16_t rsvd4; - uint64_t rsvd5[4]; + uint16_t xor_csum; + uint32_t crc; + uint32_t min; + uint32_t max; + uint32_t sum; + uint64_t rsvd4[2]; } __attribute__((packed)); struct iax_raw_completion_record { -- cgit v1.2.3 From 545528d788556c724eeb5400757f828ef27782a8 Mon Sep 17 00:00:00 2001 From: Nikolay Aleksandrov Date: Wed, 13 Apr 2022 13:51:54 +0300 Subject: net: netlink: add NLM_F_BULK delete request modifier Add a new delete request modifier called NLM_F_BULK which, when supported, would cause the request to delete multiple objects. The flag is a convenient way to signal that a multiple delete operation is requested which can be gradually added to different delete requests. In order to make sure older kernels will error out if the operation is not supported instead of doing something unintended we have to break a required condition when implementing support for this flag, f.e. for neighbors we will omit the mandatory mac address attribute. 
Initially it will be used to add flush with filtering support for bridge fdbs, but it also opens the door to add similar support to others. Signed-off-by: Nikolay Aleksandrov Signed-off-by: David S. Miller --- include/uapi/linux/netlink.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/netlink.h b/include/uapi/linux/netlink.h index 4c0cde075c27..855dffb4c1c3 100644 --- a/include/uapi/linux/netlink.h +++ b/include/uapi/linux/netlink.h @@ -72,6 +72,7 @@ struct nlmsghdr { /* Modifiers to DELETE request */ #define NLM_F_NONREC 0x100 /* Do not delete recursively */ +#define NLM_F_BULK 0x200 /* Delete multiple objects */ /* Flags for ACK message */ #define NLM_F_CAPPED 0x100 /* request was capped */ -- cgit v1.2.3 From ea2c0f9e3fc2f94f090d693b7235c02af1289629 Mon Sep 17 00:00:00 2001 From: Nikolay Aleksandrov Date: Wed, 13 Apr 2022 13:52:00 +0300 Subject: net: rtnetlink: add ndm flags and state mask attributes Add ndm flags/state masks which will be used for bulk delete filtering. All of these are used by the bridge and vxlan drivers. Also minimal attr policy validation is added, it is up to ndo_fdb_del_bulk implementers to further validate them. Signed-off-by: Nikolay Aleksandrov Signed-off-by: David S. 
Miller --- include/uapi/linux/neighbour.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/neighbour.h b/include/uapi/linux/neighbour.h index db05fb55055e..39c565e460c7 100644 --- a/include/uapi/linux/neighbour.h +++ b/include/uapi/linux/neighbour.h @@ -32,6 +32,8 @@ enum { NDA_NH_ID, NDA_FDB_EXT_ATTRS, NDA_FLAGS_EXT, + NDA_NDM_STATE_MASK, + NDA_NDM_FLAGS_MASK, __NDA_MAX }; -- cgit v1.2.3 From c24a950ec7d60c4da91dc3f273295c7f438b531e Mon Sep 17 00:00:00 2001 From: Peter Gonda Date: Thu, 7 Apr 2022 14:02:33 -0700 Subject: KVM, SEV: Add KVM_EXIT_SHUTDOWN metadata for SEV-ES If an SEV-ES guest requests termination, exit to userspace with KVM_EXIT_SYSTEM_EVENT and a dedicated SEV_TERM type instead of -EINVAL so that userspace can take appropriate action. See AMD's GHCB spec section '4.1.13 Termination Request' for more details. Suggested-by: Sean Christopherson Suggested-by: Paolo Bonzini Cc: kvm@vger.kernel.org Cc: linux-kernel@vger.kernel.org Signed-off-by: Peter Gonda Reported-by: kernel test robot Message-Id: <20220407210233.782250-1-pgonda@google.com> [Add documentation.
- Paolo] Signed-off-by: Paolo Bonzini --- include/uapi/linux/kvm.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index 8616af85dc5d..dd1d8167e71f 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -444,8 +444,11 @@ struct kvm_run { #define KVM_SYSTEM_EVENT_SHUTDOWN 1 #define KVM_SYSTEM_EVENT_RESET 2 #define KVM_SYSTEM_EVENT_CRASH 3 +#define KVM_SYSTEM_EVENT_SEV_TERM 4 +#define KVM_SYSTEM_EVENT_NDATA_VALID (1u << 31) __u32 type; - __u64 flags; + __u32 ndata; + __u64 data[16]; } system_event; /* KVM_EXIT_S390_STSI */ struct { -- cgit v1.2.3 From 4dc84c06a343fcb95fd5a0acb537aefa4ebdd1b0 Mon Sep 17 00:00:00 2001 From: Jie Wang Date: Tue, 12 Apr 2022 10:01:19 +0800 Subject: net: ethtool: extend ringparam set/get APIs for tx_push Currently tx push is a standard driver feature which controls use of a fast path descriptor push. So this patch extends the ringparam APIs and data structures to support set/get tx push by ethtool -G/g. 
Signed-off-by: Jie Wang Signed-off-by: Guangbin Huang Signed-off-by: Jakub Kicinski --- include/uapi/linux/ethtool_netlink.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/ethtool_netlink.h b/include/uapi/linux/ethtool_netlink.h index 979850221b8d..d2fb4f7be61b 100644 --- a/include/uapi/linux/ethtool_netlink.h +++ b/include/uapi/linux/ethtool_netlink.h @@ -338,6 +338,7 @@ enum { ETHTOOL_A_RINGS_RX_BUF_LEN, /* u32 */ ETHTOOL_A_RINGS_TCP_DATA_SPLIT, /* u8 */ ETHTOOL_A_RINGS_CQE_SIZE, /* u32 */ + ETHTOOL_A_RINGS_TX_PUSH, /* u8 */ /* add new constants above here */ __ETHTOOL_A_RINGS_CNT, -- cgit v1.2.3 From f9a2fb73318eb4dbf8cd84866b8b0dd012d8b116 Mon Sep 17 00:00:00 2001 From: Arun Ajith S Date: Fri, 15 Apr 2022 08:34:02 +0000 Subject: net/ipv6: Introduce accept_unsolicited_na knob to implement router-side changes for RFC9131 Add a new neighbour cache entry in STALE state for routers on receiving an unsolicited (gratuitous) neighbour advertisement with target link-layer-address option specified. This is similar to the arp_accept configuration for IPv4. A new sysctl endpoint is created to turn on this behaviour: /proc/sys/net/ipv6/conf/interface/accept_unsolicited_na. Signed-off-by: Arun Ajith S Reviewed-by: David Ahern Signed-off-by: David S. Miller --- include/uapi/linux/ipv6.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/ipv6.h b/include/uapi/linux/ipv6.h index d4178dace0bf..549ddeaf788b 100644 --- a/include/uapi/linux/ipv6.h +++ b/include/uapi/linux/ipv6.h @@ -194,6 +194,7 @@ enum { DEVCONF_IOAM6_ID, DEVCONF_IOAM6_ID_WIDE, DEVCONF_NDISC_EVICT_NOCARRIER, + DEVCONF_ACCEPT_UNSOLICITED_NA, DEVCONF_MAX }; -- cgit v1.2.3 From 470776c6b03491a3e82c644737a6da5466b8b3eb Mon Sep 17 00:00:00 2001 From: Shelby Heffron Date: Sun, 17 Apr 2022 13:05:08 -0700 Subject: Input: add Marine Navigation Keycodes Add keycodes that are used by marine navigation devices. 
Signed-off-by: Shelby Heffron Link: https://lore.kernel.org/r/20220414015356.1619310-1-Shelby.Heffron@garmin.com Signed-off-by: Dmitry Torokhov --- include/uapi/linux/input-event-codes.h | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/input-event-codes.h b/include/uapi/linux/input-event-codes.h index 7989d9483ea7..dff8e7f17074 100644 --- a/include/uapi/linux/input-event-codes.h +++ b/include/uapi/linux/input-event-codes.h @@ -662,6 +662,27 @@ /* Select an area of screen to be copied */ #define KEY_SELECTIVE_SCREENSHOT 0x27a +/* Move the focus to the next or previous user controllable element within a UI container */ +#define KEY_NEXT_ELEMENT 0x27b +#define KEY_PREVIOUS_ELEMENT 0x27c + +/* Toggle Autopilot engagement */ +#define KEY_AUTOPILOT_ENGAGE_TOGGLE 0x27d + +/* Shortcut Keys */ +#define KEY_MARK_WAYPOINT 0x27e +#define KEY_SOS 0x27f +#define KEY_NAV_CHART 0x280 +#define KEY_FISHING_CHART 0x281 +#define KEY_SINGLE_RANGE_RADAR 0x282 +#define KEY_DUAL_RANGE_RADAR 0x283 +#define KEY_RADAR_OVERLAY 0x284 +#define KEY_TRADITIONAL_SONAR 0x285 +#define KEY_CLEARVU_SONAR 0x286 +#define KEY_SIDEVU_SONAR 0x287 +#define KEY_NAV_INFO 0x288 +#define KEY_BRIGHTNESS_MENU 0x289 + /* * Some keyboards have keys which do not have a defined meaning, these keys * are intended to be programmed / bound to macros by the user. For most -- cgit v1.2.3 From c246f9b5fd617fe487f8b6f18851703f468501d6 Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Mon, 18 Apr 2022 09:42:25 +0300 Subject: devlink: add support to create line card and expose to user Extend the devlink API so the driver is going to be able to create and destroy linecard instances. There can be multiple line cards per devlink device. Expose this new type of object over devlink netlink API to the userspace, with notifications. Signed-off-by: Jiri Pirko Signed-off-by: Ido Schimmel Signed-off-by: David S. 
Miller --- include/uapi/linux/devlink.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/devlink.h b/include/uapi/linux/devlink.h index b897b80770f6..59c33ed2d3e7 100644 --- a/include/uapi/linux/devlink.h +++ b/include/uapi/linux/devlink.h @@ -131,6 +131,11 @@ enum devlink_command { DEVLINK_CMD_RATE_NEW, DEVLINK_CMD_RATE_DEL, + DEVLINK_CMD_LINECARD_GET, /* can dump */ + DEVLINK_CMD_LINECARD_SET, + DEVLINK_CMD_LINECARD_NEW, + DEVLINK_CMD_LINECARD_DEL, + /* add new commands above here */ __DEVLINK_CMD_MAX, DEVLINK_CMD_MAX = __DEVLINK_CMD_MAX - 1 @@ -553,6 +558,8 @@ enum devlink_attr { DEVLINK_ATTR_REGION_MAX_SNAPSHOTS, /* u32 */ + DEVLINK_ATTR_LINECARD_INDEX, /* u32 */ + /* add new attributes above here, update the policy in devlink.c */ __DEVLINK_ATTR_MAX, -- cgit v1.2.3 From fcdc8ce23a309c26a67fc613a741d9b21a248311 Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Mon, 18 Apr 2022 09:42:26 +0300 Subject: devlink: implement line card provisioning In order to be able to configure all needed stuff on a port/netdevice of a line card without the line card being present, introduce line card provisioning. Basically by setting a type, provisioning process will start and driver is supposed to create a placeholder for instances (ports/netdevices) for a line card type. Allow the user to query the supported line card types over line card get command. Then implement two netlink command SET to allow user to set/unset the card type. On the driver API side, add provision/unprovision ops and supported types array to be advertised. Upon provision op call, the driver should take care of creating the instances for the particular line card type. Introduce provision_set/clear() functions to be called by the driver once the provisioning/unprovisioning is done on its side. These helpers are not to be called directly due to the async nature of provisioning. 
Example: $ devlink port # No ports are listed $ devlink lc pci/0000:01:00.0: lc 1 state unprovisioned supported_types: 16x100G lc 2 state unprovisioned supported_types: 16x100G lc 3 state unprovisioned supported_types: 16x100G lc 4 state unprovisioned supported_types: 16x100G lc 5 state unprovisioned supported_types: 16x100G lc 6 state unprovisioned supported_types: 16x100G lc 7 state unprovisioned supported_types: 16x100G lc 8 state unprovisioned supported_types: 16x100G $ devlink lc set pci/0000:01:00.0 lc 8 type 16x100G $ devlink lc show pci/0000:01:00.0 lc 8 pci/0000:01:00.0: lc 8 state active type 16x100G supported_types: 16x100G $ devlink port pci/0000:01:00.0/0: type notset flavour cpu port 0 splittable false pci/0000:01:00.0/53: type eth netdev enp1s0nl8p1 flavour physical lc 8 port 1 splittable true lanes 4 pci/0000:01:00.0/54: type eth netdev enp1s0nl8p2 flavour physical lc 8 port 2 splittable true lanes 4 pci/0000:01:00.0/55: type eth netdev enp1s0nl8p3 flavour physical lc 8 port 3 splittable true lanes 4 pci/0000:01:00.0/56: type eth netdev enp1s0nl8p4 flavour physical lc 8 port 4 splittable true lanes 4 pci/0000:01:00.0/57: type eth netdev enp1s0nl8p5 flavour physical lc 8 port 5 splittable true lanes 4 pci/0000:01:00.0/58: type eth netdev enp1s0nl8p6 flavour physical lc 8 port 6 splittable true lanes 4 pci/0000:01:00.0/59: type eth netdev enp1s0nl8p7 flavour physical lc 8 port 7 splittable true lanes 4 pci/0000:01:00.0/60: type eth netdev enp1s0nl8p8 flavour physical lc 8 port 8 splittable true lanes 4 pci/0000:01:00.0/61: type eth netdev enp1s0nl8p9 flavour physical lc 8 port 9 splittable true lanes 4 pci/0000:01:00.0/62: type eth netdev enp1s0nl8p10 flavour physical lc 8 port 10 splittable true lanes 4 pci/0000:01:00.0/63: type eth netdev enp1s0nl8p11 flavour physical lc 8 port 11 splittable true lanes 4 pci/0000:01:00.0/64: type eth netdev enp1s0nl8p12 flavour physical lc 8 port 12 splittable true lanes 4 pci/0000:01:00.0/125: type eth netdev 
enp1s0nl8p13 flavour physical lc 8 port 13 splittable true lanes 4 pci/0000:01:00.0/126: type eth netdev enp1s0nl8p14 flavour physical lc 8 port 14 splittable true lanes 4 pci/0000:01:00.0/127: type eth netdev enp1s0nl8p15 flavour physical lc 8 port 15 splittable true lanes 4 pci/0000:01:00.0/128: type eth netdev enp1s0nl8p16 flavour physical lc 8 port 16 splittable true lanes 4 $ devlink lc set pci/0000:01:00.0 lc 8 notype Signed-off-by: Jiri Pirko Signed-off-by: Ido Schimmel Signed-off-by: David S. Miller --- include/uapi/linux/devlink.h | 15 +++++++++++++++ 1 file changed, 15 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/devlink.h b/include/uapi/linux/devlink.h index 59c33ed2d3e7..de91e4a0d476 100644 --- a/include/uapi/linux/devlink.h +++ b/include/uapi/linux/devlink.h @@ -343,6 +343,18 @@ enum devlink_reload_limit { #define DEVLINK_RELOAD_LIMITS_VALID_MASK (_BITUL(__DEVLINK_RELOAD_LIMIT_MAX) - 1) +enum devlink_linecard_state { + DEVLINK_LINECARD_STATE_UNSPEC, + DEVLINK_LINECARD_STATE_UNPROVISIONED, + DEVLINK_LINECARD_STATE_UNPROVISIONING, + DEVLINK_LINECARD_STATE_PROVISIONING, + DEVLINK_LINECARD_STATE_PROVISIONING_FAILED, + DEVLINK_LINECARD_STATE_PROVISIONED, + + __DEVLINK_LINECARD_STATE_MAX, + DEVLINK_LINECARD_STATE_MAX = __DEVLINK_LINECARD_STATE_MAX - 1 +}; + enum devlink_attr { /* don't change the order or add anything between, this is ABI! */ DEVLINK_ATTR_UNSPEC, @@ -559,6 +571,9 @@ enum devlink_attr { DEVLINK_ATTR_REGION_MAX_SNAPSHOTS, /* u32 */ DEVLINK_ATTR_LINECARD_INDEX, /* u32 */ + DEVLINK_ATTR_LINECARD_STATE, /* u8 */ + DEVLINK_ATTR_LINECARD_TYPE, /* string */ + DEVLINK_ATTR_LINECARD_SUPPORTED_TYPES, /* nested */ /* add new attributes above here, update the policy in devlink.c */ -- cgit v1.2.3 From fc9f50d5b366cd9f35bdee22fe3f8d77833cb1d8 Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Mon, 18 Apr 2022 09:42:27 +0300 Subject: devlink: implement line card active state Allow driver to mark a line card as active. 
Expose this state to the userspace over devlink netlink interface with proper notifications. 'active' state means that line card was plugged in after being provisioned. Signed-off-by: Jiri Pirko Signed-off-by: Ido Schimmel Signed-off-by: David S. Miller --- include/uapi/linux/devlink.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/devlink.h b/include/uapi/linux/devlink.h index de91e4a0d476..b3d40a5d72ff 100644 --- a/include/uapi/linux/devlink.h +++ b/include/uapi/linux/devlink.h @@ -350,6 +350,7 @@ enum devlink_linecard_state { DEVLINK_LINECARD_STATE_PROVISIONING, DEVLINK_LINECARD_STATE_PROVISIONING_FAILED, DEVLINK_LINECARD_STATE_PROVISIONED, + DEVLINK_LINECARD_STATE_ACTIVE, __DEVLINK_LINECARD_STATE_MAX, DEVLINK_LINECARD_STATE_MAX = __DEVLINK_LINECARD_STATE_MAX - 1 -- cgit v1.2.3 From 38a6f0865796e26fc38fff4858f681d9ae76fa0f Mon Sep 17 00:00:00 2001 From: Tonghao Zhang Date: Sat, 16 Apr 2022 00:40:46 +0800 Subject: net: sched: support hash selecting tx queue This patch allows users to pick queue_mapping, range from A to B. Then we can load balance packets from A to B tx queue. The range is an unsigned 16bit value in decimal format. $ tc filter ... action skbedit queue_mapping skbhash A B "skbedit queue_mapping QUEUE_MAPPING" (from "man 8 tc-skbedit") is enhanced with flags: SKBEDIT_F_TXQ_SKBHASH +----+ +----+ +----+ | P1 | | P2 | | Pn | +----+ +----+ +----+ | | | +-----------+-----------+ | | clsact/skbedit | MQ v +-----------+-----------+ | q0 | qn | qm v v v HTB/FQ FIFO ... FIFO For example: If P1 sends out packets to different Pods on another host, and we want to distribute flows from qn - qm. Then we can use skb->hash as hash.
setup commands: $ NETDEV=eth0 $ ip netns add n1 $ ip link add ipv1 link $NETDEV type ipvlan mode l2 $ ip link set ipv1 netns n1 $ ip netns exec n1 ifconfig ipv1 2.2.2.100/24 up $ tc qdisc add dev $NETDEV clsact $ tc filter add dev $NETDEV egress protocol ip prio 1 \ flower skip_hw src_ip 2.2.2.100 action skbedit queue_mapping skbhash 2 6 $ tc qdisc add dev $NETDEV handle 1: root mq $ tc qdisc add dev $NETDEV parent 1:1 handle 2: htb $ tc class add dev $NETDEV parent 2: classid 2:1 htb rate 100kbit $ tc class add dev $NETDEV parent 2: classid 2:2 htb rate 200kbit $ tc qdisc add dev $NETDEV parent 1:2 tbf rate 100mbit burst 100mb latency 1 $ tc qdisc add dev $NETDEV parent 1:3 pfifo $ tc qdisc add dev $NETDEV parent 1:4 pfifo $ tc qdisc add dev $NETDEV parent 1:5 pfifo $ tc qdisc add dev $NETDEV parent 1:6 pfifo $ tc qdisc add dev $NETDEV parent 1:7 pfifo $ ip netns exec n1 iperf3 -c 2.2.2.1 -i 1 -t 10 -P 10 pick txqueue from 2 - 6: $ ethtool -S $NETDEV | grep -i tx_queue_[0-9]_bytes tx_queue_0_bytes: 42 tx_queue_1_bytes: 0 tx_queue_2_bytes: 11442586444 tx_queue_3_bytes: 7383615334 tx_queue_4_bytes: 3981365579 tx_queue_5_bytes: 3983235051 tx_queue_6_bytes: 6706236461 tx_queue_7_bytes: 42 tx_queue_8_bytes: 0 tx_queue_9_bytes: 0 txqueues 2 - 6 are mapped to classid 1:3 - 1:7 $ tc -s class show dev $NETDEV ... class mq 1:3 root leaf 8002: Sent 11949133672 bytes 7929798 pkt (dropped 0, overlimits 0 requeues 0) backlog 0b 0p requeues 0 class mq 1:4 root leaf 8003: Sent 7710449050 bytes 5117279 pkt (dropped 0, overlimits 0 requeues 0) backlog 0b 0p requeues 0 class mq 1:5 root leaf 8004: Sent 4157648675 bytes 2758990 pkt (dropped 0, overlimits 0 requeues 0) backlog 0b 0p requeues 0 class mq 1:6 root leaf 8005: Sent 4159632195 bytes 2759990 pkt (dropped 0, overlimits 0 requeues 0) backlog 0b 0p requeues 0 class mq 1:7 root leaf 8006: Sent 7003169603 bytes 4646912 pkt (dropped 0, overlimits 0 requeues 0) backlog 0b 0p requeues 0 ... 
Cc: Jamal Hadi Salim Cc: Cong Wang Cc: Jiri Pirko Cc: "David S. Miller" Cc: Jakub Kicinski Cc: Jonathan Lemon Cc: Eric Dumazet Cc: Alexander Lobakin Cc: Paolo Abeni Cc: Talal Ahmad Cc: Kevin Hao Cc: Ilias Apalodimas Cc: Kees Cook Cc: Kumar Kartikeya Dwivedi Cc: Antoine Tenart Cc: Wei Wang Cc: Arnd Bergmann Signed-off-by: Tonghao Zhang Reviewed-by: Jamal Hadi Salim Signed-off-by: Paolo Abeni --- include/uapi/linux/tc_act/tc_skbedit.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/tc_act/tc_skbedit.h b/include/uapi/linux/tc_act/tc_skbedit.h index 800e93377218..6cb6101208d0 100644 --- a/include/uapi/linux/tc_act/tc_skbedit.h +++ b/include/uapi/linux/tc_act/tc_skbedit.h @@ -29,6 +29,7 @@ #define SKBEDIT_F_PTYPE 0x8 #define SKBEDIT_F_MASK 0x10 #define SKBEDIT_F_INHERITDSFIELD 0x20 +#define SKBEDIT_F_TXQ_SKBHASH 0x40 struct tc_skbedit { tc_gen; @@ -45,6 +46,7 @@ enum { TCA_SKBEDIT_PTYPE, TCA_SKBEDIT_MASK, TCA_SKBEDIT_FLAGS, + TCA_SKBEDIT_QUEUE_MAPPING_MAX, __TCA_SKBEDIT_MAX }; #define TCA_SKBEDIT_MAX (__TCA_SKBEDIT_MAX - 1) -- cgit v1.2.3 From b617be33502d2bfefffef71924c7a7ba50264ff6 Mon Sep 17 00:00:00 2001 From: Baruch Siach Date: Mon, 11 Apr 2022 21:45:27 +0300 Subject: spi: add SPI_RX_CPHA_FLIP mode bit Some SPI devices latch MOSI bits on one clock phase, but produce valid MISO bits on the other phase. Add SPI_RX_CPHA_FLIP mode to instruct the controller driver to flip CPHA for Rx (MISO) only transfers. 
Signed-off-by: Baruch Siach Link: https://lore.kernel.org/r/a715ca92713ca02071f33dcca9960a66a03c949a.1649702729.git.baruch@tkos.co.il Signed-off-by: Mark Brown --- include/uapi/linux/spi/spi.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/spi/spi.h b/include/uapi/linux/spi/spi.h index 236a85f08ded..9d5f58059703 100644 --- a/include/uapi/linux/spi/spi.h +++ b/include/uapi/linux/spi/spi.h @@ -27,6 +27,7 @@ #define SPI_TX_OCTAL _BITUL(13) /* transmit with 8 wires */ #define SPI_RX_OCTAL _BITUL(14) /* receive with 8 wires */ #define SPI_3WIRE_HIZ _BITUL(15) /* high impedance turnaround */ +#define SPI_RX_CPHA_FLIP _BITUL(16) /* flip CPHA on Rx only xfer */ /* * All the bits defined above should be covered by SPI_MODE_USER_MASK. @@ -36,6 +37,6 @@ * These bits must not overlap. A static assert check should make sure of that. * If adding extra bits, make sure to increase the bit index below as well. */ -#define SPI_MODE_USER_MASK (_BITUL(16) - 1) +#define SPI_MODE_USER_MASK (_BITUL(17) - 1) #endif /* _UAPI_SPI_H */ -- cgit v1.2.3 From b4000312822615ba2222e368188029e9b725dbf4 Mon Sep 17 00:00:00 2001 From: Boris Sukholitko Date: Tue, 19 Apr 2022 11:14:33 +0300 Subject: net/sched: flower: Add number of vlan tags filter These are bookkeeping parts of the new num_of_vlans filter. Defines, dump, load and set are being done here. Signed-off-by: Boris Sukholitko Signed-off-by: David S. 
Miller --- include/uapi/linux/pkt_cls.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/pkt_cls.h b/include/uapi/linux/pkt_cls.h index 404f97fb239c..9a2ee1e39fad 100644 --- a/include/uapi/linux/pkt_cls.h +++ b/include/uapi/linux/pkt_cls.h @@ -587,6 +587,8 @@ enum { TCA_FLOWER_KEY_HASH, /* u32 */ TCA_FLOWER_KEY_HASH_MASK, /* u32 */ + TCA_FLOWER_KEY_NUM_OF_VLANS, /* u8 */ + __TCA_FLOWER_MAX, }; -- cgit v1.2.3 From 9e4ab6c89109472082616f8d2f6ada7deaffe161 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Tue, 19 Apr 2022 12:22:19 +0100 Subject: arm64/sme: Implement vector length configuration prctl()s As for SVE provide a prctl() interface which allows processes to configure their SME vector length. Signed-off-by: Mark Brown Reviewed-by: Catalin Marinas Link: https://lore.kernel.org/r/20220419112247.711548-12-broonie@kernel.org Signed-off-by: Catalin Marinas --- include/uapi/linux/prctl.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/prctl.h b/include/uapi/linux/prctl.h index e998764f0262..a5e06dcbba13 100644 --- a/include/uapi/linux/prctl.h +++ b/include/uapi/linux/prctl.h @@ -272,6 +272,15 @@ struct prctl_mm_map { # define PR_SCHED_CORE_SCOPE_THREAD_GROUP 1 # define PR_SCHED_CORE_SCOPE_PROCESS_GROUP 2 +/* arm64 Scalable Matrix Extension controls */ +/* Flag values must be in sync with SVE versions */ +#define PR_SME_SET_VL 63 /* set task vector length */ +# define PR_SME_SET_VL_ONEXEC (1 << 18) /* defer effect until exec */ +#define PR_SME_GET_VL 64 /* get task vector length */ +/* Bits common to PR_SME_SET_VL and PR_SME_GET_VL */ +# define PR_SME_VL_LEN_MASK 0xffff +# define PR_SME_VL_INHERIT (1 << 17) /* inherit across exec */ + #define PR_SET_VMA 0x53564d41 # define PR_SET_VMA_ANON_NAME 0 -- cgit v1.2.3 From e12310a0d30f260b26297bc8d7c95769489af038 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Tue, 19 Apr 2022 12:22:28 +0100 Subject: 
arm64/sme: Implement ptrace support for streaming mode SVE registers The streaming mode SVE registers are represented using the same data structures as for SVE but since the vector lengths supported and in use may not be the same as SVE we represent them with a new type NT_ARM_SSVE. Unfortunately we only have a single 16 bit reserved field available in the header so there is no space to fit the current and maximum vector length for both standard and streaming SVE mode without redefining the structure in a way that creates a complicated and fragile ABI. Since FFR is not present in streaming mode it is read and written as zero. Setting NT_ARM_SSVE registers will put the task into streaming mode, similarly setting NT_ARM_SVE registers will exit it. Reads that do not correspond to the current mode of the task will return the header with no register data. For compatibility reasons on write setting no flag for the register type will be interpreted as setting SVE registers, though users can provide no register data as an alternative mechanism for doing so.
Signed-off-by: Mark Brown Reviewed-by: Catalin Marinas Link: https://lore.kernel.org/r/20220419112247.711548-21-broonie@kernel.org Signed-off-by: Catalin Marinas --- include/uapi/linux/elf.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/elf.h b/include/uapi/linux/elf.h index 787c657bfae8..a8dc688e1826 100644 --- a/include/uapi/linux/elf.h +++ b/include/uapi/linux/elf.h @@ -431,6 +431,7 @@ typedef struct elf64_shdr { #define NT_ARM_PACG_KEYS 0x408 /* ARM pointer authentication generic key */ #define NT_ARM_TAGGED_ADDR_CTRL 0x409 /* arm64 tagged address control (prctl()) */ #define NT_ARM_PAC_ENABLED_KEYS 0x40a /* arm64 ptr auth enabled keys (prctl()) */ +#define NT_ARM_SSVE 0x40b /* ARM Streaming SVE registers */ #define NT_ARC_V2 0x600 /* ARCv2 accumulator/extra registers */ #define NT_VMCOREDD 0x700 /* Vmcore Device Dump Note */ #define NT_MIPS_DSP 0x800 /* MIPS DSP ASE registers */ -- cgit v1.2.3 From 776b4a1cf36411e96972455ca72906b722b80ea1 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Tue, 19 Apr 2022 12:22:29 +0100 Subject: arm64/sme: Add ptrace support for ZA The ZA array can be read and written with the NT_ARM_ZA. Similarly to our interface for the SVE vector registers the regset consists of a header with information on the current vector length followed by an optional register data payload, represented as for signals as a series of horizontal vectors from 0 to VL/8 in the endianness independent format used for vectors. On get if ZA is enabled then register data will be provided, otherwise it will be omitted. On set if register data is provided then ZA is enabled and initialized using the provided data, otherwise it is disabled. 
Signed-off-by: Mark Brown Reviewed-by: Catalin Marinas Link: https://lore.kernel.org/r/20220419112247.711548-22-broonie@kernel.org Signed-off-by: Catalin Marinas --- include/uapi/linux/elf.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/elf.h b/include/uapi/linux/elf.h index a8dc688e1826..97808f958903 100644 --- a/include/uapi/linux/elf.h +++ b/include/uapi/linux/elf.h @@ -432,6 +432,7 @@ typedef struct elf64_shdr { #define NT_ARM_TAGGED_ADDR_CTRL 0x409 /* arm64 tagged address control (prctl()) */ #define NT_ARM_PAC_ENABLED_KEYS 0x40a /* arm64 ptr auth enabled keys (prctl()) */ #define NT_ARM_SSVE 0x40b /* ARM Streaming SVE registers */ +#define NT_ARM_ZA 0x40c /* ARM SME ZA registers */ #define NT_ARC_V2 0x600 /* ARCv2 accumulator/extra registers */ #define NT_VMCOREDD 0x700 /* Vmcore Device Dump Note */ #define NT_MIPS_DSP 0x800 /* MIPS DSP ASE registers */ -- cgit v1.2.3 From 26f89535a5bb17915a2e1062c3999a2ee797c7b0 Mon Sep 17 00:00:00 2001 From: Alison Schofield Date: Wed, 13 Apr 2022 22:12:46 -0700 Subject: cxl/mbox: Use type __u32 for mailbox payload sizes Payload sizes for mailbox commands are expected to be positive values coming from userspace. The documentation correctly describes these as always unsigned values. The mailbox and send structures that support the mailbox commands, however, use __s32 types for the payloads. Replace __s32 with __u32 in the mailbox and send command structures and update usages. Kernel users of the interface already block all negative values and there is no known ability for userspace to have grown a dependency on submitting negative values to the kernel. The known user of the IOCTL, the CXL command line interface (cxl-cli) already enforces positive size values. A Smatch signedness warning uncovered this issue.
Reported-by: kernel test robot Reported-by: Dan Carpenter Signed-off-by: Alison Schofield Link: https://lore.kernel.org/r/20220414051246.1244575-1-alison.schofield@intel.com Signed-off-by: Dan Williams --- include/uapi/linux/cxl_mem.h | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/cxl_mem.h b/include/uapi/linux/cxl_mem.h index 8d206f27bb6d..c71021a2a9ed 100644 --- a/include/uapi/linux/cxl_mem.h +++ b/include/uapi/linux/cxl_mem.h @@ -68,8 +68,8 @@ static const struct { * struct cxl_command_info - Command information returned from a query. * @id: ID number for the command. * @flags: Flags that specify command behavior. - * @size_in: Expected input size, or -1 if variable length. - * @size_out: Expected output size, or -1 if variable length. + * @size_in: Expected input size, or ~0 if variable length. + * @size_out: Expected output size, or ~0 if variable length. * * Represents a single command that is supported by both the driver and the * hardware. This is returned as part of an array from the query ioctl. The @@ -78,7 +78,7 @@ static const struct { * * - @id = 10 * - @flags = 0 - * - @size_in = -1 + * - @size_in = ~0 * - @size_out = 0 * * See struct cxl_mem_query_commands. @@ -89,8 +89,8 @@ struct cxl_command_info { __u32 flags; #define CXL_MEM_COMMAND_FLAG_MASK GENMASK(0, 0) - __s32 size_in; - __s32 size_out; + __u32 size_in; + __u32 size_out; }; /** @@ -169,13 +169,13 @@ struct cxl_send_command { __u32 retval; struct { - __s32 size; + __u32 size; __u32 rsvd; __u64 payload; } in; struct { - __s32 size; + __u32 size; __u32 rsvd; __u64 payload; } out; -- cgit v1.2.3 From 567f882a401346779d05a90beb8f21865ebdd398 Mon Sep 17 00:00:00 2001 From: Hans Verkuil Date: Wed, 9 Mar 2022 10:55:43 +0000 Subject: media: cec.h: add cec_msg_recv_is_rx/tx_result helpers These two helper functions return true if the received message contains the result of a previous non-blocking transmit. 
Either the tx_status result (cec_msg_recv_is_tx_result) of the transmit, or the rx_status result (cec_msg_recv_is_rx_result) of the reply to the original transmit. Signed-off-by: Hans Verkuil Signed-off-by: Mauro Carvalho Chehab --- include/uapi/linux/cec.h | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/cec.h b/include/uapi/linux/cec.h index de936f5e446d..1d48da926216 100644 --- a/include/uapi/linux/cec.h +++ b/include/uapi/linux/cec.h @@ -142,6 +142,26 @@ static inline void cec_msg_set_reply_to(struct cec_msg *msg, msg->reply = msg->timeout = 0; } +/** + * cec_msg_recv_is_tx_result - return true if this message contains the + * result of an earlier non-blocking transmit + * @msg: the message structure from CEC_RECEIVE + */ +static inline int cec_msg_recv_is_tx_result(const struct cec_msg *msg) +{ + return msg->sequence && msg->tx_status && !msg->rx_status; +} + +/** + * cec_msg_recv_is_rx_result - return true if this message contains the + * reply of an earlier non-blocking transmit + * @msg: the message structure from CEC_RECEIVE + */ +static inline int cec_msg_recv_is_rx_result(const struct cec_msg *msg) +{ + return msg->sequence && !msg->tx_status && msg->rx_status; +} + /* cec_msg flags field */ #define CEC_MSG_FL_REPLY_TO_FOLLOWERS (1 << 0) #define CEC_MSG_FL_RAW (1 << 1) -- cgit v1.2.3 From 4e4dab4bb6029dbee63f12a249ddc44b0124ea63 Mon Sep 17 00:00:00 2001 From: Daniel Scally Date: Wed, 2 Mar 2022 22:03:01 +0000 Subject: media: media.h: Add new media link type To describe in the kernel the connection between devices and their supporting peripherals (for example, a camera sensor and the vcm driving the focusing lens for it), add a new type of media link to introduce the concept of these ancillary links. Add some elements to the uAPI documentation to explain the new link type, their purpose and some aspects of their current implementation. 
Reviewed-by: Laurent Pinchart Signed-off-by: Daniel Scally Reviewed-by: Jean-Michel Hautbois Signed-off-by: Sakari Ailus Signed-off-by: Mauro Carvalho Chehab --- include/uapi/linux/media.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/media.h b/include/uapi/linux/media.h index 200fa8462b90..afbae7213d35 100644 --- a/include/uapi/linux/media.h +++ b/include/uapi/linux/media.h @@ -226,6 +226,7 @@ struct media_pad_desc { #define MEDIA_LNK_FL_LINK_TYPE (0xf << 28) # define MEDIA_LNK_FL_DATA_LINK (0 << 28) # define MEDIA_LNK_FL_INTERFACE_LINK (1 << 28) +# define MEDIA_LNK_FL_ANCILLARY_LINK (2 << 28) struct media_link_desc { struct media_pad_desc source; -- cgit v1.2.3 From 3d22dd432889f2f538b53f36f9f6bcd54825fc22 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Mon, 28 Mar 2022 17:01:53 +0100 Subject: media: media.h: remove unneeded <stdint.h> inclusion Commit b3b7a9f138b7 ("[media] media-device: Use u64 ints for pointers") added this #include <stdint.h>, presumably in order to use uintptr_t. Now that it is gone, we can compile this for userspace without <stdint.h>. Signed-off-by: Masahiro Yamada Reviewed-by: Laurent Pinchart Signed-off-by: Sakari Ailus Signed-off-by: Mauro Carvalho Chehab --- include/uapi/linux/media.h | 3 --- 1 file changed, 3 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/media.h b/include/uapi/linux/media.h index afbae7213d35..3ddadaea849f 100644 --- a/include/uapi/linux/media.h +++ b/include/uapi/linux/media.h @@ -20,9 +20,6 @@ #ifndef __LINUX_MEDIA_H #define __LINUX_MEDIA_H -#ifndef __KERNEL__ -#include <stdint.h> -#endif #include #include -- cgit v1.2.3 From 2308d5aff8d083a44aa02197d2f5687b73d98f82 Mon Sep 17 00:00:00 2001 From: Stanimir Varbanov Date: Thu, 3 Mar 2022 15:06:31 +0000 Subject: media: v4l: Add Qualcomm custom compressed pixel formats Add custom Qualcomm raw compressed pixel formats. They are used in Qualcomm SoCs to optimize the interconnect bandwidth.
Signed-off-by: Stanimir Varbanov Acked-by: Hans Verkuil Signed-off-by: Mauro Carvalho Chehab --- include/uapi/linux/videodev2.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/videodev2.h b/include/uapi/linux/videodev2.h index 3768a0a80830..6d465dc443b7 100644 --- a/include/uapi/linux/videodev2.h +++ b/include/uapi/linux/videodev2.h @@ -746,6 +746,8 @@ struct v4l2_pix_format { #define V4L2_PIX_FMT_INZI v4l2_fourcc('I', 'N', 'Z', 'I') /* Intel Planar Greyscale 10-bit and Depth 16-bit */ #define V4L2_PIX_FMT_CNF4 v4l2_fourcc('C', 'N', 'F', '4') /* Intel 4-bit packed depth confidence information */ #define V4L2_PIX_FMT_HI240 v4l2_fourcc('H', 'I', '2', '4') /* BTTV 8-bit dithered RGB */ +#define V4L2_PIX_FMT_QC08C v4l2_fourcc('Q', '0', '8', 'C') /* Qualcomm 8-bit compressed */ +#define V4L2_PIX_FMT_QC10C v4l2_fourcc('Q', '1', '0', 'C') /* Qualcomm 10-bit compressed */ /* 10bit raw bayer packed, 32 bytes for every 25 pixels, last LSB 6 bits unused */ #define V4L2_PIX_FMT_IPU3_SBGGR10 v4l2_fourcc('i', 'p', '3', 'b') /* IPU3 packed 10-bit BGGR bayer */ -- cgit v1.2.3 From fcbc4acf8b8dff5fc420a14026bd4ab1798cf465 Mon Sep 17 00:00:00 2001 From: Dikshita Agarwal Date: Tue, 19 Apr 2022 06:06:42 +0100 Subject: media: v4l2-ctrls: Add intra-refresh type control Add a control to set intra-refresh type. 
Signed-off-by: Dikshita Agarwal Reviewed-by: Nicolas Dufresne Acked-by: Hans Verkuil Signed-off-by: Stanimir Varbanov Signed-off-by: Mauro Carvalho Chehab --- include/uapi/linux/v4l2-controls.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/v4l2-controls.h b/include/uapi/linux/v4l2-controls.h index bb40129446d4..dfff69ed88f7 100644 --- a/include/uapi/linux/v4l2-controls.h +++ b/include/uapi/linux/v4l2-controls.h @@ -449,6 +449,11 @@ enum v4l2_mpeg_video_multi_slice_mode { #define V4L2_CID_MPEG_VIDEO_USE_LTR_FRAMES (V4L2_CID_CODEC_BASE+234) #define V4L2_CID_MPEG_VIDEO_DEC_CONCEAL_COLOR (V4L2_CID_CODEC_BASE+235) #define V4L2_CID_MPEG_VIDEO_INTRA_REFRESH_PERIOD (V4L2_CID_CODEC_BASE+236) +#define V4L2_CID_MPEG_VIDEO_INTRA_REFRESH_PERIOD_TYPE (V4L2_CID_CODEC_BASE+237) +enum v4l2_mpeg_video_intra_refresh_period_type { + V4L2_CID_MPEG_VIDEO_INTRA_REFRESH_PERIOD_TYPE_RANDOM = 0, + V4L2_CID_MPEG_VIDEO_INTRA_REFRESH_PERIOD_TYPE_CYCLIC = 1, +}; /* CIDs for the MPEG-2 Part 2 (H.262) codec */ #define V4L2_CID_MPEG_VIDEO_MPEG2_LEVEL (V4L2_CID_CODEC_BASE+270) -- cgit v1.2.3 From 8e29da69feade64ec7fe9e1a2824b967c5183a21 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Mon, 18 Apr 2022 10:44:00 -0600 Subject: io_uring: add support for IORING_ASYNC_CANCEL_ALL The current cancelation will look up and cancel the first request it finds based on the key passed in. Add a flag that allows canceling any request that matches the key. It completes with the number of requests found and canceled, or res < 0 if an error occurred.
Signed-off-by: Jens Axboe Link: https://lore.kernel.org/r/20220418164402.75259-4-axboe@kernel.dk --- include/uapi/linux/io_uring.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h index 1845cf7c80ba..476e58a2837f 100644 --- a/include/uapi/linux/io_uring.h +++ b/include/uapi/linux/io_uring.h @@ -187,6 +187,13 @@ enum { #define IORING_POLL_UPDATE_EVENTS (1U << 1) #define IORING_POLL_UPDATE_USER_DATA (1U << 2) +/* + * ASYNC_CANCEL flags. + * + * IORING_ASYNC_CANCEL_ALL Cancel all requests that match the given key + */ +#define IORING_ASYNC_CANCEL_ALL (1U << 0) + /* * IO completion data structure (Completion Queue Entry) */ -- cgit v1.2.3 From 4bf94615b8886305199ed5755cb72fea88258d15 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Mon, 18 Apr 2022 10:44:01 -0600 Subject: io_uring: allow IORING_OP_ASYNC_CANCEL with 'fd' key Currently sqe->addr must contain the user_data of the request being canceled. Introduce the IORING_ASYNC_CANCEL_FD flag, which tells the kernel that we're keying off the file fd instead for cancelation. This allows canceling any request that a) uses a file, and b) was assigned the file based on the value being passed in. Signed-off-by: Jens Axboe Link: https://lore.kernel.org/r/20220418164402.75259-5-axboe@kernel.dk --- include/uapi/linux/io_uring.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h index 476e58a2837f..cc7fe82a1798 100644 --- a/include/uapi/linux/io_uring.h +++ b/include/uapi/linux/io_uring.h @@ -191,8 +191,11 @@ enum { * ASYNC_CANCEL flags. 
* * IORING_ASYNC_CANCEL_ALL Cancel all requests that match the given key + * IORING_ASYNC_CANCEL_FD Key off 'fd' for cancelation rather than the + * request 'user_data' */ #define IORING_ASYNC_CANCEL_ALL (1U << 0) +#define IORING_ASYNC_CANCEL_FD (1U << 1) /* * IO completion data structure (Completion Queue Entry) -- cgit v1.2.3 From 970f256edb8c1259c8ed48d52b38215135396126 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Mon, 18 Apr 2022 10:44:02 -0600 Subject: io_uring: add support for IORING_ASYNC_CANCEL_ANY Rather than match on a specific key, be it user_data or file, allow canceling any request that we can look up. Works like IORING_ASYNC_CANCEL_ALL in that it cancels multiple requests, but it doesn't key off user_data or the file. Can't be set with IORING_ASYNC_CANCEL_FD, as that's a key selector. Only one may be used at a time. Signed-off-by: Jens Axboe Link: https://lore.kernel.org/r/20220418164402.75259-6-axboe@kernel.dk Signed-off-by: Jens Axboe --- include/uapi/linux/io_uring.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h index cc7fe82a1798..980d82eb196e 100644 --- a/include/uapi/linux/io_uring.h +++ b/include/uapi/linux/io_uring.h @@ -193,9 +193,11 @@ enum { * IORING_ASYNC_CANCEL_ALL Cancel all requests that match the given key * IORING_ASYNC_CANCEL_FD Key off 'fd' for cancelation rather than the * request 'user_data' + * IORING_ASYNC_CANCEL_ANY Match any request */ #define IORING_ASYNC_CANCEL_ALL (1U << 0) #define IORING_ASYNC_CANCEL_FD (1U << 1) +#define IORING_ASYNC_CANCEL_ANY (1U << 2) /* * IO completion data structure (Completion Queue Entry) -- cgit v1.2.3 From e9621e2bec80fe63f677a759066a5089b292f43a Mon Sep 17 00:00:00 2001 From: Stefan Roesch Date: Wed, 23 Mar 2022 08:44:19 -0700 Subject: io_uring: add fsetxattr and setxattr support This adds support to io_uring for the fsetxattr and setxattr API.
Signed-off-by: Stefan Roesch Acked-by: Christian Brauner Link: https://lore.kernel.org/r/20220323154420.3301504-4-shr@fb.com Signed-off-by: Jens Axboe --- include/uapi/linux/io_uring.h | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h index 980d82eb196e..864bd6a4d4ff 100644 --- a/include/uapi/linux/io_uring.h +++ b/include/uapi/linux/io_uring.h @@ -45,6 +45,7 @@ struct io_uring_sqe { __u32 rename_flags; __u32 unlink_flags; __u32 hardlink_flags; + __u32 xattr_flags; }; __u64 user_data; /* data to be passed back at completion time */ /* pack this to avoid bogus arm OABI complaints */ @@ -60,7 +61,8 @@ struct io_uring_sqe { __s32 splice_fd_in; __u32 file_index; }; - __u64 __pad2[2]; + __u64 addr3; + __u64 __pad2[1]; }; enum { @@ -145,6 +147,8 @@ enum { IORING_OP_SYMLINKAT, IORING_OP_LINKAT, IORING_OP_MSG_RING, + IORING_OP_FSETXATTR, + IORING_OP_SETXATTR, /* this goes last, obviously */ IORING_OP_LAST, -- cgit v1.2.3 From a56834e0fafe0adf7f22a28a5dbec3e8c3031a0e Mon Sep 17 00:00:00 2001 From: Stefan Roesch Date: Wed, 23 Mar 2022 08:44:20 -0700 Subject: io_uring: add fgetxattr and getxattr support This adds support to io_uring for the fgetxattr and getxattr API. 
Signed-off-by: Stefan Roesch Acked-by: Christian Brauner Link: https://lore.kernel.org/r/20220323154420.3301504-5-shr@fb.com Signed-off-by: Jens Axboe --- include/uapi/linux/io_uring.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h index 864bd6a4d4ff..8ca1d9ae56d6 100644 --- a/include/uapi/linux/io_uring.h +++ b/include/uapi/linux/io_uring.h @@ -149,6 +149,8 @@ enum { IORING_OP_MSG_RING, IORING_OP_FSETXATTR, IORING_OP_SETXATTR, + IORING_OP_FGETXATTR, + IORING_OP_GETXATTR, /* this goes last, obviously */ IORING_OP_LAST, -- cgit v1.2.3 From 1374e08e2d44863c931910797852589803997668 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Tue, 12 Apr 2022 14:22:40 -0600 Subject: io_uring: add socket(2) support Supports both regular socket(2) where a normal file descriptor is instantiated when called, or direct descriptors. Link: https://lore.kernel.org/r/20220412202240.234207-3-axboe@kernel.dk Signed-off-by: Jens Axboe --- include/uapi/linux/io_uring.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h index 8ca1d9ae56d6..5fb52bf32435 100644 --- a/include/uapi/linux/io_uring.h +++ b/include/uapi/linux/io_uring.h @@ -151,6 +151,7 @@ enum { IORING_OP_SETXATTR, IORING_OP_FGETXATTR, IORING_OP_GETXATTR, + IORING_OP_SOCKET, /* this goes last, obviously */ IORING_OP_LAST, -- cgit v1.2.3 From 8d92e4fbcf0fb7ecb24223b7b1ce95b9beb4dfa2 Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Mon, 25 Apr 2022 06:44:21 +0300 Subject: devlink: introduce line card devices support Line card can contain one or more devices that makes sense to make visible to the user. For example, this can be a gearbox with flash memory, which could be updated. Provide the driver possibility to attach such devices to a line card and expose those to user. 
Example: $ devlink lc show pci/0000:01:00.0 lc 8 pci/0000:01:00.0: lc 8 state active type 16x100G supported_types: 16x100G devices: device 0 device 1 device 2 device 3 Signed-off-by: Jiri Pirko Signed-off-by: Ido Schimmel Signed-off-by: David S. Miller --- include/uapi/linux/devlink.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/devlink.h b/include/uapi/linux/devlink.h index b3d40a5d72ff..cd578645f94f 100644 --- a/include/uapi/linux/devlink.h +++ b/include/uapi/linux/devlink.h @@ -575,6 +575,9 @@ enum devlink_attr { DEVLINK_ATTR_LINECARD_STATE, /* u8 */ DEVLINK_ATTR_LINECARD_TYPE, /* string */ DEVLINK_ATTR_LINECARD_SUPPORTED_TYPES, /* nested */ + DEVLINK_ATTR_LINECARD_DEVICE_LIST, /* nested */ + DEVLINK_ATTR_LINECARD_DEVICE, /* nested */ + DEVLINK_ATTR_LINECARD_DEVICE_INDEX, /* u32 */ /* add new attributes above here, update the policy in devlink.c */ -- cgit v1.2.3 From 276910aecc6a4076f5fbfd8160ff70695d6c1eb5 Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Mon, 25 Apr 2022 06:44:22 +0300 Subject: devlink: introduce line card info get message Allow the driver to provide per line card info get op to fill-up info, similar to the "devlink dev info". Example: $ devlink lc info pci/0000:01:00.0 lc 8 pci/0000:01:00.0: lc 8 versions: fixed: hw.revision 0 running: ini.version 4 Signed-off-by: Jiri Pirko Signed-off-by: Ido Schimmel Signed-off-by: David S. 
Miller --- include/uapi/linux/devlink.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/devlink.h b/include/uapi/linux/devlink.h index cd578645f94f..fb8c3864457f 100644 --- a/include/uapi/linux/devlink.h +++ b/include/uapi/linux/devlink.h @@ -136,6 +136,8 @@ enum devlink_command { DEVLINK_CMD_LINECARD_NEW, DEVLINK_CMD_LINECARD_DEL, + DEVLINK_CMD_LINECARD_INFO_GET, /* can dump */ + /* add new commands above here */ __DEVLINK_CMD_MAX, DEVLINK_CMD_MAX = __DEVLINK_CMD_MAX - 1 -- cgit v1.2.3 From e8e7fbb6a39cd6761c843d97851eb40c5885e922 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pali=20Roh=C3=A1r?= Date: Tue, 12 Apr 2022 11:49:43 +0200 Subject: PCI: Add PCI_EXP_SLTCTL_ASPL_DISABLE macro MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add macro defining Auto Slot Power Limit Disable bit in Slot Control Register. Link: https://lore.kernel.org/r/20220412094946.27069-2-pali@kernel.org Signed-off-by: Pali Rohár Signed-off-by: Marek Behún Signed-off-by: Lorenzo Pieralisi Acked-by: Bjorn Helgaas --- include/uapi/linux/pci_regs.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/pci_regs.h b/include/uapi/linux/pci_regs.h index bee1a9ed6e66..108f8523fa04 100644 --- a/include/uapi/linux/pci_regs.h +++ b/include/uapi/linux/pci_regs.h @@ -616,6 +616,7 @@ #define PCI_EXP_SLTCTL_PWR_OFF 0x0400 /* Power Off */ #define PCI_EXP_SLTCTL_EIC 0x0800 /* Electromechanical Interlock Control */ #define PCI_EXP_SLTCTL_DLLSCE 0x1000 /* Data Link Layer State Changed Enable */ +#define PCI_EXP_SLTCTL_ASPL_DISABLE 0x2000 /* Auto Slot Power Limit Disable */ #define PCI_EXP_SLTCTL_IBPD_DISABLE 0x4000 /* In-band PD disable */ #define PCI_EXP_SLTSTA 0x1a /* Slot Status */ #define PCI_EXP_SLTSTA_ABP 0x0001 /* Attention Button Pressed */ -- cgit v1.2.3 From 7d5e005d982527e4029b0139823d179986e34cdc Mon Sep 17 00:00:00 2001 From: Amir Goldstein Date: Fri, 22 
Apr 2022 15:03:25 +0300 Subject: fanotify: implement "evictable" inode marks When an inode mark is created with flag FAN_MARK_EVICTABLE, it will not pin the marked inode to inode cache, so when inode is evicted from cache due to memory pressure, the mark will be lost. When an inode mark with flag FAN_MARK_EVICTABLE is updated without using this flag, the marked inode is pinned to inode cache. When an inode mark is updated with flag FAN_MARK_EVICTABLE but an existing mark already has the inode pinned, the mark update fails with error EEXIST. Evictable inode marks can be used to set up inode marks with ignored mask to suppress events from uninteresting files or directories in a lazy manner, upon receiving the first event, without having to iterate all the uninteresting files or directories beforehand. The evictable inode mark feature allows performing this lazy marks setup without exhausting the system memory with pinned inodes. This change does not enable the feature yet. Link: https://lore.kernel.org/linux-fsdevel/CAOQ4uxiRDpuS=2uA6+ZUM7yG9vVU-u212tkunBmSnP_u=mkv=Q@mail.gmail.com/ Link: https://lore.kernel.org/r/20220422120327.3459282-15-amir73il@gmail.com Signed-off-by: Amir Goldstein Signed-off-by: Jan Kara --- include/uapi/linux/fanotify.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/fanotify.h b/include/uapi/linux/fanotify.h index e8ac38cc2fd6..f1f89132d60e 100644 --- a/include/uapi/linux/fanotify.h +++ b/include/uapi/linux/fanotify.h @@ -82,6 +82,7 @@ #define FAN_MARK_IGNORED_SURV_MODIFY 0x00000040 #define FAN_MARK_FLUSH 0x00000080 /* FAN_MARK_FILESYSTEM is 0x00000100 */ +#define FAN_MARK_EVICTABLE 0x00000200 /* These are NOT bitwise flags. Both bits can be used togther.
*/ #define FAN_MARK_INODE 0x00000000 -- cgit v1.2.3 From c0a5a21c25f37c9fd7b36072f9968cdff1e4aa13 Mon Sep 17 00:00:00 2001 From: Kumar Kartikeya Dwivedi Date: Mon, 25 Apr 2022 03:18:51 +0530 Subject: bpf: Allow storing referenced kptr in map Extending the code in previous commits, introduce referenced kptr support, which needs to be tagged using 'kptr_ref' tag instead. Unlike unreferenced kptr, referenced kptr have a lot more restrictions. In addition to the type matching, only a newly introduced bpf_kptr_xchg helper is allowed to modify the map value at that offset. This transfers the referenced pointer being stored into the map, releasing the references state for the program, and returning the old value and creating new reference state for the returned pointer. Similar to unreferenced pointer case, return value for this case will also be PTR_TO_BTF_ID_OR_NULL. The reference for the returned pointer must either be eventually released by calling the corresponding release function, otherwise it must be transferred into another map. It is also allowed to call bpf_kptr_xchg with a NULL pointer, to clear the value, and obtain the old value if any. BPF_LDX, BPF_STX, and BPF_ST cannot access referenced kptr. A future commit will permit using BPF_LDX for such pointers, but attempt at making it safe, since the lifetime of object won't be guaranteed. There are valid reasons to enforce the restriction of permitting only bpf_kptr_xchg to operate on referenced kptr. The pointer value must be consistent in face of concurrent modification, and any prior values contained in the map must also be released before a new one is moved into the map. To ensure proper transfer of this ownership, bpf_kptr_xchg returns the old value, which the verifier would require the user to either free or move into another map, and releases the reference held for the pointer being moved in. In the future, direct BPF_XCHG instruction may also be permitted to work like bpf_kptr_xchg helper. 
Note that process_kptr_func doesn't have to call check_helper_mem_access, since we already disallow rdonly/wronly flags for map, which is what check_map_access_type checks, and we already ensure the PTR_TO_MAP_VALUE refers to kptr by obtaining its off_desc, so check_map_access is also not required. Signed-off-by: Kumar Kartikeya Dwivedi Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20220424214901.2743946-4-memxor@gmail.com --- include/uapi/linux/bpf.h | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index d14b10b85e51..444fe6f1cf35 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -5143,6 +5143,17 @@ union bpf_attr { * The **hash_algo** is returned on success, * **-EOPNOTSUP** if the hash calculation failed or **-EINVAL** if * invalid arguments are passed. + * + * void *bpf_kptr_xchg(void *map_value, void *ptr) + * Description + * Exchange kptr at pointer *map_value* with *ptr*, and return the + * old value. *ptr* can be NULL, otherwise it must be a referenced + * pointer which will be released when this helper is called. + * Return + * The old value of kptr (which can be NULL). The returned pointer + * if not NULL, is a reference which must be released using its + * corresponding release function, or moved into a BPF map before + * program exit. */ #define __BPF_FUNC_MAPPER(FN) \ FN(unspec), \ @@ -5339,6 +5350,7 @@ union bpf_attr { FN(copy_from_user_task), \ FN(skb_set_tstamp), \ FN(ima_file_hash), \ + FN(kptr_xchg), \ /* */ /* integer value in 'imm' field of BPF_CALL instruction selects which helper -- cgit v1.2.3 From d8fc1c7c4c9b705ce5f5bba772ad66a0137c685d Mon Sep 17 00:00:00 2001 From: Andrew Davis Date: Mon, 25 Apr 2022 09:16:17 -0500 Subject: tee: remove flags TEE_IOCTL_SHM_MAPPED and TEE_IOCTL_SHM_DMA_BUF These look to be leftover from an early edition of this driver. Userspace does not need this information. 
Checking all users of this that I have access to, I have verified no one is using them. They leak internal use flags out to userspace. Even more they are not correct anymore after a45ea4efa358. Let's drop these flags before someone does try to use them for something and they become ABI. Signed-off-by: Andrew Davis Acked-by: Sumit Garg Signed-off-by: Jens Wiklander --- include/uapi/linux/tee.h | 4 ---- 1 file changed, 4 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/tee.h b/include/uapi/linux/tee.h index 25a6c534beb1..23e57164693c 100644 --- a/include/uapi/linux/tee.h +++ b/include/uapi/linux/tee.h @@ -42,10 +42,6 @@ #define TEE_IOC_MAGIC 0xa4 #define TEE_IOC_BASE 0 -/* Flags relating to shared memory */ -#define TEE_IOCTL_SHM_MAPPED 0x1 /* memory mapped in normal world */ -#define TEE_IOCTL_SHM_DMA_BUF 0x2 /* dma-buf handle on shared memory */ - #define TEE_MAX_ARG_SIZE 1024 #define TEE_GEN_CAP_GP (1 << 0)/* GlobalPlatform compliant TEE */ -- cgit v1.2.3 From cc51eaa8b530bf070e76847a717adcbf603469b7 Mon Sep 17 00:00:00 2001 From: Dylan Yudaken Date: Tue, 26 Apr 2022 01:29:04 -0700 Subject: io_uring: add type to op enum It is useful to have a type enum for opcodes, to allow the compiler to assert that every value is used in a switch statement.
Signed-off-by: Dylan Yudaken Link: https://lore.kernel.org/r/20220426082907.3600028-2-dylany@fb.com Signed-off-by: Jens Axboe --- include/uapi/linux/io_uring.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h index 5fb52bf32435..49d1f3994f8d 100644 --- a/include/uapi/linux/io_uring.h +++ b/include/uapi/linux/io_uring.h @@ -105,7 +105,7 @@ enum { #define IORING_SETUP_R_DISABLED (1U << 6) /* start with ring disabled */ #define IORING_SETUP_SUBMIT_ALL (1U << 7) /* continue submit on error */ -enum { +enum io_uring_op { IORING_OP_NOP, IORING_OP_READV, IORING_OP_WRITEV, -- cgit v1.2.3 From 052e1f01bfae8be6f31b61ed3a2356edfca855dc Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Tue, 26 Apr 2022 10:54:33 -0700 Subject: net: atm: remove support for ZeitNet ZN122x ATM devices This driver received nothing but automated fixes in the last 15 years. Since it's using virt_to_bus it's unlikely to be used on any modern platform. Signed-off-by: Jakub Kicinski Signed-off-by: David S. 
Miller --- include/uapi/linux/atm_zatm.h | 47 ------------------------------------------- 1 file changed, 47 deletions(-) delete mode 100644 include/uapi/linux/atm_zatm.h (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/atm_zatm.h b/include/uapi/linux/atm_zatm.h deleted file mode 100644 index 5135027b93c1..000000000000 --- a/include/uapi/linux/atm_zatm.h +++ /dev/null @@ -1,47 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ -/* atm_zatm.h - Driver-specific declarations of the ZATM driver (for use by - driver-specific utilities) */ - -/* Written 1995-1999 by Werner Almesberger, EPFL LRC/ICA */ - - -#ifndef LINUX_ATM_ZATM_H -#define LINUX_ATM_ZATM_H - -/* - * Note: non-kernel programs including this file must also include - * sys/types.h for struct timeval - */ - -#include -#include - -#define ZATM_GETPOOL _IOW('a',ATMIOC_SARPRV+1,struct atmif_sioc) - /* get pool statistics */ -#define ZATM_GETPOOLZ _IOW('a',ATMIOC_SARPRV+2,struct atmif_sioc) - /* get statistics and zero */ -#define ZATM_SETPOOL _IOW('a',ATMIOC_SARPRV+3,struct atmif_sioc) - /* set pool parameters */ - -struct zatm_pool_info { - int ref_count; /* free buffer pool usage counters */ - int low_water,high_water; /* refill parameters */ - int rqa_count,rqu_count; /* queue condition counters */ - int offset,next_off; /* alignment optimizations: offset */ - int next_cnt,next_thres; /* repetition counter and threshold */ -}; - -struct zatm_pool_req { - int pool_num; /* pool number */ - struct zatm_pool_info info; /* actual information */ -}; - -#define ZATM_OAM_POOL 0 /* free buffer pool for OAM cells */ -#define ZATM_AAL0_POOL 1 /* free buffer pool for AAL0 cells */ -#define ZATM_AAL5_POOL_BASE 2 /* first AAL5 free buffer pool */ -#define ZATM_LAST_POOL ZATM_AAL5_POOL_BASE+10 /* max. 
64 kB */ - -#define ZATM_TIMER_HISTORY_SIZE 16 /* number of timer adjustments to - record; must be 2^n */ - -#endif -- cgit v1.2.3 From c35fe2a68f29a0bda15ae994154cacaae5f69791 Mon Sep 17 00:00:00 2001 From: Catalin Marinas Date: Mon, 25 Apr 2022 16:18:33 +0100 Subject: elf: Fix the arm64 MTE ELF segment name and value Unfortunately, the name/value choice for the MTE ELF segment type (PT_ARM_MEMTAG_MTE) was pretty poor: LOPROC+1 is already in use by PT_AARCH64_UNWIND, as defined in the AArch64 ELF ABI (https://github.com/ARM-software/abi-aa/blob/main/aaelf64/aaelf64.rst). Update the ELF segment type value to LOPROC+2 and also change the define to PT_AARCH64_MEMTAG_MTE to match the AArch64 ELF ABI namespace. The AArch64 ELF ABI document is updating accordingly (segment type not previously mentioned in the document). Signed-off-by: Catalin Marinas Fixes: 761b9b366cec ("elf: Introduce the ARM MTE ELF segment type") Cc: Will Deacon Cc: Jonathan Corbet Cc: Eric Biederman Cc: Kees Cook Cc: Luis Machado Cc: Richard Earnshaw Link: https://lore.kernel.org/r/20220425151833.2603830-1-catalin.marinas@arm.com Signed-off-by: Will Deacon --- include/uapi/linux/elf.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/elf.h b/include/uapi/linux/elf.h index 787c657bfae8..7ce993e6786c 100644 --- a/include/uapi/linux/elf.h +++ b/include/uapi/linux/elf.h @@ -42,7 +42,7 @@ typedef __s64 Elf64_Sxword; /* ARM MTE memory tag segment type */ -#define PT_ARM_MEMTAG_MTE (PT_LOPROC + 0x1) +#define PT_AARCH64_MEMTAG_MTE (PT_LOPROC + 0x2) /* * Extended Numbering -- cgit v1.2.3 From d495f942f40aa412f8d4d65951152648cfa09903 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Fri, 22 Apr 2022 12:30:13 +0200 Subject: KVM: fix bad user ABI for KVM_EXIT_SYSTEM_EVENT When KVM_EXIT_SYSTEM_EVENT was introduced, it included a flags member that at the time was unused. 
Unfortunately this extensibility mechanism has several issues: - x86 is not writing the member, so it would not be possible to use it on x86 except for new events - the member is not aligned to 64 bits, so the definition of the uAPI struct is incorrect for 32- on 64-bit userspace. This is a problem for RISC-V, which supports CONFIG_KVM_COMPAT, but fortunately usage of flags was only introduced in 5.18. Since padding has to be introduced, place a new field in there that tells if the flags field is valid. To allow further extensibility, in fact, change flags to an array of 16 values, and store how many of the values are valid. The availability of the new ndata field is tied to a system capability; all architectures are changed to fill in the field. To avoid breaking compilation of userspace that was using the flags field, provide a userspace-only union to overlap flags with data[0]. The new field is placed at the same offset for both 32- and 64-bit userspace. Cc: Will Deacon Cc: Marc Zyngier Cc: Peter Gonda Cc: Sean Christopherson Signed-off-by: Paolo Bonzini Reported-by: kernel test robot Message-Id: <20220422103013.34832-1-pbonzini@redhat.com> Signed-off-by: Paolo Bonzini --- include/uapi/linux/kvm.h | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index 91a6fe4e02c0..6a184d260c7f 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -445,7 +445,13 @@ struct kvm_run { #define KVM_SYSTEM_EVENT_RESET 2 #define KVM_SYSTEM_EVENT_CRASH 3 __u32 type; - __u64 flags; + __u32 ndata; + union { +#ifndef __KERNEL__ + __u64 flags; +#endif + __u64 data[16]; + }; } system_event; /* KVM_EXIT_S390_STSI */ struct { @@ -1144,6 +1150,8 @@ struct kvm_ppc_resize_hpt { #define KVM_CAP_S390_MEM_OP_EXTENSION 211 #define KVM_CAP_PMU_CAPABILITY 212 #define KVM_CAP_DISABLE_QUIRKS2 213 +/* #define KVM_CAP_VM_TSC_CONTROL 214 */ +#define KVM_CAP_SYSTEM_EVENT_DATA 215 
#ifdef KVM_CAP_IRQ_ROUTING -- cgit v1.2.3 From 0e0af57e0e91b304f36b7d1dba859e3c04094273 Mon Sep 17 00:00:00 2001 From: "Dr. Thomas Orgis" Date: Fri, 29 Apr 2022 14:38:03 -0700 Subject: taskstats: version 12 with thread group and exe info The task exit struct needs some crucial information to be able to provide an enhanced version of process and thread accounting. This change provides: 1. ac_tgid in addition to ac_pid 2. thread group execution walltime in ac_tgetime 3. flag AGROUP in ac_flag to indicate the last task in a thread group / process 4. device ID and inode of task's /proc/self/exe in ac_exe_dev and ac_exe_inode 5. tools/accounting/procacct as demonstrator When a task exits, taskstats are reported to userspace including the task's pid and ppid, but without the id of the thread group this task is part of. Without the tgid, the stats of single tasks cannot be correlated to each other as a thread group (process). The taskstats documentation suggests that on process exit a data set consisting of accumulated stats for the whole group is produced. But such an additional set of stats is only produced for actually multithreaded processes, not groups that had only one thread, and also those stats only contain data about delay accounting and not the more basic information about CPU and memory resource usage. Adding the AGROUP flag to be set when the last task of a group exited enables determination of process end also for single-threaded processes. My application basically does enhanced process accounting with summed cputime, biggest maxrss, tasks per process. The data is not available with the traditional BSD process accounting (which is not designed to be extensible) and the taskstats interface allows more efficient on-the-fly grouping and summing of the stats, anyway, without intermediate disk writes.
Furthermore, I do carry statistics on which exact program binary is used how often with associated resources, getting a picture on how important which parts of a collection of installed scientific software in different versions are, and how well they put load on the machine. This is enabled by providing information on /proc/self/exe for each task. I assume the two 64-bit fields for device ID and inode are more appropriate than the possibly large resolved path to keep the data volume down. Add the tgid to the stats to complete task identification, the flag AGROUP to mark the last task of a group, the group wallclock time, and inode-based identification of the associated executable file. Add tools/accounting/procacct.c as a simplified fork of getdelays.c to demonstrate process and thread accounting. [thomas.orgis@uni-hamburg.de: fix version number in comment] Link: https://lkml.kernel.org/r/20220405003601.7a5f6008@plasteblaster Link: https://lkml.kernel.org/r/20220331004106.64e5616b@plasteblaster Signed-off-by: Dr. Thomas Orgis Reviewed-by: Ismael Luceno Cc: Balbir Singh Cc: Eric W. Biederman Cc: xu xin Cc: Yang Yang Signed-off-by: Andrew Morton --- include/uapi/linux/acct.h | 3 ++- include/uapi/linux/taskstats.h | 24 +++++++++++++++++++++--- 2 files changed, 23 insertions(+), 4 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/acct.h b/include/uapi/linux/acct.h index 985b89068591..0e591152aa8a 100644 --- a/include/uapi/linux/acct.h +++ b/include/uapi/linux/acct.h @@ -103,12 +103,13 @@ struct acct_v3 /* * accounting flags */ - /* bit set when the process ... */ + /* bit set when the process/task ... */ #define AFORK 0x01 /* ... executed fork, but did not exec */ #define ASU 0x02 /* ... used super-user privileges */ #define ACOMPAT 0x04 /* ... used compatibility mode (VAX only not used) */ #define ACORE 0x08 /* ... dumped core */ #define AXSIG 0x10 /* ... was killed by a signal */ +#define AGROUP 0x20 /* ... 
was the last task of the process (task group) */ #if defined(__BYTE_ORDER) ? __BYTE_ORDER == __BIG_ENDIAN : defined(__BIG_ENDIAN) #define ACCT_BYTEORDER 0x80 /* accounting file is big endian */ diff --git a/include/uapi/linux/taskstats.h b/include/uapi/linux/taskstats.h index 12327d32378f..736154171489 100644 --- a/include/uapi/linux/taskstats.h +++ b/include/uapi/linux/taskstats.h @@ -34,7 +34,7 @@ */ -#define TASKSTATS_VERSION 11 +#define TASKSTATS_VERSION 12 #define TS_COMM_LEN 32 /* should be >= TASK_COMM_LEN * in linux/sched.h */ @@ -48,7 +48,8 @@ struct taskstats { __u32 ac_exitcode; /* Exit status */ /* The accounting flags of a task as defined in - * Defined values are AFORK, ASU, ACOMPAT, ACORE, and AXSIG. + * Defined values are AFORK, ASU, ACOMPAT, ACORE, AXSIG, and AGROUP. + * (AGROUP since version 12). */ __u8 ac_flag; /* Record flags */ __u8 ac_nice; /* task_nice */ @@ -173,9 +174,26 @@ struct taskstats { /* v10: 64-bit btime to avoid overflow */ __u64 ac_btime64; /* 64-bit begin time */ - /* Delay waiting for memory compact */ + /* v11: Delay waiting for memory compact */ __u64 compact_count; __u64 compact_delay_total; + + /* v12 begin */ + __u32 ac_tgid; /* thread group ID */ + /* Thread group walltime up to now. This is total process walltime if + * AGROUP flag is set. + */ + __u64 ac_tgetime __attribute__((aligned(8))); + /* Lightweight information to identify process binary files. + * This leaves userspace to match this to a file system path, using + * MAJOR() and MINOR() macros to identify a device and mount point, + * the inode to identify the executable file. This is /proc/self/exe + * at the end, so matching the most recent exec(). Values are zero + * for kernel threads. 
+ */ + __u64 ac_exe_dev; /* program binary device ID */ + __u64 ac_exe_inode; /* program binary inode number */ + /* v12 end */ }; -- cgit v1.2.3 From f548a12efd5ab97e6b1fb332e5634ce44b3d9328 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Tue, 26 Apr 2022 17:39:50 -0600 Subject: io_uring: return hint on whether more data is available after receive For now just use a CQE flag for this, with big CQE support we could return the actual number of bytes left. Signed-off-by: Jens Axboe --- include/uapi/linux/io_uring.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h index 49d1f3994f8d..92d1799892b2 100644 --- a/include/uapi/linux/io_uring.h +++ b/include/uapi/linux/io_uring.h @@ -220,9 +220,11 @@ struct io_uring_cqe { * * IORING_CQE_F_BUFFER If set, the upper 16 bits are the buffer ID * IORING_CQE_F_MORE If set, parent SQE will generate more CQE entries + * IORING_CQE_F_SOCK_NONEMPTY If set, more data to read after socket recv */ #define IORING_CQE_F_BUFFER (1U << 0) #define IORING_CQE_F_MORE (1U << 1) +#define IORING_CQE_F_SOCK_NONEMPTY (1U << 2) enum { IORING_CQE_BUFFER_SHIFT = 16, -- cgit v1.2.3 From e1169f06d5bbdbc2b22ae4e3083a4bf75ae5ecee Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Mon, 25 Apr 2022 19:49:03 -0600 Subject: io_uring: use TWA_SIGNAL_NO_IPI if IORING_SETUP_COOP_TASKRUN is used If this is set, io_uring will never use an IPI to deliver a task_work notification. This can be used in the common case where a single task or thread communicates with the ring, and doesn't rely on io_uring_cqe_peek(). This provides a noticeable win in performance, both from eliminating the IPI itself, but also from avoiding interrupting the submitting task unnecessarily. 
Reviewed-by: Pavel Begunkov Link: https://lore.kernel.org/r/20220426014904.60384-6-axboe@kernel.dk Signed-off-by: Jens Axboe --- include/uapi/linux/io_uring.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h index 980d82eb196e..a84f29d657c3 100644 --- a/include/uapi/linux/io_uring.h +++ b/include/uapi/linux/io_uring.h @@ -102,6 +102,14 @@ enum { #define IORING_SETUP_ATTACH_WQ (1U << 5) /* attach to existing wq */ #define IORING_SETUP_R_DISABLED (1U << 6) /* start with ring disabled */ #define IORING_SETUP_SUBMIT_ALL (1U << 7) /* continue submit on error */ +/* + * Cooperative task running. When requests complete, they often require + * forcing the submitter to transition to the kernel to complete. If this + * flag is set, work will be done when the task transitions anyway, rather + * than force an inter-processor interrupt reschedule. This avoids interrupting + * a task running in userspace, and saves an IPI. + */ +#define IORING_SETUP_COOP_TASKRUN (1U << 8) enum { IORING_OP_NOP, -- cgit v1.2.3 From ef060ea9e4fd3b763e7060a3af0a258d2d5d7c0d Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Mon, 25 Apr 2022 19:49:04 -0600 Subject: io_uring: add IORING_SETUP_TASKRUN_FLAG If IORING_SETUP_COOP_TASKRUN is set to use cooperative scheduling for running task_work, then IORING_SETUP_TASKRUN_FLAG can be set so the application can tell if task_work is pending in the kernel for this ring. This allows use cases like io_uring_peek_cqe() to still function appropriately, or for the task to know when it would be useful to call io_uring_wait_cqe() to run pending events. 
Reviewed-by: Pavel Begunkov Link: https://lore.kernel.org/r/20220426014904.60384-7-axboe@kernel.dk Signed-off-by: Jens Axboe --- include/uapi/linux/io_uring.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h index a84f29d657c3..fad63564678a 100644 --- a/include/uapi/linux/io_uring.h +++ b/include/uapi/linux/io_uring.h @@ -110,6 +110,12 @@ enum { * a task running in userspace, and saves an IPI. */ #define IORING_SETUP_COOP_TASKRUN (1U << 8) +/* + * If COOP_TASKRUN is set, get notified if task work is available for + * running and a kernel transition would be needed to run it. This sets + * IORING_SQ_TASKRUN in the sq ring flags. Only valid with COOP_TASKRUN. + */ +#define IORING_SETUP_TASKRUN_FLAG (1U << 9) enum { IORING_OP_NOP, @@ -256,6 +262,7 @@ struct io_sqring_offsets { */ #define IORING_SQ_NEED_WAKEUP (1U << 0) /* needs io_uring_enter wakeup */ #define IORING_SQ_CQ_OVERFLOW (1U << 1) /* CQ ring is overflown */ +#define IORING_SQ_TASKRUN (1U << 2) /* task should enter the kernel */ struct io_cqring_offsets { __u32 head; -- cgit v1.2.3 From 3254e0b9eb5649ffaa48717ebc9c593adc4ee6a9 Mon Sep 17 00:00:00 2001 From: Alexandru Tachici Date: Fri, 29 Apr 2022 18:34:31 +0300 Subject: ethtool: Add 10base-T1L link mode entry Add entry for the 10base-T1L full duplex mode. Reviewed-by: Andrew Lunn Reviewed-by: Oleksij Rempel Signed-off-by: Alexandru Tachici Signed-off-by: David S.
Miller --- include/uapi/linux/ethtool.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/ethtool.h b/include/uapi/linux/ethtool.h index 7bc4b8def12c..e0f0ee9bc89e 100644 --- a/include/uapi/linux/ethtool.h +++ b/include/uapi/linux/ethtool.h @@ -1691,6 +1691,7 @@ enum ethtool_link_mode_bit_indices { ETHTOOL_LINK_MODE_400000baseCR4_Full_BIT = 89, ETHTOOL_LINK_MODE_100baseFX_Half_BIT = 90, ETHTOOL_LINK_MODE_100baseFX_Full_BIT = 91, + ETHTOOL_LINK_MODE_10baseT1L_Full_BIT = 92, /* must be last entry */ __ETHTOOL_LINK_MODE_MASK_NBITS }; -- cgit v1.2.3 From 909b4f2bf764a903e9183111368f1509f9b40e6d Mon Sep 17 00:00:00 2001 From: Alexandru Tachici Date: Fri, 29 Apr 2022 18:34:32 +0300 Subject: net: phy: Add 10-BaseT1L registers The 802.3cg specification defines the 10-BaseT1L link mode for ethernet traffic on twisted wire pair. PMA status register can be used to detect if the phy supports 2.4 V TX level and PCS control register can be used to enable/disable PCS level loopback. Reviewed-by: Andrew Lunn Signed-off-by: Alexandru Tachici Signed-off-by: David S. Miller --- include/uapi/linux/mdio.h | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/mdio.h b/include/uapi/linux/mdio.h index c54e6eae5366..0b2eba36dd7c 100644 --- a/include/uapi/linux/mdio.h +++ b/include/uapi/linux/mdio.h @@ -67,6 +67,9 @@ #define MDIO_PCS_10GBRT_STAT2 33 /* 10GBASE-R/-T PCS status 2 */ #define MDIO_AN_10GBT_CTRL 32 /* 10GBASE-T auto-negotiation control */ #define MDIO_AN_10GBT_STAT 33 /* 10GBASE-T auto-negotiation status */ +#define MDIO_B10L_PMA_CTRL 2294 /* 10BASE-T1L PMA control */ +#define MDIO_PMA_10T1L_STAT 2295 /* 10BASE-T1L PMA status */ +#define MDIO_PCS_10T1L_CTRL 2278 /* 10BASE-T1L PCS control */ /* LASI (Link Alarm Status Interrupt) registers, defined by XENPAK MSA.
*/ #define MDIO_PMA_LASI_RXCTRL 0x9000 /* RX_ALARM control */ @@ -268,6 +271,28 @@ #define MDIO_AN_10GBT_STAT_MS 0x4000 /* Master/slave config */ #define MDIO_AN_10GBT_STAT_MSFLT 0x8000 /* Master/slave config fault */ +/* 10BASE-T1L PMA control */ +#define MDIO_PMA_10T1L_CTRL_LB_EN 0x0001 /* Enable loopback mode */ +#define MDIO_PMA_10T1L_CTRL_EEE_EN 0x0400 /* Enable EEE mode */ +#define MDIO_PMA_10T1L_CTRL_LOW_POWER 0x0800 /* Low-power mode */ +#define MDIO_PMA_10T1L_CTRL_2V4_EN 0x1000 /* Enable 2.4 Vpp operating mode */ +#define MDIO_PMA_10T1L_CTRL_TX_DIS 0x4000 /* Transmit disable */ +#define MDIO_PMA_10T1L_CTRL_PMA_RST 0x8000 /* PMA reset */ + +/* 10BASE-T1L PMA status register. */ +#define MDIO_PMA_10T1L_STAT_LINK 0x0001 /* PMA receive link up */ +#define MDIO_PMA_10T1L_STAT_FAULT 0x0002 /* Fault condition detected */ +#define MDIO_PMA_10T1L_STAT_POLARITY 0x0004 /* Receive polarity is reversed */ +#define MDIO_PMA_10T1L_STAT_RECV_FAULT 0x0200 /* Able to detect fault on receive path */ +#define MDIO_PMA_10T1L_STAT_EEE 0x0400 /* PHY has EEE ability */ +#define MDIO_PMA_10T1L_STAT_LOW_POWER 0x0800 /* PMA has low-power ability */ +#define MDIO_PMA_10T1L_STAT_2V4_ABLE 0x1000 /* PHY has 2.4 Vpp operating mode ability */ +#define MDIO_PMA_10T1L_STAT_LB_ABLE 0x2000 /* PHY has loopback ability */ + +/* 10BASE-T1L PCS control register. */ +#define MDIO_PCS_10T1L_CTRL_LB 0x4000 /* Enable PCS level loopback mode */ +#define MDIO_PCS_10T1L_CTRL_RESET 0x8000 /* PCS reset */ + /* EEE Supported/Advertisement/LP Advertisement registers. * * EEE capability Register (3.20), Advertisement (7.60) and -- cgit v1.2.3 From 1b020e448e0fb67bcb04ee0f778d413045f965d3 Mon Sep 17 00:00:00 2001 From: Alexandru Tachici Date: Fri, 29 Apr 2022 18:34:33 +0300 Subject: net: phy: Add BaseT1 auto-negotiation registers Added BASE-T1 AN advertisement register (Registers 7.514, 7.515, and 7.516) and BASE-T1 AN LP Base Page ability register (Registers 7.517, 7.518, and 7.519).
Reviewed-by: Andrew Lunn Signed-off-by: Alexandru Tachici Signed-off-by: David S. Miller --- include/uapi/linux/mdio.h | 40 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 40 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/mdio.h b/include/uapi/linux/mdio.h index 0b2eba36dd7c..fa3515257f54 100644 --- a/include/uapi/linux/mdio.h +++ b/include/uapi/linux/mdio.h @@ -70,6 +70,14 @@ #define MDIO_B10L_PMA_CTRL 2294 /* 10BASE-T1L PMA control */ #define MDIO_PMA_10T1L_STAT 2295 /* 10BASE-T1L PMA status */ #define MDIO_PCS_10T1L_CTRL 2278 /* 10BASE-T1L PCS control */ +#define MDIO_AN_T1_CTRL 512 /* BASE-T1 AN control */ +#define MDIO_AN_T1_STAT 513 /* BASE-T1 AN status */ +#define MDIO_AN_T1_ADV_L 514 /* BASE-T1 AN advertisement register [15:0] */ +#define MDIO_AN_T1_ADV_M 515 /* BASE-T1 AN advertisement register [31:16] */ +#define MDIO_AN_T1_ADV_H 516 /* BASE-T1 AN advertisement register [47:32] */ +#define MDIO_AN_T1_LP_L 517 /* BASE-T1 AN LP Base Page ability register [15:0] */ +#define MDIO_AN_T1_LP_M 518 /* BASE-T1 AN LP Base Page ability register [31:16] */ +#define MDIO_AN_T1_LP_H 519 /* BASE-T1 AN LP Base Page ability register [47:32] */ /* LASI (Link Alarm Status Interrupt) registers, defined by XENPAK MSA. 
*/ #define MDIO_PMA_LASI_RXCTRL 0x9000 /* RX_ALARM control */ @@ -293,6 +301,38 @@ #define MDIO_PCS_10T1L_CTRL_LB 0x4000 /* Enable PCS level loopback mode */ #define MDIO_PCS_10T1L_CTRL_RESET 0x8000 /* PCS reset */ +/* BASE-T1 auto-negotiation advertisement register [15:0] */ +#define MDIO_AN_T1_ADV_L_PAUSE_CAP ADVERTISE_PAUSE_CAP +#define MDIO_AN_T1_ADV_L_PAUSE_ASYM ADVERTISE_PAUSE_ASYM +#define MDIO_AN_T1_ADV_L_FORCE_MS 0x1000 /* Force Master/slave Configuration */ +#define MDIO_AN_T1_ADV_L_REMOTE_FAULT ADVERTISE_RFAULT +#define MDIO_AN_T1_ADV_L_ACK ADVERTISE_LPACK +#define MDIO_AN_T1_ADV_L_NEXT_PAGE_REQ ADVERTISE_NPAGE + +/* BASE-T1 auto-negotiation advertisement register [31:16] */ +#define MDIO_AN_T1_ADV_M_B10L 0x4000 /* device is compatible with 10BASE-T1L */ +#define MDIO_AN_T1_ADV_M_MST 0x0010 /* advertise master preference */ + +/* BASE-T1 auto-negotiation advertisement register [47:32] */ +#define MDIO_AN_T1_ADV_H_10L_TX_HI_REQ 0x1000 /* 10BASE-T1L High Level Transmit Request */ +#define MDIO_AN_T1_ADV_H_10L_TX_HI 0x2000 /* 10BASE-T1L High Level Transmit Ability */ + +/* BASE-T1 AN LP Base Page ability register [15:0] */ +#define MDIO_AN_T1_LP_L_PAUSE_CAP LPA_PAUSE_CAP +#define MDIO_AN_T1_LP_L_PAUSE_ASYM LPA_PAUSE_ASYM +#define MDIO_AN_T1_LP_L_FORCE_MS 0x1000 /* LP Force Master/slave Configuration */ +#define MDIO_AN_T1_LP_L_REMOTE_FAULT LPA_RFAULT +#define MDIO_AN_T1_LP_L_ACK LPA_LPACK +#define MDIO_AN_T1_LP_L_NEXT_PAGE_REQ LPA_NPAGE + +/* BASE-T1 AN LP Base Page ability register [31:16] */ +#define MDIO_AN_T1_LP_M_MST 0x0010 /* LP master preference */ +#define MDIO_AN_T1_LP_M_B10L 0x4000 /* LP is compatible with 10BASE-T1L */ + +/* BASE-T1 AN LP Base Page ability register [47:32] */ +#define MDIO_AN_T1_LP_H_10L_TX_HI_REQ 0x1000 /* 10BASE-T1L High Level LP Transmit Request */ +#define MDIO_AN_T1_LP_H_10L_TX_HI 0x2000 /* 10BASE-T1L High Level LP Transmit Ability */ + /* EEE Supported/Advertisement/LP Advertisement registers. 
* * EEE capability Register (3.20), Advertisement (7.60) and -- cgit v1.2.3 From 3da8ffd8545f62fec85a48a3c637b2f427974f11 Mon Sep 17 00:00:00 2001 From: Alexandru Tachici Date: Fri, 29 Apr 2022 18:34:34 +0300 Subject: net: phy: Add 10BASE-T1L support in phy-c45 This patch is needed because the BASE-T1 uses different registers for status, control and advertisement to those already employed in the existing phy-c45 functions. Where required, genphy_c45 functions will now check whether the device supports BASE-T1 and use the specific registers instead: 45.2.7.19 BASE-T1 AN control register, 45.2.7.20 BASE-T1 AN status, 45.2.7.21 BASE-T1 AN advertisement register, 45.2.7.22 BASE-T1 AN LP Base Page ability register, 45.2.1.185 BASE-T1 PMA/PMD control register. Tested-by: Oleksij Rempel Signed-off-by: Alexandru Tachici Signed-off-by: David S. Miller --- include/uapi/linux/mdio.h | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/mdio.h b/include/uapi/linux/mdio.h index fa3515257f54..75b7257a51e1 100644 --- a/include/uapi/linux/mdio.h +++ b/include/uapi/linux/mdio.h @@ -70,6 +70,7 @@ #define MDIO_B10L_PMA_CTRL 2294 /* 10BASE-T1L PMA control */ #define MDIO_PMA_10T1L_STAT 2295 /* 10BASE-T1L PMA status */ #define MDIO_PCS_10T1L_CTRL 2278 /* 10BASE-T1L PCS control */ +#define MDIO_PMA_PMD_BT1 18 /* BASE-T1 PMA/PMD extended ability */ #define MDIO_AN_T1_CTRL 512 /* BASE-T1 AN control */ #define MDIO_AN_T1_STAT 513 /* BASE-T1 AN status */ #define MDIO_AN_T1_ADV_L 514 /* BASE-T1 AN advertisement register [15:0] */ @@ -78,6 +79,7 @@ #define MDIO_AN_T1_LP_L 517 /* BASE-T1 AN LP Base Page ability register [15:0] */ #define MDIO_AN_T1_LP_M 518 /* BASE-T1 AN LP Base Page ability register [31:16] */ #define MDIO_AN_T1_LP_H 519 /* BASE-T1 AN LP Base Page ability register [47:32] */ +#define MDIO_PMA_PMD_BT1_CTRL 2100 /* BASE-T1 PMA/PMD control register */ /* LASI (Link Alarm Status Interrupt) registers, defined by XENPAK 
MSA. */ #define MDIO_PMA_LASI_RXCTRL 0x9000 /* RX_ALARM control */ @@ -170,6 +172,7 @@ #define MDIO_PMA_CTRL2_10BT 0x000f /* 10BASE-T type */ #define MDIO_PMA_CTRL2_2_5GBT 0x0030 /* 2.5GBaseT type */ #define MDIO_PMA_CTRL2_5GBT 0x0031 /* 5GBaseT type */ +#define MDIO_PMA_CTRL2_BASET1 0x003D /* BASE-T1 type */ #define MDIO_PCS_CTRL2_TYPE 0x0003 /* PCS type selection */ #define MDIO_PCS_CTRL2_10GBR 0x0000 /* 10GBASE-R type */ #define MDIO_PCS_CTRL2_10GBX 0x0001 /* 10GBASE-X type */ @@ -223,6 +226,7 @@ #define MDIO_PMA_EXTABLE_1000BKX 0x0040 /* 1000BASE-KX ability */ #define MDIO_PMA_EXTABLE_100BTX 0x0080 /* 100BASE-TX ability */ #define MDIO_PMA_EXTABLE_10BT 0x0100 /* 10BASE-T ability */ +#define MDIO_PMA_EXTABLE_BT1 0x0800 /* BASE-T1 ability */ #define MDIO_PMA_EXTABLE_NBT 0x4000 /* 2.5/5GBASE-T ability */ /* PHY XGXS lane state register. */ @@ -301,6 +305,9 @@ #define MDIO_PCS_10T1L_CTRL_LB 0x4000 /* Enable PCS level loopback mode */ #define MDIO_PCS_10T1L_CTRL_RESET 0x8000 /* PCS reset */ +/* BASE-T1 PMA/PMD extended ability register. */ +#define MDIO_PMA_PMD_BT1_B10L_ABLE 0x0004 /* 10BASE-T1L Ability */ + /* BASE-T1 auto-negotiation advertisement register [15:0] */ #define MDIO_AN_T1_ADV_L_PAUSE_CAP ADVERTISE_PAUSE_CAP #define MDIO_AN_T1_ADV_L_PAUSE_ASYM ADVERTISE_PAUSE_ASYM @@ -333,6 +340,9 @@ #define MDIO_AN_T1_LP_H_10L_TX_HI_REQ 0x1000 /* 10BASE-T1L High Level LP Transmit Request */ #define MDIO_AN_T1_LP_H_10L_TX_HI 0x2000 /* 10BASE-T1L High Level LP Transmit Ability */ +/* BASE-T1 PMA/PMD control register */ +#define MDIO_PMA_PMD_BT1_CTRL_CFG_MST 0x4000 /* MASTER-SLAVE config value */ + /* EEE Supported/Advertisement/LP Advertisement registers. 
* * EEE capability Register (3.20), Advertisement (7.60) and -- cgit v1.2.3 From c2aa2dfef243efe213a480a1ee8566507a5152f4 Mon Sep 17 00:00:00 2001 From: Sargun Dhillon Date: Tue, 3 May 2022 01:09:56 -0700 Subject: seccomp: Add wait_killable semantic to seccomp user notifier This introduces a per-filter flag (SECCOMP_FILTER_FLAG_WAIT_KILLABLE_RECV) that makes it so that when notifications are received by the supervisor the notifying process will transition to wait killable semantics. Although wait killable isn't a set of semantics formally exposed to userspace, the concept is searchable. If the notifying process is signaled prior to the notification being received by the userspace agent, it will be handled as normal. One quirk about how this is handled is that the notifying process only switches to TASK_KILLABLE if it receives a wakeup from either an addfd or a signal. This is to avoid an unnecessary wakeup of the notifying task. The reasons behind switching into wait_killable only after userspace receives the notification are: * Avoiding unnecessary work - Often, workloads will perform work that they may abort (request racing comes to mind). This allows for syscalls to be aborted safely prior to the notification being received by the supervisor. In this, the supervisor doesn't end up doing work that the workload does not want to complete anyways. * Avoiding side effects - We don't want the syscall to be interruptible once the supervisor starts doing work because it may not be trivial to reverse the operation. For example, unmounting a file system may take a long time, and it's hard to rollback, or treat that as reentrant. * Avoid breaking runtimes - Various runtimes do not GC when they are during a syscall (or while running native code that subsequently calls a syscall). If many notifications are blocked, and not picked up by the supervisor, this can get the application into a bad state.
Signed-off-by: Sargun Dhillon Signed-off-by: Kees Cook Link: https://lore.kernel.org/r/20220503080958.20220-2-sargun@sargun.me --- include/uapi/linux/seccomp.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/seccomp.h b/include/uapi/linux/seccomp.h index 78074254ab98..0fdc6ef02b94 100644 --- a/include/uapi/linux/seccomp.h +++ b/include/uapi/linux/seccomp.h @@ -23,6 +23,8 @@ #define SECCOMP_FILTER_FLAG_SPEC_ALLOW (1UL << 2) #define SECCOMP_FILTER_FLAG_NEW_LISTENER (1UL << 3) #define SECCOMP_FILTER_FLAG_TSYNC_ESRCH (1UL << 4) +/* Received notifications wait in killable state (only respond to fatal signals) */ +#define SECCOMP_FILTER_FLAG_WAIT_KILLABLE_RECV (1UL << 5) /* * All BPF programs must return a 32-bit value. -- cgit v1.2.3 From 41b3c69bf9414375319290c59f198ff5c71d273f Mon Sep 17 00:00:00 2001 From: Kishen Maloor Date: Mon, 2 May 2022 13:52:36 -0700 Subject: mptcp: expose server_side attribute in MPTCP netlink events This change records the 'server_side' attribute of MPTCP_EVENT_CREATED and MPTCP_EVENT_ESTABLISHED events to inform their recipient about the Client/Server role of the running MPTCP application. 
Closes: https://github.com/multipath-tcp/mptcp_net-next/issues/246 Acked-by: Paolo Abeni Signed-off-by: Kishen Maloor Signed-off-by: Mat Martineau Signed-off-by: Jakub Kicinski --- include/uapi/linux/mptcp.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/mptcp.h b/include/uapi/linux/mptcp.h index 9690efedb5fa..e41ea01a94bb 100644 --- a/include/uapi/linux/mptcp.h +++ b/include/uapi/linux/mptcp.h @@ -188,6 +188,7 @@ enum mptcp_event_attr { MPTCP_ATTR_IF_IDX, /* s32 */ MPTCP_ATTR_RESET_REASON,/* u32 */ MPTCP_ATTR_RESET_FLAGS, /* u32 */ + MPTCP_ATTR_SERVER_SIDE, /* u8 */ __MPTCP_ATTR_AFTER_LAST }; -- cgit v1.2.3 From 7b33a09d036ffd9a04506122840629c7e870cf08 Mon Sep 17 00:00:00 2001 From: Oliver Upton Date: Wed, 4 May 2022 03:24:40 +0000 Subject: KVM: arm64: Add support for userspace to suspend a vCPU Introduce a new MP state, KVM_MP_STATE_SUSPENDED, which indicates a vCPU is in a suspended state. In the suspended state the vCPU will block until a wakeup event (pending interrupt) is recognized. Add a new system event type, KVM_SYSTEM_EVENT_WAKEUP, to indicate to userspace that KVM has recognized one such wakeup event. It is the responsibility of userspace to then make the vCPU runnable, or leave it suspended until the next wakeup event. 
Signed-off-by: Oliver Upton Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20220504032446.4133305-7-oupton@google.com --- include/uapi/linux/kvm.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index 6a184d260c7f..7f72fb7b05f2 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -444,6 +444,7 @@ struct kvm_run { #define KVM_SYSTEM_EVENT_SHUTDOWN 1 #define KVM_SYSTEM_EVENT_RESET 2 #define KVM_SYSTEM_EVENT_CRASH 3 +#define KVM_SYSTEM_EVENT_WAKEUP 4 __u32 type; __u32 ndata; union { @@ -646,6 +647,7 @@ struct kvm_vapic_addr { #define KVM_MP_STATE_OPERATING 7 #define KVM_MP_STATE_LOAD 8 #define KVM_MP_STATE_AP_RESET_HOLD 9 +#define KVM_MP_STATE_SUSPENDED 10 struct kvm_mp_state { __u32 mp_state; -- cgit v1.2.3 From bfbab44568779e1682bc6f63688bb9c965f0e74a Mon Sep 17 00:00:00 2001 From: Oliver Upton Date: Wed, 4 May 2022 03:24:41 +0000 Subject: KVM: arm64: Implement PSCI SYSTEM_SUSPEND ARM DEN0022D.b 5.19 "SYSTEM_SUSPEND" describes a PSCI call that allows software to request that a system be placed in the deepest possible low-power state. Effectively, software can use this to suspend itself to RAM. Unfortunately, there really is no good way to implement a system-wide PSCI call in KVM. Any precondition checks done in the kernel will need to be repeated by userspace since there is no good way to protect a critical section that spans an exit to userspace. SYSTEM_RESET and SYSTEM_OFF are equally plagued by this issue, although no users have seemingly cared for the relatively long time these calls have been supported. The solution is to just make the whole implementation userspace's problem. Introduce a new system event, KVM_SYSTEM_EVENT_SUSPEND, that indicates to userspace a calling vCPU has invoked PSCI SYSTEM_SUSPEND. Additionally, add a CAP to get buy-in from userspace for this new exit type. 
Only advertise the SYSTEM_SUSPEND PSCI call if userspace has opted in. If a vCPU calls SYSTEM_SUSPEND, punt straight to userspace. Provide explicit documentation of userspace's responsibilities for the exit and point to the PSCI specification to describe the actual PSCI call. Reviewed-by: Reiji Watanabe Signed-off-by: Oliver Upton Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20220504032446.4133305-8-oupton@google.com --- include/uapi/linux/kvm.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index 7f72fb7b05f2..32c56384fd08 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -445,6 +445,7 @@ struct kvm_run { #define KVM_SYSTEM_EVENT_RESET 2 #define KVM_SYSTEM_EVENT_CRASH 3 #define KVM_SYSTEM_EVENT_WAKEUP 4 +#define KVM_SYSTEM_EVENT_SUSPEND 5 __u32 type; __u32 ndata; union { @@ -1154,6 +1155,7 @@ struct kvm_ppc_resize_hpt { #define KVM_CAP_DISABLE_QUIRKS2 213 /* #define KVM_CAP_VM_TSC_CONTROL 214 */ #define KVM_CAP_SYSTEM_EVENT_DATA 215 +#define KVM_CAP_ARM_SYSTEM_SUSPEND 216 #ifdef KVM_CAP_IRQ_ROUTING -- cgit v1.2.3 From 2068339a6c35147847ba433fd0da67b313779059 Mon Sep 17 00:00:00 2001 From: Dipen Patel Date: Fri, 22 Apr 2022 13:52:18 -0700 Subject: gpiolib: cdev: Add hardware timestamp clock type This patch adds new clock type for the GPIO controller which can timestamp gpio lines using hardware means. To expose such functionalities to the userspace, code has been added where during line create or set config API calls, it checks for new clock type and if requested, calls HTE API. During line change event, the HTE subsystem pushes timestamp data to userspace through gpiolib-cdev.
Signed-off-by: Dipen Patel Acked-by: Linus Walleij Reported-by: kernel test robot Reported-by: Dan Carpenter Signed-off-by: Thierry Reding --- include/uapi/linux/gpio.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/gpio.h b/include/uapi/linux/gpio.h index eaaea3d8e6b4..cb9966d49a16 100644 --- a/include/uapi/linux/gpio.h +++ b/include/uapi/linux/gpio.h @@ -66,6 +66,8 @@ struct gpiochip_info { * @GPIO_V2_LINE_FLAG_BIAS_PULL_DOWN: line has pull-down bias enabled * @GPIO_V2_LINE_FLAG_BIAS_DISABLED: line has bias disabled * @GPIO_V2_LINE_FLAG_EVENT_CLOCK_REALTIME: line events contain REALTIME timestamps + * @GPIO_V2_LINE_FLAG_EVENT_CLOCK_HTE: line events contain timestamps from + * hardware timestamp engine */ enum gpio_v2_line_flag { GPIO_V2_LINE_FLAG_USED = _BITULL(0), @@ -80,6 +82,7 @@ enum gpio_v2_line_flag { GPIO_V2_LINE_FLAG_BIAS_PULL_DOWN = _BITULL(9), GPIO_V2_LINE_FLAG_BIAS_DISABLED = _BITULL(10), GPIO_V2_LINE_FLAG_EVENT_CLOCK_REALTIME = _BITULL(11), + GPIO_V2_LINE_FLAG_EVENT_CLOCK_HTE = _BITULL(12), }; /** -- cgit v1.2.3 From 9ab4807c84a4aacfc9b4f79cc81254035e0ec361 Mon Sep 17 00:00:00 2001 From: Kishen Maloor Date: Tue, 3 May 2022 19:38:52 -0700 Subject: mptcp: netlink: Add MPTCP_PM_CMD_ANNOUNCE This change adds a MPTCP netlink interface for issuing ADD_ADDR advertisements over the chosen MPTCP connection from a userspace path manager. The command requires the following parameters: { token, { loc_id, family, daddr4 | daddr6 [, dport] } [, if_idx], flags[signal] }. Acked-by: Paolo Abeni Signed-off-by: Kishen Maloor Signed-off-by: Mat Martineau Signed-off-by: David S. 
Miller --- include/uapi/linux/mptcp.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/mptcp.h b/include/uapi/linux/mptcp.h index e41ea01a94bb..ac66c1263f02 100644 --- a/include/uapi/linux/mptcp.h +++ b/include/uapi/linux/mptcp.h @@ -55,6 +55,7 @@ enum { MPTCP_PM_ATTR_ADDR, /* nested address */ MPTCP_PM_ATTR_RCV_ADD_ADDRS, /* u32 */ MPTCP_PM_ATTR_SUBFLOWS, /* u32 */ + MPTCP_PM_ATTR_TOKEN, /* u32 */ __MPTCP_PM_ATTR_MAX }; @@ -93,6 +94,7 @@ enum { MPTCP_PM_CMD_SET_LIMITS, MPTCP_PM_CMD_GET_LIMITS, MPTCP_PM_CMD_SET_FLAGS, + MPTCP_PM_CMD_ANNOUNCE, __MPTCP_PM_CMD_AFTER_LAST }; -- cgit v1.2.3 From d9a4594edabf125dc17dfd52acc722c3de1cb44c Mon Sep 17 00:00:00 2001 From: Kishen Maloor Date: Tue, 3 May 2022 19:38:54 -0700 Subject: mptcp: netlink: Add MPTCP_PM_CMD_REMOVE This change adds a MPTCP netlink command for issuing a REMOVE_ADDR signal for an address over the chosen MPTCP connection from a userspace path manager. The command requires the following parameters: {token, loc_id}. Acked-by: Paolo Abeni Signed-off-by: Kishen Maloor Signed-off-by: Mat Martineau Signed-off-by: David S. 
Miller --- include/uapi/linux/mptcp.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/mptcp.h b/include/uapi/linux/mptcp.h index ac66c1263f02..11f9fa001a3c 100644 --- a/include/uapi/linux/mptcp.h +++ b/include/uapi/linux/mptcp.h @@ -56,6 +56,7 @@ enum { MPTCP_PM_ATTR_RCV_ADD_ADDRS, /* u32 */ MPTCP_PM_ATTR_SUBFLOWS, /* u32 */ MPTCP_PM_ATTR_TOKEN, /* u32 */ + MPTCP_PM_ATTR_LOC_ID, /* u8 */ __MPTCP_PM_ATTR_MAX }; @@ -95,6 +96,7 @@ enum { MPTCP_PM_CMD_GET_LIMITS, MPTCP_PM_CMD_SET_FLAGS, MPTCP_PM_CMD_ANNOUNCE, + MPTCP_PM_CMD_REMOVE, __MPTCP_PM_CMD_AFTER_LAST }; -- cgit v1.2.3 From 702c2f646d42cfd9e31133d68a8283fea48fd810 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Tue, 3 May 2022 19:38:56 -0700 Subject: mptcp: netlink: allow userspace-driven subflow establishment This allows userspace to tell kernel to add a new subflow to an existing mptcp connection. Userspace provides the token to identify the mptcp-level connection that needs a change in active subflows and the local and remote addresses of the new or the to-be-removed subflow. MPTCP_PM_CMD_SUBFLOW_CREATE requires the following parameters: { token, { loc_id, family, loc_addr4 | loc_addr6 }, { family, rem_addr4 | rem_addr6, rem_port } MPTCP_PM_CMD_SUBFLOW_DESTROY requires the following parameters: { token, { family, loc_addr4 | loc_addr6, loc_port }, { family, rem_addr4 | rem_addr6, rem_port } Acked-by: Paolo Abeni Co-developed-by: Kishen Maloor Signed-off-by: Kishen Maloor Signed-off-by: Florian Westphal Signed-off-by: Mat Martineau Signed-off-by: David S. 
Miller --- include/uapi/linux/mptcp.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/mptcp.h b/include/uapi/linux/mptcp.h index 11f9fa001a3c..921963589904 100644 --- a/include/uapi/linux/mptcp.h +++ b/include/uapi/linux/mptcp.h @@ -57,6 +57,7 @@ enum { MPTCP_PM_ATTR_SUBFLOWS, /* u32 */ MPTCP_PM_ATTR_TOKEN, /* u32 */ MPTCP_PM_ATTR_LOC_ID, /* u8 */ + MPTCP_PM_ATTR_ADDR_REMOTE, /* nested address */ __MPTCP_PM_ATTR_MAX }; @@ -97,6 +98,8 @@ enum { MPTCP_PM_CMD_SET_FLAGS, MPTCP_PM_CMD_ANNOUNCE, MPTCP_PM_CMD_REMOVE, + MPTCP_PM_CMD_SUBFLOW_CREATE, + MPTCP_PM_CMD_SUBFLOW_DESTROY, __MPTCP_PM_CMD_AFTER_LAST }; -- cgit v1.2.3 From 36f8423597000bd7d5e48b7b306e1d0958e72359 Mon Sep 17 00:00:00 2001 From: Muna Sinada Date: Wed, 23 Mar 2022 15:46:35 -0700 Subject: cfg80211: support disabling EHT mode Allow userspace to disable EHT mode during association. Signed-off-by: Muna Sinada Signed-off-by: Aloka Dixit Link: https://lore.kernel.org/r/20220323224636.20211-1-quic_alokad@quicinc.com Signed-off-by: Johannes Berg --- include/uapi/linux/nl80211.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index 0568a79097b8..d9490e3062a7 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -3175,6 +3175,8 @@ enum nl80211_attrs { NL80211_ATTR_EHT_CAPABILITY, + NL80211_ATTR_DISABLE_EHT, + /* add attributes here, update the policy in nl80211.c */ __NL80211_ATTR_AFTER_LAST, -- cgit v1.2.3 From c4a67a21a6d255ddcbaa076c0412aad73c7e0c02 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Wed, 4 May 2022 08:40:37 -0700 Subject: Revert "Merge branch 'mlxsw-line-card-model'" This reverts commit 5e927a9f4b9f29d78a7c7d66ea717bb5c8bbad8e, reversing changes made to cfc1d91a7d78cf9de25b043d81efcc16966d55b3. The discussion is still ongoing so let's remove the uAPI until the discussion settles. 
Link: https://lore.kernel.org/all/20220425090021.32e9a98f@kernel.org/ Reviewed-by: Ido Schimmel Link: https://lore.kernel.org/r/20220504154037.539442-1-kuba@kernel.org Signed-off-by: Jakub Kicinski --- include/uapi/linux/devlink.h | 5 ----- 1 file changed, 5 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/devlink.h b/include/uapi/linux/devlink.h index fb8c3864457f..b3d40a5d72ff 100644 --- a/include/uapi/linux/devlink.h +++ b/include/uapi/linux/devlink.h @@ -136,8 +136,6 @@ enum devlink_command { DEVLINK_CMD_LINECARD_NEW, DEVLINK_CMD_LINECARD_DEL, - DEVLINK_CMD_LINECARD_INFO_GET, /* can dump */ - /* add new commands above here */ __DEVLINK_CMD_MAX, DEVLINK_CMD_MAX = __DEVLINK_CMD_MAX - 1 @@ -577,9 +575,6 @@ enum devlink_attr { DEVLINK_ATTR_LINECARD_STATE, /* u8 */ DEVLINK_ATTR_LINECARD_TYPE, /* string */ DEVLINK_ATTR_LINECARD_SUPPORTED_TYPES, /* nested */ - DEVLINK_ATTR_LINECARD_DEVICE_LIST, /* nested */ - DEVLINK_ATTR_LINECARD_DEVICE, /* nested */ - DEVLINK_ATTR_LINECARD_DEVICE_INDEX, /* u32 */ /* add new attributes above here, update the policy in devlink.c */ -- cgit v1.2.3 From 0455d4ccec548b0fb51db39a4d3350a7a80a0222 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Tue, 26 Apr 2022 12:11:33 -0600 Subject: io_uring: add POLL_FIRST support for send/sendmsg and recv/recvmsg If IORING_RECVSEND_POLL_FIRST is set for recv/recvmsg or send/sendmsg, then we arm poll first rather than attempt a receive or send upfront. This can be useful if we expect there to be no data (or space) available for the request, as we can then avoid wasting time on the initial issue attempt. 
Reviewed-by: Hao Xu Signed-off-by: Jens Axboe --- include/uapi/linux/io_uring.h | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h index fad63564678a..06621a278cb6 100644 --- a/include/uapi/linux/io_uring.h +++ b/include/uapi/linux/io_uring.h @@ -213,6 +213,16 @@ enum { #define IORING_ASYNC_CANCEL_FD (1U << 1) #define IORING_ASYNC_CANCEL_ANY (1U << 2) +/* + * send/sendmsg and recv/recvmsg flags (sqe->addr2) + * + * IORING_RECVSEND_POLL_FIRST If set, instead of first attempting to send + * or receive and arm poll if that yields an + * -EAGAIN result, arm poll upfront and skip + * the initial transfer attempt. + */ +#define IORING_RECVSEND_POLL_FIRST (1U << 0) + /* * IO completion data structure (Completion Queue Entry) */ -- cgit v1.2.3 From 6cc2df8e3a3967e7c13a424f87f6efb1d4a62d80 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micka=C3=ABl=20Sala=C3=BCn?= Date: Fri, 6 May 2022 18:05:07 +0200 Subject: landlock: Add clang-format exceptions MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In preparation to a following commit, add clang-format on and clang-format off stanzas around constant definitions. This enables to keep aligned values, which is much more readable than packed definitions. Link: https://lore.kernel.org/r/20220506160513.523257-2-mic@digikod.net Cc: stable@vger.kernel.org Signed-off-by: Mickaël Salaün --- include/uapi/linux/landlock.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/landlock.h b/include/uapi/linux/landlock.h index b3d952067f59..15c31abb0d76 100644 --- a/include/uapi/linux/landlock.h +++ b/include/uapi/linux/landlock.h @@ -33,7 +33,9 @@ struct landlock_ruleset_attr { * - %LANDLOCK_CREATE_RULESET_VERSION: Get the highest supported Landlock ABI * version. 
*/ +/* clang-format off */ #define LANDLOCK_CREATE_RULESET_VERSION (1U << 0) +/* clang-format on */ /** * enum landlock_rule_type - Landlock rule type @@ -120,6 +122,7 @@ struct landlock_path_beneath_attr { * :manpage:`access(2)`. * Future Landlock evolutions will enable to restrict them. */ +/* clang-format off */ #define LANDLOCK_ACCESS_FS_EXECUTE (1ULL << 0) #define LANDLOCK_ACCESS_FS_WRITE_FILE (1ULL << 1) #define LANDLOCK_ACCESS_FS_READ_FILE (1ULL << 2) @@ -133,5 +136,6 @@ struct landlock_path_beneath_attr { #define LANDLOCK_ACCESS_FS_MAKE_FIFO (1ULL << 10) #define LANDLOCK_ACCESS_FS_MAKE_BLOCK (1ULL << 11) #define LANDLOCK_ACCESS_FS_MAKE_SYM (1ULL << 12) +/* clang-format on */ #endif /* _UAPI_LINUX_LANDLOCK_H */ -- cgit v1.2.3 From a36e07dfe6ee71e209383ea9288cd8d1617e14f9 Mon Sep 17 00:00:00 2001 From: Gleb Fotengauer-Malinovskiy Date: Fri, 6 May 2022 17:24:54 +0000 Subject: rfkill: uapi: fix RFKILL_IOCTL_MAX_SIZE ioctl request definition The definition of RFKILL_IOCTL_MAX_SIZE introduced by commit 54f586a91532 ("rfkill: make new event layout opt-in") is unusable since it is based on RFKILL_IOC_EXT_SIZE which has not been defined. Fix that by replacing the undefined constant with the constant which is intended to be used in this definition. Fixes: 54f586a91532 ("rfkill: make new event layout opt-in") Cc: stable@vger.kernel.org # 5.11+ Signed-off-by: Gleb Fotengauer-Malinovskiy Signed-off-by: Dmitry V. 
Levin Link: https://lore.kernel.org/r/20220506172454.120319-1-glebfm@altlinux.org [add commit message provided later by Dmitry] Signed-off-by: Johannes Berg --- include/uapi/linux/rfkill.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/rfkill.h b/include/uapi/linux/rfkill.h index 283c5a7b3f2c..db6c8588c1d0 100644 --- a/include/uapi/linux/rfkill.h +++ b/include/uapi/linux/rfkill.h @@ -184,7 +184,7 @@ struct rfkill_event_ext { #define RFKILL_IOC_NOINPUT 1 #define RFKILL_IOCTL_NOINPUT _IO(RFKILL_IOC_MAGIC, RFKILL_IOC_NOINPUT) #define RFKILL_IOC_MAX_SIZE 2 -#define RFKILL_IOCTL_MAX_SIZE _IOW(RFKILL_IOC_MAGIC, RFKILL_IOC_EXT_SIZE, __u32) +#define RFKILL_IOCTL_MAX_SIZE _IOW(RFKILL_IOC_MAGIC, RFKILL_IOC_MAX_SIZE, __u32) /* and that's all userspace gets */ -- cgit v1.2.3 From ebdeb7c01d025cb059f05dc26b9dc914e46dd43f Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Thu, 31 Mar 2022 19:27:52 -0600 Subject: io_uring: add support for 128-byte SQEs Normal SQEs are 64-bytes in length, which is fine for all the commands we support. However, in preparation for supporting passthrough IO, provide an option for setting up a ring with 128-byte SQEs. We continue to use the same type for io_uring_sqe, it's marked and commented with a zero sized array pad at the end. This provides up to 80 bytes of data for a passthrough command - 64 bytes for the extra added data, and 16 bytes available at the end of the existing SQE. 
Signed-off-by: Jens Axboe --- include/uapi/linux/io_uring.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h index 31e719f38615..ee84132cadad 100644 --- a/include/uapi/linux/io_uring.h +++ b/include/uapi/linux/io_uring.h @@ -63,6 +63,12 @@ struct io_uring_sqe { }; __u64 addr3; __u64 __pad2[1]; + + /* + * If the ring is initialized with IORING_SETUP_SQE128, then this field + * contains 64-bytes of padding, doubling the size of the SQE. + */ + __u64 __big_sqe_pad[0]; }; enum { @@ -119,6 +125,8 @@ enum { */ #define IORING_SETUP_TASKRUN_FLAG (1U << 9) +#define IORING_SETUP_SQE128 (1U << 10) /* SQEs are 128 byte */ + enum io_uring_op { IORING_OP_NOP, IORING_OP_READV, -- cgit v1.2.3 From 7a51e5b44b92686eebd3e1b46b86e1eb4db975db Mon Sep 17 00:00:00 2001 From: Stefan Roesch Date: Tue, 26 Apr 2022 11:21:23 -0700 Subject: io_uring: support CQE32 in io_uring_cqe This adds the big_cqe array to the struct io_uring_cqe to support large CQE's. 
Co-developed-by: Jens Axboe Signed-off-by: Stefan Roesch Reviewed-by: Kanchan Joshi Link: https://lore.kernel.org/r/20220426182134.136504-2-shr@fb.com Signed-off-by: Jens Axboe --- include/uapi/linux/io_uring.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h index ee84132cadad..ac2d90d669c3 100644 --- a/include/uapi/linux/io_uring.h +++ b/include/uapi/linux/io_uring.h @@ -126,6 +126,7 @@ enum { #define IORING_SETUP_TASKRUN_FLAG (1U << 9) #define IORING_SETUP_SQE128 (1U << 10) /* SQEs are 128 byte */ +#define IORING_SETUP_CQE32 (1U << 11) /* CQEs are 32 byte */ enum io_uring_op { IORING_OP_NOP, @@ -245,6 +246,12 @@ struct io_uring_cqe { __u64 user_data; /* sqe->data submission passed back */ __s32 res; /* result code for this event */ __u32 flags; + + /* + * If the ring is initialized with IORING_SETUP_CQE32, then this field + * contains 16-bytes of padding, doubling the size of the CQE. + */ + __u64 big_cqe[]; }; /* -- cgit v1.2.3 From bd32889e841c12533d09a1bd02bba932baa9ed8f Mon Sep 17 00:00:00 2001 From: Carlos Llamas Date: Fri, 29 Apr 2022 23:56:41 +0000 Subject: binder: add BINDER_GET_EXTENDED_ERROR ioctl Provide a userspace mechanism to pull precise error information upon failed operations. Extending the current error codes returned by the interfaces allows userspace to better determine the course of action. This could be for instance, retrying a failed transaction at a later point and thus offloading the error handling from the driver. 
Acked-by: Christian Brauner (Microsoft) Acked-by: Todd Kjos Signed-off-by: Carlos Llamas Link: https://lore.kernel.org/r/20220429235644.697372-3-cmllamas@google.com Signed-off-by: Greg Kroah-Hartman --- include/uapi/linux/android/binder.h | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/android/binder.h b/include/uapi/linux/android/binder.h index 11157fae8a8e..e6ee8cae303b 100644 --- a/include/uapi/linux/android/binder.h +++ b/include/uapi/linux/android/binder.h @@ -236,6 +236,21 @@ struct binder_frozen_status_info { __u32 async_recv; }; +/* struct binder_extended_error - extended error information + * @id: identifier for the failed operation + * @command: command as defined by binder_driver_return_protocol + * @param: parameter holding a negative errno value + * + * Used with BINDER_GET_EXTENDED_ERROR. This extends the error information + * returned by the driver upon a failed operation. Userspace can pull this + * data to properly handle specific error scenarios. + */ +struct binder_extended_error { + __u32 id; + __u32 command; + __s32 param; +}; + #define BINDER_WRITE_READ _IOWR('b', 1, struct binder_write_read) #define BINDER_SET_IDLE_TIMEOUT _IOW('b', 3, __s64) #define BINDER_SET_MAX_THREADS _IOW('b', 5, __u32) @@ -249,6 +264,7 @@ struct binder_frozen_status_info { #define BINDER_FREEZE _IOW('b', 14, struct binder_freeze_info) #define BINDER_GET_FROZEN_INFO _IOWR('b', 15, struct binder_frozen_status_info) #define BINDER_ENABLE_ONEWAY_SPAM_DETECTION _IOW('b', 16, __u32) +#define BINDER_GET_EXTENDED_ERROR _IOWR('b', 17, struct binder_extended_error) /* * NOTE: Two special error codes you should check for when calling -- cgit v1.2.3 From 7ff960a6fe399fdcbca6159063684671ae57eee9 Mon Sep 17 00:00:00 2001 From: Shunsuke Mie Date: Tue, 10 May 2022 19:27:23 +0900 Subject: virtio: fix virtio transitional ids This commit fixes the transitional PCI device ID.
Fixes: d61914ea6ada ("virtio: update virtio id table, add transitional ids") Signed-off-by: Shunsuke Mie Link: https://lore.kernel.org/r/20220510102723.87666-1-mie@igel.co.jp Signed-off-by: Michael S. Tsirkin --- include/uapi/linux/virtio_ids.h | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/virtio_ids.h b/include/uapi/linux/virtio_ids.h index 80d76b75bccd..7aa2eb766205 100644 --- a/include/uapi/linux/virtio_ids.h +++ b/include/uapi/linux/virtio_ids.h @@ -73,12 +73,12 @@ * Virtio Transitional IDs */ -#define VIRTIO_TRANS_ID_NET 1000 /* transitional virtio net */ -#define VIRTIO_TRANS_ID_BLOCK 1001 /* transitional virtio block */ -#define VIRTIO_TRANS_ID_BALLOON 1002 /* transitional virtio balloon */ -#define VIRTIO_TRANS_ID_CONSOLE 1003 /* transitional virtio console */ -#define VIRTIO_TRANS_ID_SCSI 1004 /* transitional virtio SCSI */ -#define VIRTIO_TRANS_ID_RNG 1005 /* transitional virtio rng */ -#define VIRTIO_TRANS_ID_9P 1009 /* transitional virtio 9p console */ +#define VIRTIO_TRANS_ID_NET 0x1000 /* transitional virtio net */ +#define VIRTIO_TRANS_ID_BLOCK 0x1001 /* transitional virtio block */ +#define VIRTIO_TRANS_ID_BALLOON 0x1002 /* transitional virtio balloon */ +#define VIRTIO_TRANS_ID_CONSOLE 0x1003 /* transitional virtio console */ +#define VIRTIO_TRANS_ID_SCSI 0x1004 /* transitional virtio SCSI */ +#define VIRTIO_TRANS_ID_RNG 0x1005 /* transitional virtio rng */ +#define VIRTIO_TRANS_ID_9P 0x1009 /* transitional virtio 9p console */ #endif /* _LINUX_VIRTIO_IDS_H */ -- cgit v1.2.3 From c23d47abee3a54e4991ed3993340596d04aabd6a Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 19 Apr 2022 08:33:03 +0200 Subject: loop: remove most the top-of-file boilerplate comment from the UAPI header Just leave the SPDX marker and the copyright notice and remove the irrelevant rest. 
Signed-off-by: Christoph Hellwig Link: https://lore.kernel.org/r/20220419063303.583106-5-hch@lst.de Signed-off-by: Jens Axboe --- include/uapi/linux/loop.h | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/loop.h b/include/uapi/linux/loop.h index 98e60801195e..6f63527dd2ed 100644 --- a/include/uapi/linux/loop.h +++ b/include/uapi/linux/loop.h @@ -1,11 +1,6 @@ /* SPDX-License-Identifier: GPL-1.0+ WITH Linux-syscall-note */ /* - * include/linux/loop.h - * - * Written by Theodore Ts'o, 3/29/93. - * - * Copyright 1993 by Theodore Ts'o. Redistribution of this file is - * permitted under the GNU General Public License. + * Copyright 1993 by Theodore Ts'o. */ #ifndef _UAPI_LINUX_LOOP_H #define _UAPI_LINUX_LOOP_H -- cgit v1.2.3 From 26101f5ab6bdf30ac25c8e578e0b4873e7849e0c Mon Sep 17 00:00:00 2001 From: Kaixi Fan Date: Sat, 30 Apr 2022 15:48:42 +0800 Subject: bpf: Add source ip in "struct bpf_tunnel_key" Add tunnel source ip field in "struct bpf_tunnel_key". Add related code to set and get tunnel source field. Signed-off-by: Kaixi Fan Link: https://lore.kernel.org/r/20220430074844.69214-2-fankaixi.li@bytedance.com Signed-off-by: Alexei Starovoitov --- include/uapi/linux/bpf.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 444fe6f1cf35..95a3d1ff6255 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -5604,6 +5604,10 @@ struct bpf_tunnel_key { __u8 tunnel_ttl; __u16 tunnel_ext; /* Padding, future use. */ __u32 tunnel_label; + union { + __u32 local_ipv4; + __u32 local_ipv6[4]; + }; }; /* user accessible mirror of in-kernel xfrm_state. 
-- cgit v1.2.3 From f7e0beaf39d3868dc700d4954b26cf8443c5d423 Mon Sep 17 00:00:00 2001 From: Kui-Feng Lee Date: Tue, 10 May 2022 13:59:19 -0700 Subject: bpf, x86: Generate trampolines from bpf_tramp_links Replace struct bpf_tramp_progs with struct bpf_tramp_links to collect struct bpf_tramp_link(s) for a trampoline. struct bpf_tramp_link extends bpf_link to act as a linked list node. arch_prepare_bpf_trampoline() accepts a struct bpf_tramp_links to collects all bpf_tramp_link(s) that a trampoline should call. Change BPF trampoline and bpf_struct_ops to pass bpf_tramp_links instead of bpf_tramp_progs. Signed-off-by: Kui-Feng Lee Signed-off-by: Alexei Starovoitov Signed-off-by: Andrii Nakryiko Acked-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20220510205923.3206889-2-kuifeng@fb.com --- include/uapi/linux/bpf.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 95a3d1ff6255..3d032ea1b6a3 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -1013,6 +1013,7 @@ enum bpf_link_type { BPF_LINK_TYPE_XDP = 6, BPF_LINK_TYPE_PERF_EVENT = 7, BPF_LINK_TYPE_KPROBE_MULTI = 8, + BPF_LINK_TYPE_STRUCT_OPS = 9, MAX_BPF_LINK_TYPE, }; -- cgit v1.2.3 From 2fcc82411e74e5e6aba336561cf56fb899bfae4e Mon Sep 17 00:00:00 2001 From: Kui-Feng Lee Date: Tue, 10 May 2022 13:59:21 -0700 Subject: bpf, x86: Attach a cookie to fentry/fexit/fmod_ret/lsm. Pass a cookie along with BPF_LINK_CREATE requests. Add a bpf_cookie field to struct bpf_tracing_link to attach a cookie. The cookie of a bpf_tracing_link is available by calling bpf_get_attach_cookie when running the BPF program of the attached link. The value of a cookie will be set at bpf_tramp_run_ctx by the trampoline of the link. 
Signed-off-by: Kui-Feng Lee Signed-off-by: Alexei Starovoitov Signed-off-by: Andrii Nakryiko Acked-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20220510205923.3206889-4-kuifeng@fb.com --- include/uapi/linux/bpf.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 3d032ea1b6a3..bc7f89948f54 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -1490,6 +1490,15 @@ union bpf_attr { __aligned_u64 addrs; __aligned_u64 cookies; } kprobe_multi; + struct { + /* this is overlaid with the target_btf_id above. */ + __u32 target_btf_id; + /* black box user-provided value passed through + * to BPF program at the execution time and + * accessible through bpf_get_attach_cookie() BPF helper + */ + __u64 cookie; + } tracing; }; } link_create; -- cgit v1.2.3 From a8641d7d8500d41d312350470464e03f3df3672a Mon Sep 17 00:00:00 2001 From: Basavaraj Natikar Date: Mon, 9 May 2022 18:50:25 +0530 Subject: HID: amd_sfh: Move bus declaration outside of amd-sfh This should allow external drivers to reference this bus ID reservation and detect data coming from amd-sfh. Signed-off-by: Mario Limonciello Signed-off-by: Basavaraj Natikar Signed-off-by: Jiri Kosina --- include/uapi/linux/input.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/input.h b/include/uapi/linux/input.h index ee3127461ee0..ef4257ab3026 100644 --- a/include/uapi/linux/input.h +++ b/include/uapi/linux/input.h @@ -271,6 +271,7 @@ struct input_mask { #define BUS_RMI 0x1D #define BUS_CEC 0x1E #define BUS_INTEL_ISHTP 0x1F +#define BUS_AMD_SFH 0x20 /* * MT_TOOL types -- cgit v1.2.3 From ee692a21e9bf8354bd3ec816f1cf4bff8619ed77 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Wed, 11 May 2022 11:17:45 +0530 Subject: fs,io_uring: add infrastructure for uring-cmd file_operations->uring_cmd is a file private handler. 
This is somewhat similar to ioctl but hopefully a lot more sane and useful as it can be used to enable many io_uring capabilities for the underlying operation. IORING_OP_URING_CMD is a file private kind of request. io_uring doesn't know what is in this command type, it's for the provider of ->uring_cmd() to deal with. Co-developed-by: Kanchan Joshi Signed-off-by: Kanchan Joshi Reviewed-by: Christoph Hellwig Link: https://lore.kernel.org/r/20220511054750.20432-2-joshi.k@samsung.com Signed-off-by: Jens Axboe --- include/uapi/linux/io_uring.h | 21 +++++++++++++-------- 1 file changed, 13 insertions(+), 8 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h index ac2d90d669c3..23618be55dd2 100644 --- a/include/uapi/linux/io_uring.h +++ b/include/uapi/linux/io_uring.h @@ -22,6 +22,7 @@ struct io_uring_sqe { union { __u64 off; /* offset into file */ __u64 addr2; + __u32 cmd_op; }; union { __u64 addr; /* pointer to buffer or iovecs */ @@ -61,14 +62,17 @@ struct io_uring_sqe { __s32 splice_fd_in; __u32 file_index; }; - __u64 addr3; - __u64 __pad2[1]; - - /* - * If the ring is initialized with IORING_SETUP_SQE128, then this field - * contains 64-bytes of padding, doubling the size of the SQE. - */ - __u64 __big_sqe_pad[0]; + union { + struct { + __u64 addr3; + __u64 __pad2[1]; + }; + /* + * If the ring is initialized with IORING_SETUP_SQE128, then + * this field is used for 80 bytes of arbitrary command data + */ + __u8 cmd[0]; + }; }; enum { @@ -175,6 +179,7 @@ enum io_uring_op { IORING_OP_FGETXATTR, IORING_OP_GETXATTR, IORING_OP_SOCKET, + IORING_OP_URING_CMD, /* this goes last, obviously */ IORING_OP_LAST, -- cgit v1.2.3 From 456cba386e94f22fa1b1426303fdcac9e66b1417 Mon Sep 17 00:00:00 2001 From: Kanchan Joshi Date: Wed, 11 May 2022 11:17:48 +0530 Subject: nvme: wire-up uring-cmd support for io-passthru on char-device. 
Introduce handler for fops->uring_cmd(), implementing async passthru on char device (/dev/ngX). The handler supports newly introduced operation NVME_URING_CMD_IO. This operates on a new structure nvme_uring_cmd, which is similar to struct nvme_passthru_cmd64 but without the embedded 8b result field. This field is not needed since uring-cmd allows to return additional result via big-CQE. Signed-off-by: Kanchan Joshi Signed-off-by: Anuj Gupta Reviewed-by: Christoph Hellwig Link: https://lore.kernel.org/r/20220511054750.20432-5-joshi.k@samsung.com Signed-off-by: Jens Axboe --- include/uapi/linux/nvme_ioctl.h | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/nvme_ioctl.h b/include/uapi/linux/nvme_ioctl.h index b2e43185e3b5..04e458c649ab 100644 --- a/include/uapi/linux/nvme_ioctl.h +++ b/include/uapi/linux/nvme_ioctl.h @@ -70,6 +70,28 @@ struct nvme_passthru_cmd64 { __u64 result; }; +/* same as struct nvme_passthru_cmd64, minus the 8b result field */ +struct nvme_uring_cmd { + __u8 opcode; + __u8 flags; + __u16 rsvd1; + __u32 nsid; + __u32 cdw2; + __u32 cdw3; + __u64 metadata; + __u64 addr; + __u32 metadata_len; + __u32 data_len; + __u32 cdw10; + __u32 cdw11; + __u32 cdw12; + __u32 cdw13; + __u32 cdw14; + __u32 cdw15; + __u32 timeout_ms; + __u32 rsvd2; +}; + #define nvme_admin_cmd nvme_passthru_cmd #define NVME_IOCTL_ID _IO('N', 0x40) @@ -83,4 +105,7 @@ struct nvme_passthru_cmd64 { #define NVME_IOCTL_IO64_CMD _IOWR('N', 0x48, struct nvme_passthru_cmd64) #define NVME_IOCTL_IO64_CMD_VEC _IOWR('N', 0x49, struct nvme_passthru_cmd64) +/* io_uring async commands: */ +#define NVME_URING_CMD_IO _IOWR('N', 0x80, struct nvme_uring_cmd) + #endif /* _UAPI_LINUX_NVME_IOCTL_H */ -- cgit v1.2.3 From f569add47119fa910ed7711b26b8d38e21f7ea77 Mon Sep 17 00:00:00 2001 From: Anuj Gupta Date: Wed, 11 May 2022 11:17:49 +0530 Subject: nvme: add vectored-io support for uring-cmd wire up support for async passthru 
that takes an array of buffers (using iovec). Exposed via a new op NVME_URING_CMD_IO_VEC. Same 'struct nvme_uring_cmd' is to be used with - 1. cmd.addr as base address of user iovec array 2. cmd.data_len as count of iovec array elements Signed-off-by: Kanchan Joshi Signed-off-by: Anuj Gupta Reviewed-by: Christoph Hellwig Link: https://lore.kernel.org/r/20220511054750.20432-6-joshi.k@samsung.com Signed-off-by: Jens Axboe --- include/uapi/linux/nvme_ioctl.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/nvme_ioctl.h b/include/uapi/linux/nvme_ioctl.h index 04e458c649ab..0b1876aa5a59 100644 --- a/include/uapi/linux/nvme_ioctl.h +++ b/include/uapi/linux/nvme_ioctl.h @@ -107,5 +107,6 @@ struct nvme_uring_cmd { /* io_uring async commands: */ #define NVME_URING_CMD_IO _IOWR('N', 0x80, struct nvme_uring_cmd) +#define NVME_URING_CMD_IO_VEC _IOWR('N', 0x81, struct nvme_uring_cmd) #endif /* _UAPI_LINUX_NVME_IOCTL_H */ -- cgit v1.2.3 From 07343110b293456d30393e89b86c4dee1ac051c8 Mon Sep 17 00:00:00 2001 From: Feng Zhou Date: Wed, 11 May 2022 17:38:53 +0800 Subject: bpf: add bpf_map_lookup_percpu_elem for percpu map Add new ebpf helpers bpf_map_lookup_percpu_elem. The implementation method is relatively simple, refer to the implementation method of map_lookup_elem of percpu map, increase the parameters of cpu, and obtain it according to the specified cpu. 
Signed-off-by: Feng Zhou Link: https://lore.kernel.org/r/20220511093854.411-2-zhoufeng.zf@bytedance.com Signed-off-by: Alexei Starovoitov --- include/uapi/linux/bpf.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index bc7f89948f54..0210f85131b3 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -5164,6 +5164,14 @@ union bpf_attr { * if not NULL, is a reference which must be released using its * corresponding release function, or moved into a BPF map before * program exit. + * + * void *bpf_map_lookup_percpu_elem(struct bpf_map *map, const void *key, u32 cpu) + * Description + * Perform a lookup in *percpu map* for an entry associated to + * *key* on *cpu*. + * Return + * Map value associated to *key* on *cpu*, or **NULL** if no entry + * was found or *cpu* is invalid. */ #define __BPF_FUNC_MAPPER(FN) \ FN(unspec), \ @@ -5361,6 +5369,7 @@ union bpf_attr { FN(skb_set_tstamp), \ FN(ima_file_hash), \ FN(kptr_xchg), \ + FN(map_lookup_percpu_elem), \ /* */ /* integer value in 'imm' field of BPF_CALL instruction selects which helper -- cgit v1.2.3 From c9b516f16be5896a3d798f8efb03acbd2ceec715 Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Thu, 12 May 2022 20:38:36 -0700 Subject: ELF, uapi: fixup ELF_ST_TYPE definition This is very theoretical compile failure: ELF_ST_TYPE(st_info = A) Cast will bind first and st_info will stop being lvalue: error: lvalue required as left operand of assignment Given that the only use of this macro is ELF_ST_TYPE(sym->st_info) where st_info is "unsigned char" I've decided to remove cast especially given that companion macro ELF_ST_BIND doesn't use cast. Link: https://lkml.kernel.org/r/Ymv7G1BeX4kt3obz@localhost.localdomain Signed-off-by: Alexey Dobriyan Acked-by: Kees Cook Cc: "Eric W. 
Biederman" Signed-off-by: Andrew Morton --- include/uapi/linux/elf.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/elf.h b/include/uapi/linux/elf.h index 787c657bfae8..237f21a5e0f6 100644 --- a/include/uapi/linux/elf.h +++ b/include/uapi/linux/elf.h @@ -134,7 +134,7 @@ typedef __s64 Elf64_Sxword; #define STT_TLS 6 #define ELF_ST_BIND(x) ((x) >> 4) -#define ELF_ST_TYPE(x) (((unsigned int) x) & 0xf) +#define ELF_ST_TYPE(x) ((x) & 0xf) #define ELF32_ST_BIND(x) ELF_ST_BIND(x) #define ELF32_ST_TYPE(x) ELF_ST_TYPE(x) #define ELF64_ST_BIND(x) ELF_ST_BIND(x) -- cgit v1.2.3 From 783eb354fb3dcd598e8e7e8a2ed88c0fb6ce5d2f Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Mon, 4 Apr 2022 15:19:41 +0900 Subject: agpgart.h: do not include <stdlib.h> from exported header Commit 35d0f1d54ecd ("include/uapi/linux/agpgart.h: include stdlib.h in userspace") included <stdlib.h> to fix the unknown size_t error, but I do not think it is the right fix. This header already uses __kernel_size_t a few lines below. Replace the remaining size_t, and stop including <stdlib.h>.
Signed-off-by: Masahiro Yamada Signed-off-by: Arnd Bergmann Reviewed-by: Christoph Hellwig Reviewed-by: Nick Desaulniers --- include/uapi/linux/agpgart.h | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/agpgart.h b/include/uapi/linux/agpgart.h index f5251045181a..9cc3448c0b5b 100644 --- a/include/uapi/linux/agpgart.h +++ b/include/uapi/linux/agpgart.h @@ -52,7 +52,6 @@ #ifndef __KERNEL__ #include <linux/types.h> -#include <stdlib.h> struct agp_version { __u16 major; @@ -64,10 +63,10 @@ typedef struct _agp_info { __u32 bridge_id; /* bridge vendor/device */ __u32 agp_mode; /* mode info of bridge */ unsigned long aper_base;/* base of aperture */ - size_t aper_size; /* size of aperture */ - size_t pg_total; /* max pages (swap + system) */ - size_t pg_system; /* max pages (system) */ - size_t pg_used; /* current pages used */ + __kernel_size_t aper_size; /* size of aperture */ + __kernel_size_t pg_total; /* max pages (swap + system) */ + __kernel_size_t pg_system; /* max pages (system) */ + __kernel_size_t pg_used; /* current pages used */ } agp_info; typedef struct _agp_setup { -- cgit v1.2.3 From 1339f24b336db5ded9811f3fe7b948e0de207785 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Sat, 7 May 2022 14:18:44 -0600 Subject: io_uring: allow allocated fixed files for openat/openat2 If the application passes in IORING_FILE_INDEX_ALLOC as the file_slot, then that's a hint to allocate a fixed file descriptor rather than have one be passed in directly. This can be useful for having io_uring manage the direct descriptor space. Normal open direct requests will complete with 0 for success, and < 0 in case of error. If io_uring is asked to allocate the direct descriptor, then the direct descriptor is returned in case of success.
Reviewed-by: Hao Xu Signed-off-by: Jens Axboe --- include/uapi/linux/io_uring.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h index 06621a278cb6..b7f02a55032a 100644 --- a/include/uapi/linux/io_uring.h +++ b/include/uapi/linux/io_uring.h @@ -63,6 +63,15 @@ struct io_uring_sqe { __u64 __pad2[2]; }; +/* + * If sqe->file_index is set to this for opcodes that instantiate a new + * direct descriptor (like openat/openat2/accept), then io_uring will allocate + * an available direct descriptor instead of having the application pass one + * in. The picked direct descriptor will be returned in cqe->res, or -ENFILE + * if the space is full. + */ +#define IORING_FILE_INDEX_ALLOC (~0U) + enum { IOSQE_FIXED_FILE_BIT, IOSQE_IO_DRAIN_BIT, -- cgit v1.2.3 From a8da73a32b6e9271a613e5a0e90a8c35f40abeb8 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Mon, 9 May 2022 09:29:14 -0600 Subject: io_uring: add flag for allocating a fully sparse direct descriptor space Currently to set up a fully sparse descriptor space upfront, the app needs to allocate an array of the full size and memset it to -1 and then pass that in. Make this a bit easier by allowing a flag that simply does this internally rather than needing to copy each slot separately. This works with IORING_REGISTER_FILES2 as the flag is set in struct io_uring_rsrc_register, and is only allowed when the type is IORING_RSRC_FILE as this doesn't make sense for registered buffers.
Reviewed-by: Hao Xu Signed-off-by: Jens Axboe --- include/uapi/linux/io_uring.h | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h index b7f02a55032a..36ec43dc7bf9 100644 --- a/include/uapi/linux/io_uring.h +++ b/include/uapi/linux/io_uring.h @@ -396,9 +396,15 @@ struct io_uring_files_update { __aligned_u64 /* __s32 * */ fds; }; +/* + * Register a fully sparse file space, rather than pass in an array of all + * -1 file descriptors. + */ +#define IORING_RSRC_REGISTER_SPARSE (1U << 0) + struct io_uring_rsrc_register { __u32 nr; - __u32 resv; + __u32 flags; __u64 resv2; __aligned_u64 data; __aligned_u64 tags; -- cgit v1.2.3 From b1f9e876862d8f7176299ec4fb2108bc1045cbc8 Mon Sep 17 00:00:00 2001 From: Peter Xu Date: Thu, 12 May 2022 20:22:56 -0700 Subject: mm/uffd: enable write protection for shmem & hugetlbfs We've had all the necessary changes ready for both shmem and hugetlbfs. Turn on all the shmem/hugetlbfs switches for userfaultfd-wp. We can expand UFFD_API_RANGE_IOCTLS_BASIC with _UFFDIO_WRITEPROTECT too because all existing types now support write protection mode. Since vma_can_userfault() will be used elsewhere, move into userfaultfd_k.h. Link: https://lkml.kernel.org/r/20220405014926.15101-1-peterx@redhat.com Signed-off-by: Peter Xu Cc: Alistair Popple Cc: Andrea Arcangeli Cc: Axel Rasmussen Cc: David Hildenbrand Cc: Hugh Dickins Cc: Jerome Glisse Cc: "Kirill A . 
Shutemov" Cc: Matthew Wilcox Cc: Mike Kravetz Cc: Mike Rapoport Cc: Nadav Amit Signed-off-by: Andrew Morton --- include/uapi/linux/userfaultfd.h | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/userfaultfd.h b/include/uapi/linux/userfaultfd.h index ef739054cb1c..7d32b1e797fb 100644 --- a/include/uapi/linux/userfaultfd.h +++ b/include/uapi/linux/userfaultfd.h @@ -33,7 +33,8 @@ UFFD_FEATURE_THREAD_ID | \ UFFD_FEATURE_MINOR_HUGETLBFS | \ UFFD_FEATURE_MINOR_SHMEM | \ - UFFD_FEATURE_EXACT_ADDRESS) + UFFD_FEATURE_EXACT_ADDRESS | \ + UFFD_FEATURE_WP_HUGETLBFS_SHMEM) #define UFFD_API_IOCTLS \ ((__u64)1 << _UFFDIO_REGISTER | \ (__u64)1 << _UFFDIO_UNREGISTER | \ @@ -47,7 +48,8 @@ #define UFFD_API_RANGE_IOCTLS_BASIC \ ((__u64)1 << _UFFDIO_WAKE | \ (__u64)1 << _UFFDIO_COPY | \ - (__u64)1 << _UFFDIO_CONTINUE) + (__u64)1 << _UFFDIO_CONTINUE | \ + (__u64)1 << _UFFDIO_WRITEPROTECT) /* * Valid ioctl command number range with this API is from 0x00 to @@ -194,6 +196,9 @@ struct uffdio_api { * UFFD_FEATURE_EXACT_ADDRESS indicates that the exact address of page * faults would be provided and the offset within the page would not be * masked. + * + * UFFD_FEATURE_WP_HUGETLBFS_SHMEM indicates that userfaultfd + * write-protection mode is supported on both shmem and hugetlbfs. */ #define UFFD_FEATURE_PAGEFAULT_FLAG_WP (1<<0) #define UFFD_FEATURE_EVENT_FORK (1<<1) @@ -207,6 +212,7 @@ struct uffdio_api { #define UFFD_FEATURE_MINOR_HUGETLBFS (1<<9) #define UFFD_FEATURE_MINOR_SHMEM (1<<10) #define UFFD_FEATURE_EXACT_ADDRESS (1<<11) +#define UFFD_FEATURE_WP_HUGETLBFS_SHMEM (1<<12) __u64 features; __u64 ioctls; -- cgit v1.2.3 From 390ed29b5e425ba00da2b6113b74a14949f71b02 Mon Sep 17 00:00:00 2001 From: Hao Xu Date: Sat, 14 May 2022 22:20:43 +0800 Subject: io_uring: add IORING_ACCEPT_MULTISHOT for accept add an accept_flag IORING_ACCEPT_MULTISHOT for accept, which is to support multishot. 
Signed-off-by: Hao Xu Link: https://lore.kernel.org/r/20220514142046.58072-2-haoxu.linux@gmail.com Signed-off-by: Jens Axboe --- include/uapi/linux/io_uring.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h index 36ec43dc7bf9..15f821af9242 100644 --- a/include/uapi/linux/io_uring.h +++ b/include/uapi/linux/io_uring.h @@ -232,6 +232,11 @@ enum { */ #define IORING_RECVSEND_POLL_FIRST (1U << 0) +/* + * accept flags stored in sqe->ioprio + */ +#define IORING_ACCEPT_MULTISHOT (1U << 0) + /* * IO completion data structure (Completion Queue Entry) */ -- cgit v1.2.3 From 8fa10ee183c3a1ecb53e81c95895ed5bc2a5530a Mon Sep 17 00:00:00 2001 From: Paul Gortmaker Date: Sun, 15 May 2022 21:58:31 +0100 Subject: cdrom: mark CDROMGETSPINDOWN/CDROMSETSPINDOWN obsolete These were only implemented by the IDE CD driver, which has since been removed. Given that nobody is likely to create new CD/DVD hardware (and associated drivers) we can mark these appropriately. Cc: Jens Axboe Cc: Christoph Hellwig Cc: Phillip Potter Signed-off-by: Paul Gortmaker Link: https://lore.kernel.org/all/20220427132436.12795-3-paul.gortmaker@windriver.com Signed-off-by: Phillip Potter Link: https://lore.kernel.org/r/20220515205833.944139-4-phil@philpotter.co.uk Signed-off-by: Jens Axboe --- include/uapi/linux/cdrom.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/cdrom.h b/include/uapi/linux/cdrom.h index 804ff8d98f71..011e594e4a0d 100644 --- a/include/uapi/linux/cdrom.h +++ b/include/uapi/linux/cdrom.h @@ -103,7 +103,7 @@ #define CDROMREADALL 0x5318 /* read all 2646 bytes */ /* - * These ioctls are (now) only in ide-cd.c for controlling + * These ioctls were only in (now removed) ide-cd.c for controlling * drive spindown time. 
They should be implemented in the * Uniform driver, via generic packet commands, GPCMD_MODE_SELECT_10, * GPCMD_MODE_SENSE_10 and the GPMODE_POWER_PAGE... -- cgit v1.2.3 From 89527be8d8d672773eeaec910118a6e84fb597e3 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 13 May 2022 11:33:56 -0700 Subject: net: add IFLA_TSO_{MAX_SIZE|SEGS} attributes New netlink attributes IFLA_TSO_MAX_SIZE and IFLA_TSO_MAX_SEGS are used to report to user-space the device TSO limits. ip -d link sh dev eth1 ... tso_max_size 65536 tso_max_segs 65535 Signed-off-by: Eric Dumazet Acked-by: Alexander Duyck Signed-off-by: David S. Miller --- include/uapi/linux/if_link.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h index d1e600816b82..5f58dcfe2787 100644 --- a/include/uapi/linux/if_link.h +++ b/include/uapi/linux/if_link.h @@ -368,6 +368,8 @@ enum { IFLA_PARENT_DEV_NAME, IFLA_PARENT_DEV_BUS_NAME, IFLA_GRO_MAX_SIZE, + IFLA_TSO_MAX_SIZE, + IFLA_TSO_MAX_SEGS, __IFLA_MAX }; -- cgit v1.2.3 From f04fbcc64e4be16185151f9fca44ea1b3d074bd0 Mon Sep 17 00:00:00 2001 From: Qu Wenruo Date: Wed, 20 Apr 2022 16:08:27 +0800 Subject: btrfs: move definition of btrfs_raid_types to volumes.h It's only internally used as another way to represent btrfs profiles, it's not exposed through any on-disk format, in fact this btrfs_raid_types diverges from the on-disk format values. Furthermore, since it's an internal structure, its definition can change in the future.
Reviewed-by: Johannes Thumshirn Signed-off-by: Qu Wenruo Reviewed-by: David Sterba Signed-off-by: David Sterba --- include/uapi/linux/btrfs_tree.h | 13 ------------- 1 file changed, 13 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/btrfs_tree.h b/include/uapi/linux/btrfs_tree.h index b069752a8ecf..d4117152d907 100644 --- a/include/uapi/linux/btrfs_tree.h +++ b/include/uapi/linux/btrfs_tree.h @@ -880,19 +880,6 @@ struct btrfs_dev_replace_item { #define BTRFS_BLOCK_GROUP_RESERVED (BTRFS_AVAIL_ALLOC_BIT_SINGLE | \ BTRFS_SPACE_INFO_GLOBAL_RSV) -enum btrfs_raid_types { - BTRFS_RAID_RAID10, - BTRFS_RAID_RAID1, - BTRFS_RAID_DUP, - BTRFS_RAID_RAID0, - BTRFS_RAID_SINGLE, - BTRFS_RAID_RAID5, - BTRFS_RAID_RAID6, - BTRFS_RAID_RAID1C3, - BTRFS_RAID_RAID1C4, - BTRFS_NR_RAID_TYPES -}; - #define BTRFS_BLOCK_GROUP_TYPE_MASK (BTRFS_BLOCK_GROUP_DATA | \ BTRFS_BLOCK_GROUP_SYSTEM | \ BTRFS_BLOCK_GROUP_METADATA) -- cgit v1.2.3 From 1c05bb947f6464756174830b778aabf8f9d6ed0e Mon Sep 17 00:00:00 2001 From: Thomas Huth Date: Mon, 16 May 2022 12:12:02 +0200 Subject: include/uapi/linux/vfio.h: Fix trivial typo - _IORW should be _IOWR instead There is no macro called _IORW, so use _IOWR in the comment instead. 
Signed-off-by: Thomas Huth Reviewed-by: Cornelia Huck Link: https://lore.kernel.org/r/20220516101202.88373-1-thuth@redhat.com Signed-off-by: Alex Williamson --- include/uapi/linux/vfio.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/vfio.h b/include/uapi/linux/vfio.h index fea86061b44e..733a1cddde30 100644 --- a/include/uapi/linux/vfio.h +++ b/include/uapi/linux/vfio.h @@ -643,7 +643,7 @@ enum { }; /** - * VFIO_DEVICE_GET_PCI_HOT_RESET_INFO - _IORW(VFIO_TYPE, VFIO_BASE + 12, + * VFIO_DEVICE_GET_PCI_HOT_RESET_INFO - _IOWR(VFIO_TYPE, VFIO_BASE + 12, * struct vfio_pci_hot_reset_info) * * Return: 0 on success, -errno on failure: @@ -770,7 +770,7 @@ struct vfio_device_ioeventfd { #define VFIO_DEVICE_IOEVENTFD _IO(VFIO_TYPE, VFIO_BASE + 16) /** - * VFIO_DEVICE_FEATURE - _IORW(VFIO_TYPE, VFIO_BASE + 17, + * VFIO_DEVICE_FEATURE - _IOWR(VFIO_TYPE, VFIO_BASE + 17, * struct vfio_device_feature) * * Get, set, or probe feature data of the device. The feature is selected -- cgit v1.2.3 From 9f39d36530e5678d092d53c5c2c60d82b4dcc169 Mon Sep 17 00:00:00 2001 From: Oliver Hartkopp Date: Sat, 7 May 2022 13:55:58 +0200 Subject: can: isotp: add support for transmission without flow control Usually the ISO 15765-2 protocol is a point-to-point protocol to transfer segmented PDUs to a dedicated receiver. This receiver sends a flow control message to specify protocol options and timings (e.g. block size / STmin). The so called functional addressing communication allows a 1:N communication but is limited to a single frame length. This new CAN_ISOTP_CF_BROADCAST allows an unconfirmed 1:N communication with PDU length that would not fit into a single frame. This feature is not covered by the ISO 15765-2 standard. 
Link: https://lore.kernel.org/all/20220507115558.19065-1-socketcan@hartkopp.net Signed-off-by: Oliver Hartkopp Signed-off-by: Marc Kleine-Budde --- include/uapi/linux/can/isotp.h | 25 +++++++++++++------------ 1 file changed, 13 insertions(+), 12 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/can/isotp.h b/include/uapi/linux/can/isotp.h index 590f8aea2b6d..439c982f7e81 100644 --- a/include/uapi/linux/can/isotp.h +++ b/include/uapi/linux/can/isotp.h @@ -124,18 +124,19 @@ struct can_isotp_ll_options { /* flags for isotp behaviour */ -#define CAN_ISOTP_LISTEN_MODE 0x001 /* listen only (do not send FC) */ -#define CAN_ISOTP_EXTEND_ADDR 0x002 /* enable extended addressing */ -#define CAN_ISOTP_TX_PADDING 0x004 /* enable CAN frame padding tx path */ -#define CAN_ISOTP_RX_PADDING 0x008 /* enable CAN frame padding rx path */ -#define CAN_ISOTP_CHK_PAD_LEN 0x010 /* check received CAN frame padding */ -#define CAN_ISOTP_CHK_PAD_DATA 0x020 /* check received CAN frame padding */ -#define CAN_ISOTP_HALF_DUPLEX 0x040 /* half duplex error state handling */ -#define CAN_ISOTP_FORCE_TXSTMIN 0x080 /* ignore stmin from received FC */ -#define CAN_ISOTP_FORCE_RXSTMIN 0x100 /* ignore CFs depending on rx stmin */ -#define CAN_ISOTP_RX_EXT_ADDR 0x200 /* different rx extended addressing */ -#define CAN_ISOTP_WAIT_TX_DONE 0x400 /* wait for tx completion */ -#define CAN_ISOTP_SF_BROADCAST 0x800 /* 1-to-N functional addressing */ +#define CAN_ISOTP_LISTEN_MODE 0x0001 /* listen only (do not send FC) */ +#define CAN_ISOTP_EXTEND_ADDR 0x0002 /* enable extended addressing */ +#define CAN_ISOTP_TX_PADDING 0x0004 /* enable CAN frame padding tx path */ +#define CAN_ISOTP_RX_PADDING 0x0008 /* enable CAN frame padding rx path */ +#define CAN_ISOTP_CHK_PAD_LEN 0x0010 /* check received CAN frame padding */ +#define CAN_ISOTP_CHK_PAD_DATA 0x0020 /* check received CAN frame padding */ +#define CAN_ISOTP_HALF_DUPLEX 0x0040 /* half duplex error state handling */ +#define 
CAN_ISOTP_FORCE_TXSTMIN 0x0080 /* ignore stmin from received FC */ +#define CAN_ISOTP_FORCE_RXSTMIN 0x0100 /* ignore CFs depending on rx stmin */ +#define CAN_ISOTP_RX_EXT_ADDR 0x0200 /* different rx extended addressing */ +#define CAN_ISOTP_WAIT_TX_DONE 0x0400 /* wait for tx completion */ +#define CAN_ISOTP_SF_BROADCAST 0x0800 /* 1-to-N functional addressing */ +#define CAN_ISOTP_CF_BROADCAST 0x1000 /* 1-to-N transmission w/o FC */ /* protocol machine default values */ -- cgit v1.2.3 From b87f5e25b2f9deb503a61c6957c7b1680d91cfea Mon Sep 17 00:00:00 2001 From: Daniel Scally Date: Fri, 6 May 2022 01:03:48 +0200 Subject: media: uapi: Add IPU3 packed Y10 format Some platforms with an Intel IPU3 have an IR sensor producing 10 bit greyscale format data that is transmitted over a CSI-2 bus to a CIO2 device - this packs the data into 32 bytes per 25 pixels. Add an entry to the uAPI header defining that format. Signed-off-by: Daniel Scally Acked-by: Andy Shevchenko Signed-off-by: Sakari Ailus Signed-off-by: Mauro Carvalho Chehab --- include/uapi/linux/videodev2.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/videodev2.h b/include/uapi/linux/videodev2.h index 6d465dc443b7..343b95107fce 100644 --- a/include/uapi/linux/videodev2.h +++ b/include/uapi/linux/videodev2.h @@ -569,6 +569,7 @@ struct v4l2_pix_format { /* Grey bit-packed formats */ #define V4L2_PIX_FMT_Y10BPACK v4l2_fourcc('Y', '1', '0', 'B') /* 10 Greyscale bit-packed */ #define V4L2_PIX_FMT_Y10P v4l2_fourcc('Y', '1', '0', 'P') /* 10 Greyscale, MIPI RAW10 packed */ +#define V4L2_PIX_FMT_IPU3_Y10 v4l2_fourcc('i', 'p', '3', 'y') /* IPU3 packed 10-bit greyscale */ /* Palette formats */ #define V4L2_PIX_FMT_PAL8 v4l2_fourcc('P', 'A', 'L', '8') /* 8 8-bit palette */ @@ -749,7 +750,7 @@ struct v4l2_pix_format { #define V4L2_PIX_FMT_QC08C v4l2_fourcc('Q', '0', '8', 'C') /* Qualcomm 8-bit compressed */ #define V4L2_PIX_FMT_QC10C v4l2_fourcc('Q', '1', 
'0', 'C') /* Qualcomm 10-bit compressed */ -/* 10bit raw bayer packed, 32 bytes for every 25 pixels, last LSB 6 bits unused */ +/* 10bit raw packed, 32 bytes for every 25 pixels, last LSB 6 bits unused */ #define V4L2_PIX_FMT_IPU3_SBGGR10 v4l2_fourcc('i', 'p', '3', 'b') /* IPU3 packed 10-bit BGGR bayer */ #define V4L2_PIX_FMT_IPU3_SGBRG10 v4l2_fourcc('i', 'p', '3', 'g') /* IPU3 packed 10-bit GBRG bayer */ #define V4L2_PIX_FMT_IPU3_SGRBG10 v4l2_fourcc('i', 'p', '3', 'G') /* IPU3 packed 10-bit GRBG bayer */ -- cgit v1.2.3 From 7c3e9fcad9c7d8bb5d69a576044fb16b1d2e8a01 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=A9r=C3=B4me=20Pouiller?= Date: Tue, 17 May 2022 09:27:08 +0200 Subject: dma-buf: fix use of DMA_BUF_SET_NAME_{A,B} in userspace MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The typedefs u32 and u64 are not available in userspace. Thus users get an error when they try to use DMA_BUF_SET_NAME_A or DMA_BUF_SET_NAME_B: $ gcc -Wall -c -MMD -c -o ioctls_list.o ioctls_list.c In file included from /usr/include/x86_64-linux-gnu/asm/ioctl.h:1, from /usr/include/linux/ioctl.h:5, from /usr/include/asm-generic/ioctls.h:5, from ioctls_list.c:11: ioctls_list.c:463:29: error: ‘u32’ undeclared here (not in a function) 463 | { "DMA_BUF_SET_NAME_A", DMA_BUF_SET_NAME_A, -1, -1 }, // linux/dma-buf.h | ^~~~~~~~~~~~~~~~~~ ioctls_list.c:464:29: error: ‘u64’ undeclared here (not in a function) 464 | { "DMA_BUF_SET_NAME_B", DMA_BUF_SET_NAME_B, -1, -1 }, // linux/dma-buf.h | ^~~~~~~~~~~~~~~~~~ The issue was initially reported here[1]. 
[1]: https://github.com/jerome-pouiller/ioctl/pull/14 Signed-off-by: Jérôme Pouiller Reviewed-by: Christian König Fixes: a5bff92eaac4 ("dma-buf: Fix SET_NAME ioctl uapi") CC: stable@vger.kernel.org Link: https://patchwork.freedesktop.org/patch/msgid/20220517072708.245265-1-Jerome.Pouiller@silabs.com Signed-off-by: Christian König --- include/uapi/linux/dma-buf.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/dma-buf.h b/include/uapi/linux/dma-buf.h index 8e4a2ca0bcbf..b1523cb8ab30 100644 --- a/include/uapi/linux/dma-buf.h +++ b/include/uapi/linux/dma-buf.h @@ -92,7 +92,7 @@ struct dma_buf_sync { * between them in actual uapi, they're just different numbers. */ #define DMA_BUF_SET_NAME _IOW(DMA_BUF_BASE, 1, const char *) -#define DMA_BUF_SET_NAME_A _IOW(DMA_BUF_BASE, 1, u32) -#define DMA_BUF_SET_NAME_B _IOW(DMA_BUF_BASE, 1, u64) +#define DMA_BUF_SET_NAME_A _IOW(DMA_BUF_BASE, 1, __u32) +#define DMA_BUF_SET_NAME_B _IOW(DMA_BUF_BASE, 1, __u64) #endif -- cgit v1.2.3 From c8383054506c77b814489c09877b5db83fd4abf2 Mon Sep 17 00:00:00 2001 From: Jeffle Xu Date: Mon, 25 Apr 2022 20:21:24 +0800 Subject: cachefiles: notify the user daemon when looking up cookie Fscache/CacheFiles used to serve as a local cache for a remote networking fs. A new on-demand read mode will be introduced for CacheFiles, which can boost the scenario where on-demand read semantics are needed, e.g. container image distribution. The essential difference between these two modes is seen when a cache miss occurs: In the original mode, the netfs will fetch the data from the remote server and then write it to the cache file; in on-demand read mode, fetching the data and writing it into the cache is delegated to a user daemon. As the first step, notify the user daemon when looking up cookie. In this case, an anonymous fd is sent to the user daemon, through which the user daemon can write the fetched data to the cache file. 
Since the user daemon may move the anonymous fd around, e.g. through dup(), an object ID uniquely identifying the cache file is also attached. Also add one advisory flag (FSCACHE_ADV_WANT_CACHE_SIZE) suggesting that the cache file size shall be retrieved at runtime. This helps the scenario where one cache file contains multiple netfs files, e.g. for the purpose of deduplication. In this case, netfs itself has no idea the size of the cache file, whilst the user daemon should give the hint on it. Signed-off-by: Jeffle Xu Link: https://lore.kernel.org/r/20220509074028.74954-3-jefflexu@linux.alibaba.com Acked-by: David Howells Signed-off-by: Gao Xiang --- include/uapi/linux/cachefiles.h | 50 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 50 insertions(+) create mode 100644 include/uapi/linux/cachefiles.h (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/cachefiles.h b/include/uapi/linux/cachefiles.h new file mode 100644 index 000000000000..521f2fe4fe9c --- /dev/null +++ b/include/uapi/linux/cachefiles.h @@ -0,0 +1,50 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +#ifndef _LINUX_CACHEFILES_H +#define _LINUX_CACHEFILES_H + +#include <linux/types.h> + +/* + * Fscache ensures that the maximum length of cookie key is 255. The volume key + * is controlled by netfs, and generally no bigger than 255. + */ +#define CACHEFILES_MSG_MAX_SIZE 1024 + +enum cachefiles_opcode { + CACHEFILES_OP_OPEN, +}; + +/* + * Message Header + * + * @msg_id a unique ID identifying this message + * @opcode message type, CACHEFILES_OP_* + * @len message length, including message header and following data + * @object_id a unique ID identifying a cache file + * @data message type specific payload + */ +struct cachefiles_msg { + __u32 msg_id; + __u32 opcode; + __u32 len; + __u32 object_id; + __u8 data[]; +}; + +/* + * @data contains the volume_key followed directly by the cookie_key. 
volume_key + * is a NUL-terminated string; @volume_key_size indicates the size of the volume + * key in bytes. cookie_key is binary data, which is netfs specific; + * @cookie_key_size indicates the size of the cookie key in bytes. + * + * @fd identifies an anon_fd referring to the cache file. + */ +struct cachefiles_open { + __u32 volume_key_size; + __u32 cookie_key_size; + __u32 fd; + __u32 flags; + __u8 data[]; +}; + +#endif -- cgit v1.2.3 From 324b954ac80cff0d11ddb6bde9b6631e45e98620 Mon Sep 17 00:00:00 2001 From: Jeffle Xu Date: Mon, 25 Apr 2022 20:21:26 +0800 Subject: cachefiles: notify the user daemon when withdrawing cookie Notify the user daemon that cookie is going to be withdrawn, providing a hint that the associated anonymous fd can be closed. Be noted that this is only a hint. The user daemon may close the associated anonymous fd when receiving the CLOSE request, then it will receive another anonymous fd when the cookie gets looked up. Or it may ignore the CLOSE request, and keep writing data through the anonymous fd. However the next time the cookie gets looked up, the user daemon will still receive another new anonymous fd. Signed-off-by: Jeffle Xu Acked-by: David Howells Link: https://lore.kernel.org/r/20220425122143.56815-5-jefflexu@linux.alibaba.com Signed-off-by: Gao Xiang --- include/uapi/linux/cachefiles.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/cachefiles.h b/include/uapi/linux/cachefiles.h index 521f2fe4fe9c..37a0071037c8 100644 --- a/include/uapi/linux/cachefiles.h +++ b/include/uapi/linux/cachefiles.h @@ -12,6 +12,7 @@ enum cachefiles_opcode { CACHEFILES_OP_OPEN, + CACHEFILES_OP_CLOSE, }; /* -- cgit v1.2.3 From 9032b6e8589f269743984aac53e82e4835be16dc Mon Sep 17 00:00:00 2001 From: Jeffle Xu Date: Mon, 25 Apr 2022 20:21:27 +0800 Subject: cachefiles: implement on-demand read Implement the data plane of on-demand read mode. 
The early implementation [1] placed the entry to cachefiles_ondemand_read() in fscache_read(). However, fscache_read() can only detect if the requested file range is fully cache miss, whilst we need to notify the user daemon as long as there's a hole inside the requested file range. Thus the entry is now placed in cachefiles_prepare_read(). When working in on-demand read mode, once a hole is detected, the read routine will send a READ request to the user daemon. The user daemon needs to fetch the data and write it to the cache file. After sending the READ request, the read routine will hang there, until the READ request is handled by the user daemon. Then it will retry to read from the same file range. If no progress is encountered, the read routine will fail then. A new NETFS_SREQ_ONDEMAND flag is introduced to indicate that on-demand read should be done when a cache miss is encountered. [1] https://lore.kernel.org/all/20220406075612.60298-6-jefflexu@linux.alibaba.com/ #v8 Signed-off-by: Jeffle Xu Acked-by: David Howells Link: https://lore.kernel.org/r/20220425122143.56815-6-jefflexu@linux.alibaba.com Signed-off-by: Gao Xiang --- include/uapi/linux/cachefiles.h | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/cachefiles.h b/include/uapi/linux/cachefiles.h index 37a0071037c8..78caa73e5343 100644 --- a/include/uapi/linux/cachefiles.h +++ b/include/uapi/linux/cachefiles.h @@ -3,6 +3,7 @@ #define _LINUX_CACHEFILES_H #include <linux/types.h> +#include <linux/ioctl.h> /* * Fscache ensures that the maximum length of cookie key is 255. 
The volume key @@ -13,6 +14,7 @@ enum cachefiles_opcode { CACHEFILES_OP_OPEN, CACHEFILES_OP_CLOSE, + CACHEFILES_OP_READ, }; /* @@ -48,4 +50,19 @@ struct cachefiles_open { __u8 data[]; }; +/* + * @off indicates the starting offset of the requested file range + * @len indicates the length of the requested file range + */ +struct cachefiles_read { + __u64 off; + __u64 len; +}; + +/* + * Reply for READ request + * @arg for this ioctl is the @id field of READ request. + */ +#define CACHEFILES_IOC_READ_COMPLETE _IOW(0x98, 1, int) + #endif -- cgit v1.2.3 From c7fb19428d67dd0a2a78a4f237af01d39c78dc5a Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Sat, 30 Apr 2022 14:38:53 -0600 Subject: io_uring: add support for ring mapped supplied buffers Provided buffers allow an application to supply io_uring with buffers that can then be grabbed for a read/receive request, when the data source is ready to deliver data. The existing scheme relies on using IORING_OP_PROVIDE_BUFFERS to do that, but it can be difficult to use in real world applications. It's pretty efficient if the application is able to supply back batches of provided buffers when they have been consumed and the application is ready to recycle them, but if fragmentation occurs in the buffer space, it can become difficult to supply enough buffers at the time. This hurts efficiency. Add a register op, IORING_REGISTER_PBUF_RING, which allows an application to set up a shared queue for each buffer group of provided buffers. The application can then supply buffers simply by adding them to this ring, and the kernel can consume them just as easily. The ring shares the head with the application, the tail remains private in the kernel. Provided buffers setup with IORING_REGISTER_PBUF_RING cannot use IORING_OP_{PROVIDE,REMOVE}_BUFFERS for adding or removing entries to the ring, they must use the mapped ring. Mapped provided buffer rings can co-exist with normal provided buffers, just not within the same group ID. 
To gauge overhead of the existing scheme and evaluate the mapped ring approach, a simple NOP benchmark was written. It uses a ring of 128 entries, and submits/completes 32 at the time. 'Replenish' is how many buffers are provided back at the time after they have been consumed: Test Replenish NOPs/sec ================================================================ No provided buffers NA ~30M Provided buffers 32 ~16M Provided buffers 1 ~10M Ring buffers 32 ~27M Ring buffers 1 ~27M The ring mapped buffers perform almost as well as not using provided buffers at all, and they don't care if you provided 1 or more back at the same time. This means application can just replenish as they go, rather than need to batch and compact, further reducing overhead in the application. The NOP benchmark above doesn't need to do any compaction, so that overhead isn't even reflected in the above test. Co-developed-by: Dylan Yudaken Signed-off-by: Jens Axboe --- include/uapi/linux/io_uring.h | 36 ++++++++++++++++++++++++++++++++++++ 1 file changed, 36 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h index 15f821af9242..ddf969ae5a79 100644 --- a/include/uapi/linux/io_uring.h +++ b/include/uapi/linux/io_uring.h @@ -384,6 +384,10 @@ enum { IORING_REGISTER_RING_FDS = 20, IORING_UNREGISTER_RING_FDS = 21, + /* register ring based provide buffer group */ + IORING_REGISTER_PBUF_RING = 22, + IORING_UNREGISTER_PBUF_RING = 23, + /* this goes last */ IORING_REGISTER_LAST }; @@ -461,6 +465,38 @@ struct io_uring_restriction { __u32 resv2[3]; }; +struct io_uring_buf { + __u64 addr; + __u32 len; + __u16 bid; + __u16 resv; +}; + +struct io_uring_buf_ring { + union { + /* + * To avoid spilling into more pages than we need to, the + * ring tail is overlaid with the io_uring_buf->resv field. 
+ */ + struct { + __u64 resv1; + __u32 resv2; + __u16 resv3; + __u16 tail; + }; + struct io_uring_buf bufs[0]; + }; +}; + +/* argument for IORING_(UN)REGISTER_PBUF_RING */ +struct io_uring_buf_reg { + __u64 ring_addr; + __u32 ring_entries; + __u16 bgid; + __u16 pad; + __u64 resv[3]; +}; + /* * io_uring_restriction->opcode values */ -- cgit v1.2.3 From c1318b39c7d36bd5139a9c71044ff2b2d3c6f9d8 Mon Sep 17 00:00:00 2001 From: Boris Pismenny Date: Wed, 18 May 2022 12:27:31 +0300 Subject: tls: Add opt-in zerocopy mode of sendfile() TLS device offload copies sendfile data to a bounce buffer before transmitting. It allows to maintain the valid MAC on TLS records when the file contents change and a part of TLS record has to be retransmitted on TCP level. In many common use cases (like serving static files over HTTPS) the file contents are not changed on the fly. In many use cases breaking the connection is totally acceptable if the file is changed during transmission, because it would be received corrupted in any case. This commit allows to optimize performance for such use cases to providing a new optional mode of TLS sendfile(), in which the extra copy is skipped. Removing this copy improves performance significantly, as TLS and TCP sendfile perform the same operations, and the only overhead is TLS header/trailer insertion. The new mode can only be enabled with the new socket option named TLS_TX_ZEROCOPY_SENDFILE on per-socket basis. It preserves backwards compatibility with existing applications that rely on the copying behavior. The new mode is safe, meaning that unsolicited modifications of the file being sent can't break integrity of the kernel. The worst thing that can happen is sending a corrupted TLS record, which is in any case not forbidden when using regular TCP sockets. Sockets other than TLS device offload are not affected by the new socket option. The actual status of zerocopy sendfile can be queried with sock_diag. 
Performance numbers in a single-core test with 24 HTTPS streams on nginx, under 100% CPU load: * non-zerocopy: 33.6 Gbit/s * zerocopy: 79.92 Gbit/s CPU: Intel(R) Xeon(R) Platinum 8380 CPU @ 2.30GHz Signed-off-by: Boris Pismenny Signed-off-by: Tariq Toukan Signed-off-by: Maxim Mikityanskiy Reviewed-by: Jakub Kicinski Link: https://lore.kernel.org/r/20220518092731.1243494-1-maximmi@nvidia.com Signed-off-by: Paolo Abeni --- include/uapi/linux/tls.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/tls.h b/include/uapi/linux/tls.h index 5f38be0ec0f3..ac39328eabe7 100644 --- a/include/uapi/linux/tls.h +++ b/include/uapi/linux/tls.h @@ -39,6 +39,7 @@ /* TLS socket options */ #define TLS_TX 1 /* Set transmit parameters */ #define TLS_RX 2 /* Set receive parameters */ +#define TLS_TX_ZEROCOPY_SENDFILE 3 /* transmit zerocopy sendfile */ /* Supported versions */ #define TLS_VERSION_MINOR(ver) ((ver) & 0xFF) @@ -160,6 +161,7 @@ enum { TLS_INFO_CIPHER, TLS_INFO_TXCONF, TLS_INFO_RXCONF, + TLS_INFO_ZC_SENDFILE, __TLS_INFO_MAX, }; #define TLS_INFO_MAX (__TLS_INFO_MAX - 1) -- cgit v1.2.3 From 58e5bdeb9c2b06895e723c0b1e670f54510ff782 Mon Sep 17 00:00:00 2001 From: Kanchan Joshi Date: Fri, 20 May 2022 14:36:30 +0530 Subject: nvme: enable uring-passthrough for admin commands Add two new opcodes that userspace can use for admin commands: NVME_URING_CMD_ADMIN : non-vectored NVME_URING_CMD_ADMIN_VEC : vectored variant Wire up support when these are issued on controller node(/dev/nvmeX). 
Signed-off-by: Kanchan Joshi Reviewed-by: Christoph Hellwig Link: https://lore.kernel.org/r/20220520090630.70394-3-joshi.k@samsung.com Signed-off-by: Jens Axboe --- include/uapi/linux/nvme_ioctl.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/nvme_ioctl.h b/include/uapi/linux/nvme_ioctl.h index 0b1876aa5a59..2f76cba67166 100644 --- a/include/uapi/linux/nvme_ioctl.h +++ b/include/uapi/linux/nvme_ioctl.h @@ -108,5 +108,7 @@ struct nvme_uring_cmd { /* io_uring async commands: */ #define NVME_URING_CMD_IO _IOWR('N', 0x80, struct nvme_uring_cmd) #define NVME_URING_CMD_IO_VEC _IOWR('N', 0x81, struct nvme_uring_cmd) +#define NVME_URING_CMD_ADMIN _IOWR('N', 0x82, struct nvme_uring_cmd) +#define NVME_URING_CMD_ADMIN_VEC _IOWR('N', 0x83, struct nvme_uring_cmd) #endif /* _UAPI_LINUX_NVME_IOCTL_H */ -- cgit v1.2.3 From 3bc253c2e652cf5f12cd8c00d80d8ec55d67d1a7 Mon Sep 17 00:00:00 2001 From: Geliang Tang Date: Thu, 19 May 2022 16:30:10 -0700 Subject: bpf: Add bpf_skc_to_mptcp_sock_proto This patch implements a new struct bpf_func_proto, named bpf_skc_to_mptcp_sock_proto. Define a new bpf_id BTF_SOCK_TYPE_MPTCP, and a new helper bpf_skc_to_mptcp_sock(), which invokes another new helper bpf_mptcp_sock_from_subflow() in net/mptcp/bpf.c to get struct mptcp_sock from a given subflow socket. 
v2: Emit BTF type, add func_id checks in verifier.c and bpf_trace.c, remove build check for CONFIG_BPF_JIT v5: Drop EXPORT_SYMBOL (Martin) Co-developed-by: Nicolas Rybowski Co-developed-by: Matthieu Baerts Signed-off-by: Nicolas Rybowski Signed-off-by: Matthieu Baerts Signed-off-by: Geliang Tang Signed-off-by: Mat Martineau Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20220519233016.105670-2-mathew.j.martineau@linux.intel.com --- include/uapi/linux/bpf.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 0210f85131b3..56688bee20d9 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -5172,6 +5172,12 @@ union bpf_attr { * Return * Map value associated to *key* on *cpu*, or **NULL** if no entry * was found or *cpu* is invalid. + * + * struct mptcp_sock *bpf_skc_to_mptcp_sock(void *sk) + * Description + * Dynamically cast a *sk* pointer to a *mptcp_sock* pointer. + * Return + * *sk* if casting is valid, or **NULL** otherwise. */ #define __BPF_FUNC_MAPPER(FN) \ FN(unspec), \ @@ -5370,6 +5376,7 @@ union bpf_attr { FN(ima_file_hash), \ FN(kptr_xchg), \ FN(map_lookup_percpu_elem), \ + FN(skc_to_mptcp_sock), \ /* */ /* integer value in 'imm' field of BPF_CALL instruction selects which helper -- cgit v1.2.3 From a13e248ff90e81e9322406c0e618cf2168702f4e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micka=C3=ABl=20Sala=C3=BCn?= Date: Fri, 6 May 2022 18:08:11 +0200 Subject: landlock: Fix landlock_add_rule(2) documentation MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit It is not mandatory to pass a file descriptor obtained with the O_PATH flag. Also, replace rule's accesses with ruleset's accesses. 
Link: https://lore.kernel.org/r/20220506160820.524344-2-mic@digikod.net Cc: stable@vger.kernel.org Signed-off-by: Mickaël Salaün --- include/uapi/linux/landlock.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/landlock.h b/include/uapi/linux/landlock.h index 15c31abb0d76..21c8d58283c9 100644 --- a/include/uapi/linux/landlock.h +++ b/include/uapi/linux/landlock.h @@ -62,8 +62,9 @@ struct landlock_path_beneath_attr { */ __u64 allowed_access; /** - * @parent_fd: File descriptor, open with ``O_PATH``, which identifies - * the parent directory of a file hierarchy, or just a file. + * @parent_fd: File descriptor, preferably opened with ``O_PATH``, + * which identifies the parent directory of a file hierarchy, or just a + * file. */ __s32 parent_fd; /* -- cgit v1.2.3 From b91c3e4ea756b12b7d992529226edce1cfd854d7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micka=C3=ABl=20Sala=C3=BCn?= Date: Fri, 6 May 2022 18:10:57 +0200 Subject: landlock: Add support for file reparenting with LANDLOCK_ACCESS_FS_REFER MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add a new LANDLOCK_ACCESS_FS_REFER access right to enable policy writers to allow sandboxed processes to link and rename files from and to a specific set of file hierarchies. This access right should be composed with LANDLOCK_ACCESS_FS_MAKE_* for the destination of a link or rename, and with LANDLOCK_ACCESS_FS_REMOVE_* for a source of a rename. This lifts a Landlock limitation that always denied changing the parent of an inode. Renaming or linking to the same directory is still always allowed, whether LANDLOCK_ACCESS_FS_REFER is used or not, because it is not considered a threat to user data. However, creating multiple links or renaming to a different parent directory may lead to privilege escalations if not handled properly. 
Indeed, we must be sure that the source doesn't gain more privileges by being accessible from the destination. This is handled by making sure that the source hierarchy (including the referenced file or directory itself) restricts at least as much the destination hierarchy. If it is not the case, an EXDEV error is returned, making it potentially possible for user space to copy the file hierarchy instead of moving or linking it. Instead of creating different access rights for the source and the destination, we choose to make it simple and consistent for users. Indeed, considering the previous constraint, it would be weird to require such destination access right to be also granted to the source (to make it a superset). Moreover, RENAME_EXCHANGE would also add to the confusion because of paths being both a source and a destination. See the provided documentation for additional details. New tests are provided with a following commit. Reviewed-by: Paul Moore Signed-off-by: Mickaël Salaün Link: https://lore.kernel.org/r/20220506161102.525323-8-mic@digikod.net --- include/uapi/linux/landlock.h | 27 +++++++++++++++++++++++++-- 1 file changed, 25 insertions(+), 2 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/landlock.h b/include/uapi/linux/landlock.h index 21c8d58283c9..23df4e0e8ace 100644 --- a/include/uapi/linux/landlock.h +++ b/include/uapi/linux/landlock.h @@ -21,8 +21,14 @@ struct landlock_ruleset_attr { /** * @handled_access_fs: Bitmask of actions (cf. `Filesystem flags`_) * that is handled by this ruleset and should then be forbidden if no - * rule explicitly allow them. This is needed for backward - * compatibility reasons. + * rule explicitly allow them: it is a deny-by-default list that should + * contain as much Landlock access rights as possible. Indeed, all + * Landlock filesystem access rights that are not part of + * handled_access_fs are allowed. This is needed for backward + * compatibility reasons. 
One exception is the + * LANDLOCK_ACCESS_FS_REFER access right, which is always implicitly + * handled, but must still be explicitly handled to add new rules with + * this access right. */ __u64 handled_access_fs; }; @@ -112,6 +118,22 @@ struct landlock_path_beneath_attr { * - %LANDLOCK_ACCESS_FS_MAKE_FIFO: Create (or rename or link) a named pipe. * - %LANDLOCK_ACCESS_FS_MAKE_BLOCK: Create (or rename or link) a block device. * - %LANDLOCK_ACCESS_FS_MAKE_SYM: Create (or rename or link) a symbolic link. + * - %LANDLOCK_ACCESS_FS_REFER: Link or rename a file from or to a different + * directory (i.e. reparent a file hierarchy). This access right is + * available since the second version of the Landlock ABI. This is also the + * only access right which is always considered handled by any ruleset in + * such a way that reparenting a file hierarchy is always denied by default. + * To avoid privilege escalation, it is not enough to add a rule with this + * access right. When linking or renaming a file, the destination directory + * hierarchy must also always have the same or a superset of restrictions of + * the source hierarchy. If it is not the case, or if the domain doesn't + * handle this access right, such actions are denied by default with errno + * set to EXDEV. Linking also requires a LANDLOCK_ACCESS_FS_MAKE_* access + * right on the destination directory, and renaming also requires a + * LANDLOCK_ACCESS_FS_REMOVE_* access right on the source's (file or + * directory) parent. Otherwise, such actions are denied with errno set to + * EACCES. The EACCES errno prevails over EXDEV to let user space + * efficiently deal with an unrecoverable error. * * .. 
warning:: * @@ -137,6 +159,7 @@ struct landlock_path_beneath_attr { #define LANDLOCK_ACCESS_FS_MAKE_FIFO (1ULL << 10) #define LANDLOCK_ACCESS_FS_MAKE_BLOCK (1ULL << 11) #define LANDLOCK_ACCESS_FS_MAKE_SYM (1ULL << 12) +#define LANDLOCK_ACCESS_FS_REFER (1ULL << 13) /* clang-format on */ #endif /* _UAPI_LINUX_LANDLOCK_H */ -- cgit v1.2.3 From 97e03f521050c092919591e668107b3d69c5f426 Mon Sep 17 00:00:00 2001 From: Joanne Koong Date: Mon, 23 May 2022 14:07:07 -0700 Subject: bpf: Add verifier support for dynptrs This patch adds the bulk of the verifier work for supporting dynamic pointers (dynptrs) in bpf. A bpf_dynptr is opaque to the bpf program. It is a 16-byte structure defined internally as: struct bpf_dynptr_kern { void *data; u32 size; u32 offset; } __aligned(8); The upper 8 bits of *size* is reserved (it contains extra metadata about read-only status and dynptr type). Consequently, a dynptr only supports memory less than 16 MB. There are different types of dynptrs (eg malloc, ringbuf, ...). In this patchset, the most basic one, dynptrs to a bpf program's local memory, is added. For now only local memory that is of reg type PTR_TO_MAP_VALUE is supported. In the verifier, dynptr state information will be tracked in stack slots. When the program passes in an uninitialized dynptr (ARG_PTR_TO_DYNPTR | MEM_UNINIT), the stack slots corresponding to the frame pointer where the dynptr resides at are marked STACK_DYNPTR. For helper functions that take in initialized dynptrs (eg bpf_dynptr_read + bpf_dynptr_write which are added later in this patchset), the verifier enforces that the dynptr has been initialized properly by checking that their corresponding stack slots have been marked as STACK_DYNPTR. The 6th patch in this patchset adds test cases that the verifier should successfully reject, such as for example attempting to use a dynptr after doing a direct write into it inside the bpf program. 
Signed-off-by: Joanne Koong Signed-off-by: Andrii Nakryiko Acked-by: Andrii Nakryiko Acked-by: David Vernet Link: https://lore.kernel.org/bpf/20220523210712.3641569-2-joannelkoong@gmail.com --- include/uapi/linux/bpf.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 56688bee20d9..610944cb3389 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -6528,6 +6528,11 @@ struct bpf_timer { __u64 :64; } __attribute__((aligned(8))); +struct bpf_dynptr { + __u64 :64; + __u64 :64; +} __attribute__((aligned(8))); + struct bpf_sysctl { __u32 write; /* Sysctl is being read (= 0) or written (= 1). * Allows 1,2,4-byte read, but no write. -- cgit v1.2.3 From 263ae152e96253f40c2c276faad8629e096b3bad Mon Sep 17 00:00:00 2001 From: Joanne Koong Date: Mon, 23 May 2022 14:07:08 -0700 Subject: bpf: Add bpf_dynptr_from_mem for local dynptrs This patch adds a new api bpf_dynptr_from_mem: long bpf_dynptr_from_mem(void *data, u32 size, u64 flags, struct bpf_dynptr *ptr); which initializes a dynptr to point to a bpf program's local memory. For now only local memory that is of reg type PTR_TO_MAP_VALUE is supported. Signed-off-by: Joanne Koong Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20220523210712.3641569-3-joannelkoong@gmail.com --- include/uapi/linux/bpf.h | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 610944cb3389..9be3644457dd 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -5178,6 +5178,17 @@ union bpf_attr { * Dynamically cast a *sk* pointer to a *mptcp_sock* pointer. * Return * *sk* if casting is valid, or **NULL** otherwise. + * + * long bpf_dynptr_from_mem(void *data, u32 size, u64 flags, struct bpf_dynptr *ptr) + * Description + * Get a dynptr to local memory *data*. + * + * *data* must be a ptr to a map value. 
+ * The maximum *size* supported is DYNPTR_MAX_SIZE. + * *flags* is currently unused. + * Return + * 0 on success, -E2BIG if the size exceeds DYNPTR_MAX_SIZE, + * -EINVAL if flags is not 0. */ #define __BPF_FUNC_MAPPER(FN) \ FN(unspec), \ @@ -5377,6 +5388,7 @@ union bpf_attr { FN(kptr_xchg), \ FN(map_lookup_percpu_elem), \ FN(skc_to_mptcp_sock), \ + FN(dynptr_from_mem), \ /* */ /* integer value in 'imm' field of BPF_CALL instruction selects which helper -- cgit v1.2.3 From bc34dee65a65e9c920c420005b8a43f2a721a458 Mon Sep 17 00:00:00 2001 From: Joanne Koong Date: Mon, 23 May 2022 14:07:09 -0700 Subject: bpf: Dynptr support for ring buffers MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Currently, our only way of writing dynamically-sized data into a ring buffer is through bpf_ringbuf_output but this incurs an extra memcpy cost. bpf_ringbuf_reserve + bpf_ringbuf_commit avoids this extra memcpy, but it can only safely support reservation sizes that are statically known since the verifier cannot guarantee that the bpf program won’t access memory outside the reserved space. The bpf_dynptr abstraction allows for dynamically-sized ring buffer reservations without the extra memcpy. There are 3 new APIs: long bpf_ringbuf_reserve_dynptr(void *ringbuf, u32 size, u64 flags, struct bpf_dynptr *ptr); void bpf_ringbuf_submit_dynptr(struct bpf_dynptr *ptr, u64 flags); void bpf_ringbuf_discard_dynptr(struct bpf_dynptr *ptr, u64 flags); These closely follow the functionalities of the original ringbuf APIs. For example, all ringbuffer dynptrs that have been reserved must be either submitted or discarded before the program exits. 
Signed-off-by: Joanne Koong Signed-off-by: Andrii Nakryiko Acked-by: Andrii Nakryiko Acked-by: David Vernet Link: https://lore.kernel.org/bpf/20220523210712.3641569-4-joannelkoong@gmail.com --- include/uapi/linux/bpf.h | 35 +++++++++++++++++++++++++++++++++++ 1 file changed, 35 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 9be3644457dd..081a55540aa5 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -5189,6 +5189,38 @@ union bpf_attr { * Return * 0 on success, -E2BIG if the size exceeds DYNPTR_MAX_SIZE, * -EINVAL if flags is not 0. + * + * long bpf_ringbuf_reserve_dynptr(void *ringbuf, u32 size, u64 flags, struct bpf_dynptr *ptr) + * Description + * Reserve *size* bytes of payload in a ring buffer *ringbuf* + * through the dynptr interface. *flags* must be 0. + * + * Please note that a corresponding bpf_ringbuf_submit_dynptr or + * bpf_ringbuf_discard_dynptr must be called on *ptr*, even if the + * reservation fails. This is enforced by the verifier. + * Return + * 0 on success, or a negative error in case of failure. + * + * void bpf_ringbuf_submit_dynptr(struct bpf_dynptr *ptr, u64 flags) + * Description + * Submit reserved ring buffer sample, pointed to by *data*, + * through the dynptr interface. This is a no-op if the dynptr is + * invalid/null. + * + * For more information on *flags*, please see + * 'bpf_ringbuf_submit'. + * Return + * Nothing. Always succeeds. + * + * void bpf_ringbuf_discard_dynptr(struct bpf_dynptr *ptr, u64 flags) + * Description + * Discard reserved ring buffer sample through the dynptr + * interface. This is a no-op if the dynptr is invalid/null. + * + * For more information on *flags*, please see + * 'bpf_ringbuf_discard'. + * Return + * Nothing. Always succeeds. 
*/ #define __BPF_FUNC_MAPPER(FN) \ FN(unspec), \ @@ -5389,6 +5421,9 @@ union bpf_attr { FN(map_lookup_percpu_elem), \ FN(skc_to_mptcp_sock), \ FN(dynptr_from_mem), \ + FN(ringbuf_reserve_dynptr), \ + FN(ringbuf_submit_dynptr), \ + FN(ringbuf_discard_dynptr), \ /* */ /* integer value in 'imm' field of BPF_CALL instruction selects which helper -- cgit v1.2.3 From 13bbbfbea7598ea9f8d9c3d73bf053bb57f9c4b2 Mon Sep 17 00:00:00 2001 From: Joanne Koong Date: Mon, 23 May 2022 14:07:10 -0700 Subject: bpf: Add bpf_dynptr_read and bpf_dynptr_write This patch adds two helper functions, bpf_dynptr_read and bpf_dynptr_write: long bpf_dynptr_read(void *dst, u32 len, struct bpf_dynptr *src, u32 offset); long bpf_dynptr_write(struct bpf_dynptr *dst, u32 offset, void *src, u32 len); The dynptr passed into these functions must be valid dynptrs that have been initialized. Signed-off-by: Joanne Koong Signed-off-by: Andrii Nakryiko Acked-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20220523210712.3641569-5-joannelkoong@gmail.com --- include/uapi/linux/bpf.h | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 081a55540aa5..efe2505650e6 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -5221,6 +5221,23 @@ union bpf_attr { * 'bpf_ringbuf_discard'. * Return * Nothing. Always succeeds. + * + * long bpf_dynptr_read(void *dst, u32 len, struct bpf_dynptr *src, u32 offset) + * Description + * Read *len* bytes from *src* into *dst*, starting from *offset* + * into *src*. + * Return + * 0 on success, -E2BIG if *offset* + *len* exceeds the length + * of *src*'s data, -EINVAL if *src* is an invalid dynptr. + * + * long bpf_dynptr_write(struct bpf_dynptr *dst, u32 offset, void *src, u32 len) + * Description + * Write *len* bytes from *src* into *dst*, starting from *offset* + * into *dst*. 
+ * Return + * 0 on success, -E2BIG if *offset* + *len* exceeds the length + * of *dst*'s data, -EINVAL if *dst* is an invalid dynptr or if *dst* + * is a read-only dynptr. */ #define __BPF_FUNC_MAPPER(FN) \ FN(unspec), \ @@ -5424,6 +5441,8 @@ union bpf_attr { FN(ringbuf_reserve_dynptr), \ FN(ringbuf_submit_dynptr), \ FN(ringbuf_discard_dynptr), \ + FN(dynptr_read), \ + FN(dynptr_write), \ /* */ /* integer value in 'imm' field of BPF_CALL instruction selects which helper -- cgit v1.2.3 From 34d4ef5775f776ec4b0d53a02d588bf3195cada6 Mon Sep 17 00:00:00 2001 From: Joanne Koong Date: Mon, 23 May 2022 14:07:11 -0700 Subject: bpf: Add dynptr data slices This patch adds a new helper function void *bpf_dynptr_data(struct bpf_dynptr *ptr, u32 offset, u32 len); which returns a pointer to the underlying data of a dynptr. *len* must be a statically known value. The bpf program may access the returned data slice as a normal buffer (eg can do direct reads and writes), since the verifier associates the length with the returned pointer, and enforces that no out of bounds accesses occur. Signed-off-by: Joanne Koong Signed-off-by: Andrii Nakryiko Acked-by: Yonghong Song Link: https://lore.kernel.org/bpf/20220523210712.3641569-6-joannelkoong@gmail.com --- include/uapi/linux/bpf.h | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index efe2505650e6..f4009dbdf62d 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -5238,6 +5238,17 @@ union bpf_attr { * 0 on success, -E2BIG if *offset* + *len* exceeds the length * of *dst*'s data, -EINVAL if *dst* is an invalid dynptr or if *dst* * is a read-only dynptr. + * + * void *bpf_dynptr_data(struct bpf_dynptr *ptr, u32 offset, u32 len) + * Description + * Get a pointer to the underlying dynptr data. + * + * *len* must be a statically known value. The returned data slice + * is invalidated whenever the dynptr is invalidated. 
+ * Return + * Pointer to the underlying dynptr data, NULL if the dynptr is + * read-only, if the dynptr is invalid, or if the offset and length + * is out of bounds. */ #define __BPF_FUNC_MAPPER(FN) \ FN(unspec), \ @@ -5443,6 +5454,7 @@ union bpf_attr { FN(ringbuf_discard_dynptr), \ FN(dynptr_read), \ FN(dynptr_write), \ + FN(dynptr_data), \ /* */ /* integer value in 'imm' field of BPF_CALL instruction selects which helper -- cgit v1.2.3 From e5499dd7253c8382d03f687f19a854adcc688357 Mon Sep 17 00:00:00 2001 From: Sean Young Date: Wed, 25 May 2022 14:08:30 +0100 Subject: media: lirc: revert removal of unused feature flags Commit b2a90f4fcb14 ("media: lirc: remove unused lirc features") removed feature flags which were never implemented, but they are still used by the lirc daemon when built from source. Reinstate these symbols in order not to break the lirc build. Fixes: b2a90f4fcb14 ("media: lirc: remove unused lirc features") Link: https://lore.kernel.org/all/a0470450-ecfd-2918-e04a-7b57c1fd7694@kernel.org/ Reported-by: Jiri Slaby Cc: Mauro Carvalho Chehab Signed-off-by: Sean Young Signed-off-by: Linus Torvalds --- include/uapi/linux/lirc.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/lirc.h b/include/uapi/linux/lirc.h index 23b0f2c8ba81..8d7ca7c6af42 100644 --- a/include/uapi/linux/lirc.h +++ b/include/uapi/linux/lirc.h @@ -84,6 +84,13 @@ #define LIRC_CAN_SEND(x) ((x)&LIRC_CAN_SEND_MASK) #define LIRC_CAN_REC(x) ((x)&LIRC_CAN_REC_MASK) +/* + * Unused features. These features were never implemented, in tree or + * out of tree. These definitions are here so not to break the lircd build. 
+ */ +#define LIRC_CAN_SET_REC_FILTER 0 +#define LIRC_CAN_NOTIFY_DECODE 0 + /*** IOCTL commands for lirc driver ***/ #define LIRC_GET_FEATURES _IOR('i', 0x00000000, __u32) -- cgit v1.2.3 From caa28984163cb63ea0be4cb8dbf05defdc7303f9 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Wed, 25 May 2022 09:02:19 -0700 Subject: linux/types.h: reinstate "__bitwise__" macro for user space use Commit c724c866bb70 ("linux/types.h: remove unnecessary __bitwise__") was right that there are no users of __bitwise__ in the kernel, but it turns out there are user space users of it that do expect it. It is, after all, in the uapi directory, so user space usage is to be expected. Instead of reverting the commit completely, let's just clarify the situation so that it doesn't happen again, and have some in-code explanations for why that "__bitwise__" still exists. Reported-by: Jiri Slaby Cc: Bjorn Helgaas Link: https://lore.kernel.org/all/b5c0a68d-8387-4909-beea-f70ab9e6e3d5@kernel.org/ Signed-off-by: Linus Torvalds --- include/uapi/linux/types.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/types.h b/include/uapi/linux/types.h index c4dc597f3dcf..308433be33c2 100644 --- a/include/uapi/linux/types.h +++ b/include/uapi/linux/types.h @@ -26,6 +26,9 @@ #define __bitwise #endif +/* The kernel doesn't use this legacy form, but user space does */ +#define __bitwise__ __bitwise + typedef __u16 __bitwise __le16; typedef __u16 __bitwise __be16; typedef __u32 __bitwise __le32; -- cgit v1.2.3 From a7c41b4687f5902af70cd559806990930c8a307b Mon Sep 17 00:00:00 2001 From: Xiaoguang Wang Date: Mon, 30 May 2022 21:15:20 +0800 Subject: io_uring: let IORING_OP_FILES_UPDATE support choosing fixed file slots One big issue with the file registration feature is that it needs user space apps to maintain free slot info about io_uring's fixed file table, which really is a burden for development. 
io_uring now supports choosing free file slot for user space apps by using IORING_FILE_INDEX_ALLOC flag in accept, open, and socket operations, but they need the app to use direct accept or direct open, which not all apps are prepared to use yet. To support apps that still need real fds, make use of the registration feature easier. Let IORING_OP_FILES_UPDATE support choosing fixed file slots, which will store picked fixed files slots in fd array and let cqe return the number of slots allocated. Suggested-by: Hao Xu Signed-off-by: Xiaoguang Wang [axboe: move flag to uapi io_uring header, change goto to break, init] Signed-off-by: Jens Axboe --- include/uapi/linux/io_uring.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h index 53e7dae92e42..776e0278f9dd 100644 --- a/include/uapi/linux/io_uring.h +++ b/include/uapi/linux/io_uring.h @@ -47,6 +47,7 @@ struct io_uring_sqe { __u32 unlink_flags; __u32 hardlink_flags; __u32 xattr_flags; + __u32 close_flags; }; __u64 user_data; /* data to be passed back at completion time */ /* pack this to avoid bogus arm OABI complaints */ @@ -258,6 +259,11 @@ enum io_uring_op { */ #define IORING_ACCEPT_MULTISHOT (1U << 0) +/* + * close flags, store in sqe->close_flags + */ +#define IORING_CLOSE_FD_AND_FILE_SLOT (1U << 0) + /* * IO completion data structure (Completion Queue Entry) */ -- cgit v1.2.3 From 3e0b8f529c10037ae0b369fc892e524eae5a5485 Mon Sep 17 00:00:00 2001 From: Arun Ajith S Date: Mon, 30 May 2022 10:14:14 +0000 Subject: net/ipv6: Expand and rename accept_unsolicited_na to accept_untracked_na RFC 9131 changes default behaviour of handling RX of NA messages when the corresponding entry is absent in the neighbour cache. The current implementation is limited to accept just unsolicited NAs. However, the RFC is more generic where it also accepts solicited NAs. Both types should result in adding a STALE entry for this case. 
Expand accept_untracked_na behaviour to also accept solicited NAs to be compliant with the RFC and rename the sysctl knob to accept_untracked_na. Fixes: f9a2fb73318e ("net/ipv6: Introduce accept_unsolicited_na knob to implement router-side changes for RFC9131") Signed-off-by: Arun Ajith S Reviewed-by: David Ahern Link: https://lore.kernel.org/r/20220530101414.65439-1-aajith@arista.com Signed-off-by: Paolo Abeni --- include/uapi/linux/ipv6.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/ipv6.h b/include/uapi/linux/ipv6.h index 549ddeaf788b..03cdbe798fe3 100644 --- a/include/uapi/linux/ipv6.h +++ b/include/uapi/linux/ipv6.h @@ -194,7 +194,7 @@ enum { DEVCONF_IOAM6_ID, DEVCONF_IOAM6_ID_WIDE, DEVCONF_NDISC_EVICT_NOCARRIER, - DEVCONF_ACCEPT_UNSOLICITED_NA, + DEVCONF_ACCEPT_UNTRACKED_NA, DEVCONF_MAX }; -- cgit v1.2.3 From 13b00b135665c92065a27c0c39dd97e0f380bd4f Mon Sep 17 00:00:00 2001 From: Eli Cohen Date: Wed, 18 May 2022 16:38:00 +0300 Subject: vdpa: Add support for querying vendor statistics Allows to read vendor statistics of a vdpa device. The specific statistics data are received from the upstream driver in the form of an (attribute name, attribute value) pairs. An example of statistics for mlx5_vdpa device are: received_desc - number of descriptors received by the virtqueue completed_desc - number of descriptors completed by the virtqueue A descriptor using indirect buffers is still counted as 1. In addition, N chained descriptors are counted correctly N times as one would expect. A new callback was added to vdpa_config_ops which provides the means for the vdpa driver to return statistics results. The interface allows for reading all the supported virtqueues, including the control virtqueue if it exists. Below are some examples taken from mlx5_vdpa which are introduced in the following patch: 1. 
Read statistics for the virtqueue at index 1 $ vdpa dev vstats show vdpa-a qidx 1 vdpa-a: queue_type tx queue_index 1 received_desc 3844836 completed_desc 3844836 2. Read statistics for the virtqueue at index 32 $ vdpa dev vstats show vdpa-a qidx 32 vdpa-a: queue_type control_vq queue_index 32 received_desc 62 completed_desc 62 3. Read statistics for the virtqueue at index 0 with json output $ vdpa -j dev vstats show vdpa-a qidx 0 {"vstats":{"vdpa-a":{ "queue_type":"rx","queue_index":0,"name":"received_desc","value":417776,\ "name":"completed_desc","value":417548}}} 4. Read statistics for the virtqueue at index 0 with pretty json output $ vdpa -jp dev vstats show vdpa-a qidx 0 { "vstats": { "vdpa-a": { "queue_type": "rx", "queue_index": 0, "name": "received_desc", "value": 417776, "name": "completed_desc", "value": 417548 } } } Signed-off-by: Eli Cohen Message-Id: <20220518133804.1075129-3-elic@nvidia.com> Signed-off-by: Michael S. Tsirkin --- include/uapi/linux/vdpa.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/vdpa.h b/include/uapi/linux/vdpa.h index 1061d8d2d09d..25c55cab3d7c 100644 --- a/include/uapi/linux/vdpa.h +++ b/include/uapi/linux/vdpa.h @@ -18,6 +18,7 @@ enum vdpa_command { VDPA_CMD_DEV_DEL, VDPA_CMD_DEV_GET, /* can dump */ VDPA_CMD_DEV_CONFIG_GET, /* can dump */ + VDPA_CMD_DEV_VSTATS_GET, }; enum vdpa_attr { @@ -46,6 +47,11 @@ enum vdpa_attr { VDPA_ATTR_DEV_NEGOTIATED_FEATURES, /* u64 */ VDPA_ATTR_DEV_MGMTDEV_MAX_VQS, /* u32 */ VDPA_ATTR_DEV_SUPPORTED_FEATURES, /* u64 */ + + VDPA_ATTR_DEV_QUEUE_INDEX, /* u32 */ + VDPA_ATTR_DEV_VENDOR_ATTR_NAME, /* string */ + VDPA_ATTR_DEV_VENDOR_ATTR_VALUE, /* u64 */ + /* new attributes must be added above here */ VDPA_ATTR_MAX, }; -- cgit v1.2.3 From 175d493c3c3e09a3abaa843068fae0f0ad42c47e Mon Sep 17 00:00:00 2001 From: Gautam Dawar Date: Wed, 30 Mar 2022 23:33:41 +0530 Subject: vhost: move the backend feature bits to vhost_types.h We should store 
feature bits in vhost_types.h as what has been done for e.g VHOST_F_LOG_ALL. Signed-off-by: Jason Wang Signed-off-by: Gautam Dawar Message-Id: <20220330180436.24644-2-gdawar@xilinx.com> Signed-off-by: Michael S. Tsirkin --- include/uapi/linux/vhost.h | 5 ----- include/uapi/linux/vhost_types.h | 5 +++++ 2 files changed, 5 insertions(+), 5 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/vhost.h b/include/uapi/linux/vhost.h index 5d99e7c242a2..8f7b4a95d6f9 100644 --- a/include/uapi/linux/vhost.h +++ b/include/uapi/linux/vhost.h @@ -89,11 +89,6 @@ /* Set or get vhost backend capability */ -/* Use message type V2 */ -#define VHOST_BACKEND_F_IOTLB_MSG_V2 0x1 -/* IOTLB can accept batching hints */ -#define VHOST_BACKEND_F_IOTLB_BATCH 0x2 - #define VHOST_SET_BACKEND_FEATURES _IOW(VHOST_VIRTIO, 0x25, __u64) #define VHOST_GET_BACKEND_FEATURES _IOR(VHOST_VIRTIO, 0x26, __u64) diff --git a/include/uapi/linux/vhost_types.h b/include/uapi/linux/vhost_types.h index f7f6a3a28977..76ee7016c501 100644 --- a/include/uapi/linux/vhost_types.h +++ b/include/uapi/linux/vhost_types.h @@ -153,4 +153,9 @@ struct vhost_vdpa_iova_range { /* vhost-net should add virtio_net_hdr for RX, and strip for TX packets. */ #define VHOST_NET_F_VIRTIO_NET_HDR 27 +/* Use message type V2 */ +#define VHOST_BACKEND_F_IOTLB_MSG_V2 0x1 +/* IOTLB can accept batching hints */ +#define VHOST_BACKEND_F_IOTLB_BATCH 0x2 + #endif -- cgit v1.2.3 From 91233ad711866f4e375742d84ef3ed6aab9daa96 Mon Sep 17 00:00:00 2001 From: Gautam Dawar Date: Wed, 30 Mar 2022 23:33:49 +0530 Subject: vhost: support ASID in IOTLB API This patches allows userspace to send ASID based IOTLB message to vhost. This idea is to use the reserved u32 field in the existing V2 IOTLB message. Vhost device should advertise this capability via VHOST_BACKEND_F_IOTLB_ASID backend feature. Signed-off-by: Jason Wang Signed-off-by: Gautam Dawar Message-Id: <20220330180436.24644-10-gdawar@xilinx.com> Signed-off-by: Michael S. 
Tsirkin --- include/uapi/linux/vhost_types.h | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/vhost_types.h b/include/uapi/linux/vhost_types.h index 76ee7016c501..634cee485abb 100644 --- a/include/uapi/linux/vhost_types.h +++ b/include/uapi/linux/vhost_types.h @@ -87,7 +87,7 @@ struct vhost_msg { struct vhost_msg_v2 { __u32 type; - __u32 reserved; + __u32 asid; union { struct vhost_iotlb_msg iotlb; __u8 padding[64]; @@ -157,5 +157,9 @@ struct vhost_vdpa_iova_range { #define VHOST_BACKEND_F_IOTLB_MSG_V2 0x1 /* IOTLB can accept batching hints */ #define VHOST_BACKEND_F_IOTLB_BATCH 0x2 +/* IOTLB can accept address space identifier through V2 type of IOTLB + * message + */ +#define VHOST_BACKEND_F_IOTLB_ASID 0x3 #endif -- cgit v1.2.3 From 3ace88bd37436abc84906312146fe5158a469142 Mon Sep 17 00:00:00 2001 From: Gautam Dawar Date: Wed, 30 Mar 2022 23:33:51 +0530 Subject: vhost-vdpa: introduce uAPI to get the number of virtqueue groups Follows the vDPA support for multiple address spaces, this patch introduce uAPI for the userspace to know the number of virtqueue groups supported by the vDPA device. Signed-off-by: Jason Wang Signed-off-by: Gautam Dawar Message-Id: <20220330180436.24644-12-gdawar@xilinx.com> Signed-off-by: Michael S. 
Tsirkin --- include/uapi/linux/vhost.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/vhost.h b/include/uapi/linux/vhost.h index 8f7b4a95d6f9..61317c61d768 100644 --- a/include/uapi/linux/vhost.h +++ b/include/uapi/linux/vhost.h @@ -145,11 +145,13 @@ /* Get the valid iova range */ #define VHOST_VDPA_GET_IOVA_RANGE _IOR(VHOST_VIRTIO, 0x78, \ struct vhost_vdpa_iova_range) - /* Get the config size */ #define VHOST_VDPA_GET_CONFIG_SIZE _IOR(VHOST_VIRTIO, 0x79, __u32) /* Get the count of all virtqueues */ #define VHOST_VDPA_GET_VQS_COUNT _IOR(VHOST_VIRTIO, 0x80, __u32) +/* Get the number of virtqueue groups. */ +#define VHOST_VDPA_GET_GROUP_NUM _IOR(VHOST_VIRTIO, 0x81, __u32) + #endif -- cgit v1.2.3 From a0c95f201170bd559737d3cdc8a950aea62f29c6 Mon Sep 17 00:00:00 2001 From: Gautam Dawar Date: Wed, 30 Mar 2022 23:33:52 +0530 Subject: vhost-vdpa: introduce uAPI to get the number of address spaces This patch introduces the uAPI for getting the number of address spaces supported by this vDPA device. Signed-off-by: Jason Wang Signed-off-by: Gautam Dawar Message-Id: <20220330180436.24644-13-gdawar@xilinx.com> Signed-off-by: Michael S. Tsirkin --- include/uapi/linux/vhost.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/vhost.h b/include/uapi/linux/vhost.h index 61317c61d768..51322008901a 100644 --- a/include/uapi/linux/vhost.h +++ b/include/uapi/linux/vhost.h @@ -154,4 +154,6 @@ /* Get the number of virtqueue groups. */ #define VHOST_VDPA_GET_GROUP_NUM _IOR(VHOST_VIRTIO, 0x81, __u32) +/* Get the number of address spaces. */ +#define VHOST_VDPA_GET_AS_NUM _IOR(VHOST_VIRTIO, 0x7A, unsigned int) #endif -- cgit v1.2.3 From 2d1fcb7758e49fd9caf150f3c70804b95b2ce80c Mon Sep 17 00:00:00 2001 From: Gautam Dawar Date: Wed, 30 Mar 2022 23:33:53 +0530 Subject: vhost-vdpa: uAPI to get virtqueue group id Follows the support for virtqueue group in vDPA. 
This patch introduces uAPI to get the virtqueue group ID for a specific virtqueue in vhost-vdpa. Signed-off-by: Jason Wang Signed-off-by: Gautam Dawar Message-Id: <20220330180436.24644-14-gdawar@xilinx.com> Signed-off-by: Michael S. Tsirkin --- include/uapi/linux/vhost.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/vhost.h b/include/uapi/linux/vhost.h index 51322008901a..668914c87f74 100644 --- a/include/uapi/linux/vhost.h +++ b/include/uapi/linux/vhost.h @@ -156,4 +156,12 @@ /* Get the number of address spaces. */ #define VHOST_VDPA_GET_AS_NUM _IOR(VHOST_VIRTIO, 0x7A, unsigned int) + +/* Get the group for a virtqueue: read index, write group in num, + * The virtqueue index is stored in the index field of + * vhost_vring_state. The group for this specific virtqueue is + * returned via num field of vhost_vring_state. + */ +#define VHOST_VDPA_GET_VRING_GROUP _IOWR(VHOST_VIRTIO, 0x7B, \ + struct vhost_vring_state) #endif -- cgit v1.2.3 From 84d7c8fd3aade2fe79313003ed06ede431ec2a6d Mon Sep 17 00:00:00 2001 From: Gautam Dawar Date: Wed, 30 Mar 2022 23:33:54 +0530 Subject: vhost-vdpa: introduce uAPI to set group ASID Follows the vDPA support for associating ASID to a specific virtqueue group. This patch adds a uAPI to support setting them from userspace. Signed-off-by: Jason Wang Signed-off-by: Gautam Dawar Message-Id: <20220330180436.24644-15-gdawar@xilinx.com> Signed-off-by: Michael S. Tsirkin --- include/uapi/linux/vhost.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/vhost.h b/include/uapi/linux/vhost.h index 668914c87f74..cab645d4a645 100644 --- a/include/uapi/linux/vhost.h +++ b/include/uapi/linux/vhost.h @@ -164,4 +164,11 @@ */ #define VHOST_VDPA_GET_VRING_GROUP _IOWR(VHOST_VIRTIO, 0x7B, \ struct vhost_vring_state) +/* Set the ASID for a virtqueue group. 
The group index is stored in + * the index field of vhost_vring_state, the ASID associated with this + * group is stored at num field of vhost_vring_state. + */ +#define VHOST_VDPA_SET_GROUP_ASID _IOW(VHOST_VIRTIO, 0x7C, \ + struct vhost_vring_state) + #endif -- cgit v1.2.3 From 35d02493dba1ae6386fac07072908717affc3ff8 Mon Sep 17 00:00:00 2001 From: Janosch Frank Date: Tue, 17 May 2022 16:36:21 +0000 Subject: KVM: s390: pv: Add query interface Some of the query information is already available via sysfs but having a IOCTL makes the information easier to retrieve. Signed-off-by: Janosch Frank Reviewed-by: Claudio Imbrenda Reviewed-by: Steffen Eiden Link: https://lore.kernel.org/r/20220517163629.3443-4-frankja@linux.ibm.com Message-Id: <20220517163629.3443-4-frankja@linux.ibm.com> Signed-off-by: Christian Borntraeger --- include/uapi/linux/kvm.h | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index 5088bd9f1922..5a5f66026dd3 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -1660,6 +1660,30 @@ struct kvm_s390_pv_unp { __u64 tweak; }; +enum pv_cmd_info_id { + KVM_PV_INFO_VM, +}; + +struct kvm_s390_pv_info_vm { + __u64 inst_calls_list[4]; + __u64 max_cpus; + __u64 max_guests; + __u64 max_guest_addr; + __u64 feature_indication; +}; + +struct kvm_s390_pv_info_header { + __u32 id; + __u32 len_max; + __u32 len_written; + __u32 reserved; +}; + +struct kvm_s390_pv_info { + struct kvm_s390_pv_info_header header; + struct kvm_s390_pv_info_vm vm; +}; + enum pv_cmd_id { KVM_PV_ENABLE, KVM_PV_DISABLE, @@ -1668,6 +1692,7 @@ enum pv_cmd_id { KVM_PV_VERIFY, KVM_PV_PREP_RESET, KVM_PV_UNSHARE_ALL, + KVM_PV_INFO, }; struct kvm_pv_cmd { -- cgit v1.2.3 From fe9a93e07ba4f29def2f8a4318b63e0c70a5c6c2 Mon Sep 17 00:00:00 2001 From: Janosch Frank Date: Tue, 17 May 2022 16:36:23 +0000 Subject: KVM: s390: pv: Add query dump information The dump API requires 
userspace to provide buffers into which we will store data. The dump information added in this patch tells userspace how big those buffers need to be. Signed-off-by: Janosch Frank Reviewed-by: Claudio Imbrenda Reviewed-by: Steffen Eiden Link: https://lore.kernel.org/r/20220517163629.3443-6-frankja@linux.ibm.com Message-Id: <20220517163629.3443-6-frankja@linux.ibm.com> Signed-off-by: Christian Borntraeger --- include/uapi/linux/kvm.h | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index 5a5f66026dd3..065a05ec06b6 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -1662,6 +1662,13 @@ struct kvm_s390_pv_unp { enum pv_cmd_info_id { KVM_PV_INFO_VM, + KVM_PV_INFO_DUMP, +}; + +struct kvm_s390_pv_info_dump { + __u64 dump_cpu_buffer_len; + __u64 dump_config_mem_buffer_per_1m; + __u64 dump_config_finalize_len; }; struct kvm_s390_pv_info_vm { @@ -1681,7 +1688,10 @@ struct kvm_s390_pv_info_header { struct kvm_s390_pv_info { struct kvm_s390_pv_info_header header; - struct kvm_s390_pv_info_vm vm; + union { + struct kvm_s390_pv_info_dump dump; + struct kvm_s390_pv_info_vm vm; + }; }; enum pv_cmd_id { -- cgit v1.2.3 From 0460eb35b443f73f8a8e3be1ea87bd690a852e20 Mon Sep 17 00:00:00 2001 From: Janosch Frank Date: Tue, 17 May 2022 16:36:24 +0000 Subject: KVM: s390: Add configuration dump functionality Sometimes dumping inside of a VM fails, is unavailable or doesn't yield the required data. For these occasions we dump the VM from the outside, writing memory and cpu data to a file. Up to now PV guests only supported dumping from the inside of the guest through dumpers like KDUMP. A PV guest can be dumped from the hypervisor but the data will be stale and / or encrypted. To get the actual state of the PV VM we need the help of the Ultravisor who safeguards the VM state. 
New UV calls have been added to initialize the dump, dump storage state data, dump cpu data and complete the dump process. We expose these calls in this patch via a new UV ioctl command. The sensitive parts of the dump data are encrypted, the dump key is derived from the Customer Communication Key (CCK). This ensures that only the owner of the VM who has the CCK can decrypt the dump data. The memory is dumped / read via a normal export call and a re-import after the dump initialization is not needed (no re-encryption with a dump key). Signed-off-by: Janosch Frank Reviewed-by: Claudio Imbrenda Link: https://lore.kernel.org/r/20220517163629.3443-7-frankja@linux.ibm.com Message-Id: <20220517163629.3443-7-frankja@linux.ibm.com> Signed-off-by: Christian Borntraeger --- include/uapi/linux/kvm.h | 15 +++++++++++++++ 1 file changed, 15 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index 065a05ec06b6..673be2061c6c 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -1660,6 +1660,20 @@ struct kvm_s390_pv_unp { __u64 tweak; }; +enum pv_cmd_dmp_id { + KVM_PV_DUMP_INIT, + KVM_PV_DUMP_CONFIG_STOR_STATE, + KVM_PV_DUMP_COMPLETE, +}; + +struct kvm_s390_pv_dmp { + __u64 subcmd; + __u64 buff_addr; + __u64 buff_len; + __u64 gaddr; /* For dump storage state */ + __u64 reserved[4]; +}; + enum pv_cmd_info_id { KVM_PV_INFO_VM, KVM_PV_INFO_DUMP, @@ -1703,6 +1717,7 @@ enum pv_cmd_id { KVM_PV_PREP_RESET, KVM_PV_UNSHARE_ALL, KVM_PV_INFO, + KVM_PV_DUMP, }; struct kvm_pv_cmd { -- cgit v1.2.3 From 8aba09588d2af37c6cc1a781b87d1d91ebf389ae Mon Sep 17 00:00:00 2001 From: Janosch Frank Date: Tue, 17 May 2022 16:36:25 +0000 Subject: KVM: s390: Add CPU dump functionality The previous patch introduced the per-VM dump functions now let's focus on dumping the VCPU state via the newly introduced KVM_S390_PV_CPU_COMMAND ioctl which mirrors the VM UV ioctl and can be extended with new commands later. 
Signed-off-by: Janosch Frank Reviewed-by: Claudio Imbrenda Link: https://lore.kernel.org/r/20220517163629.3443-8-frankja@linux.ibm.com Message-Id: <20220517163629.3443-8-frankja@linux.ibm.com> Signed-off-by: Christian Borntraeger --- include/uapi/linux/kvm.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index 673be2061c6c..af5d254f8061 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -1664,6 +1664,7 @@ enum pv_cmd_dmp_id { KVM_PV_DUMP_INIT, KVM_PV_DUMP_CONFIG_STOR_STATE, KVM_PV_DUMP_COMPLETE, + KVM_PV_DUMP_CPU, }; struct kvm_s390_pv_dmp { @@ -2168,4 +2169,7 @@ struct kvm_stats_desc { /* Available with KVM_CAP_XSAVE2 */ #define KVM_GET_XSAVE2 _IOR(KVMIO, 0xcf, struct kvm_xsave) +/* Available with KVM_CAP_S390_PROTECTED_DUMP */ +#define KVM_S390_PV_CPU_COMMAND _IOWR(KVMIO, 0xd0, struct kvm_pv_cmd) + #endif /* __LINUX_KVM_H */ -- cgit v1.2.3 From e9bf3acb23f0a6e18438c35944d6cb618d16cf05 Mon Sep 17 00:00:00 2001 From: Janosch Frank Date: Tue, 17 May 2022 16:36:26 +0000 Subject: KVM: s390: Add KVM_CAP_S390_PROTECTED_DUMP The capability indicates dump support for protected VMs. 
Signed-off-by: Janosch Frank Reviewed-by: Claudio Imbrenda Link: https://lore.kernel.org/r/20220517163629.3443-9-frankja@linux.ibm.com Message-Id: <20220517163629.3443-9-frankja@linux.ibm.com> Signed-off-by: Christian Borntraeger --- include/uapi/linux/kvm.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index af5d254f8061..c4a32910b88a 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -1157,6 +1157,7 @@ struct kvm_ppc_resize_hpt { #define KVM_CAP_VM_TSC_CONTROL 214 #define KVM_CAP_SYSTEM_EVENT_DATA 215 #define KVM_CAP_ARM_SYSTEM_SUSPEND 216 +#define KVM_CAP_S390_PROTECTED_DUMP 217 #ifdef KVM_CAP_IRQ_ROUTING -- cgit v1.2.3 From 662ce1dc9caf493c309200edbe38d186f1ea20d0 Mon Sep 17 00:00:00 2001 From: Yang Yang Date: Wed, 1 Jun 2022 15:55:25 -0700 Subject: delayacct: track delays from write-protect copy Delay accounting does not track the delay of write-protect copy. When tasks trigger many write-protect copies (including COW and unsharing of anonymous pages[1]), they may spend a considerable amount of time waiting for them. Tracking the delay of tasks in write-protect copy helps users evaluate the impact of using KSM or fork() or GUP. 
Also update tools/accounting/getdelays.c: / # ./getdelays -dl -p 231 print delayacct stats ON listen forever PID 231 CPU count real total virtual total delay total delay average 6247 1859000000 2154070021 1674255063 0.268ms IO count delay total delay average 0 0 0ms SWAP count delay total delay average 0 0 0ms RECLAIM count delay total delay average 0 0 0ms THRASHING count delay total delay average 0 0 0ms COMPACT count delay total delay average 3 72758 0ms WPCOPY count delay total delay average 3635 271567604 0ms [1] commit 31cc5bc4af70("mm: support GUP-triggered unsharing of anonymous pages") Link: https://lkml.kernel.org/r/20220409014342.2505532-1-yang.yang29@zte.com.cn Signed-off-by: Yang Yang Reviewed-by: David Hildenbrand Reviewed-by: Jiang Xuexin Reviewed-by: Ran Xiaokai Reviewed-by: wangyong Cc: Jonathan Corbet Cc: Balbir Singh Cc: Mike Kravetz Cc: Stephen Rothwell Signed-off-by: Andrew Morton --- include/uapi/linux/taskstats.h | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/taskstats.h b/include/uapi/linux/taskstats.h index 736154171489..a7f5b11a8f1b 100644 --- a/include/uapi/linux/taskstats.h +++ b/include/uapi/linux/taskstats.h @@ -34,7 +34,7 @@ */ -#define TASKSTATS_VERSION 12 +#define TASKSTATS_VERSION 13 #define TS_COMM_LEN 32 /* should be >= TASK_COMM_LEN * in linux/sched.h */ @@ -194,6 +194,10 @@ struct taskstats { __u64 ac_exe_dev; /* program binary device ID */ __u64 ac_exe_inode; /* program binary inode number */ /* v12 end */ + + /* v13: Delay waiting for write-protect copy */ + __u64 wpcopy_count; + __u64 wpcopy_delay_total; }; -- cgit v1.2.3 From 8d3398ba2a0d1e25690f830192b7834acab003ec Mon Sep 17 00:00:00 2001 From: Tobias Klauser Date: Tue, 31 May 2022 11:43:45 +0200 Subject: socket: Don't use u8 type in uapi socket.h Use plain 255 instead, which also avoids introducing an additional header dependency on <linux/types.h> Fixes: 26859240e4ee ("txhash: Add socket option to control TX 
hash rethink behavior") Signed-off-by: Tobias Klauser Link: https://lore.kernel.org/r/20220531094345.13801-1-tklauser@distanz.ch Signed-off-by: Jakub Kicinski --- include/uapi/linux/socket.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/socket.h b/include/uapi/linux/socket.h index 51d6bb2f6765..d3fcd3b5ec53 100644 --- a/include/uapi/linux/socket.h +++ b/include/uapi/linux/socket.h @@ -31,7 +31,7 @@ struct __kernel_sockaddr_storage { #define SOCK_BUF_LOCK_MASK (SOCK_SNDBUF_LOCK | SOCK_RCVBUF_LOCK) -#define SOCK_TXREHASH_DEFAULT ((u8)-1) +#define SOCK_TXREHASH_DEFAULT 255 #define SOCK_TXREHASH_DISABLED 0 #define SOCK_TXREHASH_ENABLED 1 -- cgit v1.2.3 From 8cc5b032240ae5220b62c689c20459d3e1825b2d Mon Sep 17 00:00:00 2001 From: Carlos Llamas Date: Wed, 1 Jun 2022 01:00:17 +0000 Subject: binder: fix sender_euid type in uapi header The {pid,uid}_t fields of struct binder_transaction were recently replaced to use kernel types in commit 169adc2b6b3c ("android/binder.h: add linux/android/binder(fs).h to UAPI compile-test coverage"). However, using __kernel_uid_t here breaks backwards compatibility in architectures using 16-bits for this type, since glibc and some others still expect a 32-bit uid_t. Instead, let's use __kernel_uid32_t which avoids this compatibility problem. 
Fixes: 169adc2b6b3c ("android/binder.h: add linux/android/binder(fs).h to UAPI compile-test coverage") Reported-by: Christopher Ferris Signed-off-by: Carlos Llamas Acked-by: Todd Kjos Signed-off-by: Arnd Bergmann --- include/uapi/linux/android/binder.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/android/binder.h b/include/uapi/linux/android/binder.h index 11157fae8a8e..688bcdaeed53 100644 --- a/include/uapi/linux/android/binder.h +++ b/include/uapi/linux/android/binder.h @@ -289,7 +289,7 @@ struct binder_transaction_data { /* General information about the transaction. */ __u32 flags; __kernel_pid_t sender_pid; - __kernel_uid_t sender_euid; + __kernel_uid32_t sender_euid; binder_size_t data_size; /* number of bytes of data */ binder_size_t offsets_size; /* number of bytes of offsets */ -- cgit v1.2.3 From 08145b087e4481458f6075f3af58021a3cf8a940 Mon Sep 17 00:00:00 2001 From: Huacai Chen Date: Tue, 31 May 2022 18:04:10 +0800 Subject: LoongArch: Add ELF-related definitions Add ELF-related definitions for LoongArch, including: EM_LOONGARCH, KEXEC_ARCH_LOONGARCH, AUDIT_ARCH_LOONGARCH32, AUDIT_ARCH_LOONGARCH64 and NT_LOONGARCH_*. 
Reviewed-by: WANG Xuerui Reviewed-by: Jiaxun Yang Signed-off-by: Huacai Chen --- include/uapi/linux/audit.h | 2 ++ include/uapi/linux/elf-em.h | 1 + include/uapi/linux/elf.h | 5 +++++ include/uapi/linux/kexec.h | 1 + 4 files changed, 9 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/audit.h b/include/uapi/linux/audit.h index 8eda133ca4c1..7c1dc818b1d5 100644 --- a/include/uapi/linux/audit.h +++ b/include/uapi/linux/audit.h @@ -439,6 +439,8 @@ enum { #define AUDIT_ARCH_UNICORE (EM_UNICORE|__AUDIT_ARCH_LE) #define AUDIT_ARCH_X86_64 (EM_X86_64|__AUDIT_ARCH_64BIT|__AUDIT_ARCH_LE) #define AUDIT_ARCH_XTENSA (EM_XTENSA) +#define AUDIT_ARCH_LOONGARCH32 (EM_LOONGARCH|__AUDIT_ARCH_LE) +#define AUDIT_ARCH_LOONGARCH64 (EM_LOONGARCH|__AUDIT_ARCH_64BIT|__AUDIT_ARCH_LE) #define AUDIT_PERM_EXEC 1 #define AUDIT_PERM_WRITE 2 diff --git a/include/uapi/linux/elf-em.h b/include/uapi/linux/elf-em.h index f47e853546fa..ef38c2bc5ab7 100644 --- a/include/uapi/linux/elf-em.h +++ b/include/uapi/linux/elf-em.h @@ -51,6 +51,7 @@ #define EM_RISCV 243 /* RISC-V */ #define EM_BPF 247 /* Linux BPF - in-kernel virtual machine */ #define EM_CSKY 252 /* C-SKY */ +#define EM_LOONGARCH 258 /* LoongArch */ #define EM_FRV 0x5441 /* Fujitsu FR-V */ /* diff --git a/include/uapi/linux/elf.h b/include/uapi/linux/elf.h index c4abd09c3da9..2b9f5e9985e5 100644 --- a/include/uapi/linux/elf.h +++ b/include/uapi/linux/elf.h @@ -438,6 +438,11 @@ typedef struct elf64_shdr { #define NT_MIPS_DSP 0x800 /* MIPS DSP ASE registers */ #define NT_MIPS_FP_MODE 0x801 /* MIPS floating-point mode */ #define NT_MIPS_MSA 0x802 /* MIPS SIMD registers */ +#define NT_LOONGARCH_CPUCFG 0xa00 /* LoongArch CPU config registers */ +#define NT_LOONGARCH_CSR 0xa01 /* LoongArch control and status registers */ +#define NT_LOONGARCH_LSX 0xa02 /* LoongArch Loongson SIMD Extension registers */ +#define NT_LOONGARCH_LASX 0xa03 /* LoongArch Loongson Advanced SIMD Extension registers */ +#define NT_LOONGARCH_LBT 
0xa04 /* LoongArch Loongson Binary Translation registers */ /* Note types with note name "GNU" */ #define NT_GNU_PROPERTY_TYPE_0 5 diff --git a/include/uapi/linux/kexec.h b/include/uapi/linux/kexec.h index fb7e2ef60825..981016e05cfa 100644 --- a/include/uapi/linux/kexec.h +++ b/include/uapi/linux/kexec.h @@ -43,6 +43,7 @@ #define KEXEC_ARCH_MIPS ( 8 << 16) #define KEXEC_ARCH_AARCH64 (183 << 16) #define KEXEC_ARCH_RISCV (243 << 16) +#define KEXEC_ARCH_LOONGARCH (258 << 16) /* The artificial cap on the number of segments passed to kexec_load. */ #define KEXEC_SEGMENT_MAX 16 -- cgit v1.2.3 From 6089fb325cf737eeb2c4d236c94697112ca860da Mon Sep 17 00:00:00 2001 From: Yonghong Song Date: Mon, 6 Jun 2022 23:26:00 -0700 Subject: bpf: Add btf enum64 support Currently, BTF only supports up to 32bit enum value with BTF_KIND_ENUM. But in kernel, some enums indeed have 64bit values, e.g., in uapi bpf.h, we have enum { BPF_F_INDEX_MASK = 0xffffffffULL, BPF_F_CURRENT_CPU = BPF_F_INDEX_MASK, BPF_F_CTXLEN_MASK = (0xfffffULL << 32), }; In this case, BTF_KIND_ENUM will encode the value of BPF_F_CTXLEN_MASK as 0, which certainly is incorrect. This patch added a new btf kind, BTF_KIND_ENUM64, which permits 64bit value to cover the above use case. The BTF_KIND_ENUM64 has the following three fields following the common type: struct btf_enum64 { __u32 name_off; __u32 val_lo32; __u32 val_hi32; }; Currently, btf type section has an alignment of 4 as all element types are u32. Representing the value with __u64 will introduce a pad for btf_enum64 and may also introduce misalignment for the 64bit value. Hence, two members of val_hi32 and val_lo32 are chosen to avoid these issues. The kflag is also introduced for BTF_KIND_ENUM and BTF_KIND_ENUM64 to indicate whether the value is signed or unsigned. The kflag intends to provide consistent output of BTF C format with the original source code. For example, the original BTF_KIND_ENUM bit value is 0xffffffff.
The format C has two choices, printing out 0xffffffff or -1 and current libbpf prints out as unsigned value. But if the signedness is preserved in btf, the value can be printed the same as the original source code. The kflag value 0 means unsigned values, which is consistent to the default by libbpf and should also cover most cases as well. The new BTF_KIND_ENUM64 is intended to support the enum value represented as 64bit value. But it can represent all BTF_KIND_ENUM values as well. The compiler ([1]) and pahole will generate BTF_KIND_ENUM64 only if the value has to be represented with 64 bits. In addition, a static inline function btf_kind_core_compat() is introduced which will be used later when libbpf relo_core.c changed. Here the kernel shares the same relo_core.c with libbpf. [1] https://reviews.llvm.org/D124641 Acked-by: Andrii Nakryiko Signed-off-by: Yonghong Song Link: https://lore.kernel.org/r/20220607062600.3716578-1-yhs@fb.com Signed-off-by: Alexei Starovoitov --- include/uapi/linux/btf.h | 17 ++++++++++++++--- 1 file changed, 14 insertions(+), 3 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/btf.h b/include/uapi/linux/btf.h index a9162a6c0284..ec1798b6d3ff 100644 --- a/include/uapi/linux/btf.h +++ b/include/uapi/linux/btf.h @@ -36,10 +36,10 @@ struct btf_type { * bits 24-28: kind (e.g. int, ptr, array...etc) * bits 29-30: unused * bit 31: kind_flag, currently used by - * struct, union and fwd + * struct, union, enum, fwd and enum64 */ __u32 info; - /* "size" is used by INT, ENUM, STRUCT, UNION and DATASEC. + /* "size" is used by INT, ENUM, STRUCT, UNION, DATASEC and ENUM64. * "size" tells the size of the type it is describing. 
* * "type" is used by PTR, TYPEDEF, VOLATILE, CONST, RESTRICT, @@ -63,7 +63,7 @@ enum { BTF_KIND_ARRAY = 3, /* Array */ BTF_KIND_STRUCT = 4, /* Struct */ BTF_KIND_UNION = 5, /* Union */ - BTF_KIND_ENUM = 6, /* Enumeration */ + BTF_KIND_ENUM = 6, /* Enumeration up to 32-bit values */ BTF_KIND_FWD = 7, /* Forward */ BTF_KIND_TYPEDEF = 8, /* Typedef */ BTF_KIND_VOLATILE = 9, /* Volatile */ @@ -76,6 +76,7 @@ enum { BTF_KIND_FLOAT = 16, /* Floating point */ BTF_KIND_DECL_TAG = 17, /* Decl Tag */ BTF_KIND_TYPE_TAG = 18, /* Type Tag */ + BTF_KIND_ENUM64 = 19, /* Enumeration up to 64-bit values */ NR_BTF_KINDS, BTF_KIND_MAX = NR_BTF_KINDS - 1, @@ -186,4 +187,14 @@ struct btf_decl_tag { __s32 component_idx; }; +/* BTF_KIND_ENUM64 is followed by multiple "struct btf_enum64". + * The exact number of btf_enum64 is stored in the vlen (of the + * info in "struct btf_type"). + */ +struct btf_enum64 { + __u32 name_off; + __u32 val_lo32; + __u32 val_hi32; +}; + #endif /* _UAPI__LINUX_BTF_H__ */ -- cgit v1.2.3 From ed2351174e38ad4febbbc0dba802803e6cff8ae0 Mon Sep 17 00:00:00 2001 From: Chenyi Qiang Date: Tue, 24 May 2022 21:56:21 +0800 Subject: KVM: x86: Extend KVM_{G,S}ET_VCPU_EVENTS to support pending triple fault For the triple fault synthesized by KVM, e.g. the RSM path or nested_vmx_abort(), if KVM exits to userspace before the request is serviced, userspace could migrate the VM and lose the triple fault. Extend KVM_{G,S}ET_VCPU_EVENTS to support pending triple fault with a new event KVM_VCPUEVENT_VALID_TRIPLE_FAULT so that userspace can save and restore the triple fault event. This extension is guarded by a new KVM capability KVM_CAP_X86_TRIPLE_FAULT_EVENT. Note that in the set_vcpu_events path, userspace is able to set/clear the triple fault request through triple_fault.pending field.
Signed-off-by: Chenyi Qiang Message-Id: <20220524135624.22988-2-chenyi.qiang@intel.com> Signed-off-by: Paolo Bonzini --- include/uapi/linux/kvm.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index c4a32910b88a..ca799319acfd 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -1158,6 +1158,7 @@ struct kvm_ppc_resize_hpt { #define KVM_CAP_SYSTEM_EVENT_DATA 215 #define KVM_CAP_ARM_SYSTEM_SUSPEND 216 #define KVM_CAP_S390_PROTECTED_DUMP 217 +#define KVM_CAP_X86_TRIPLE_FAULT_EVENT 218 #ifdef KVM_CAP_IRQ_ROUTING -- cgit v1.2.3 From 2f4073e08f4cc5a41e35d777c240aaadd0257051 Mon Sep 17 00:00:00 2001 From: Tao Xu Date: Tue, 24 May 2022 21:56:24 +0800 Subject: KVM: VMX: Enable Notify VM exit There are cases that malicious virtual machines can cause CPU stuck (due to event windows don't open up), e.g., infinite loop in microcode when nested #AC (CVE-2015-5307). No event window means no event (NMI, SMI and IRQ) can be delivered. It leads the CPU to be unavailable to host or other VMs. VMM can enable notify VM exit that a VM exit generated if no event window occurs in VM non-root mode for a specified amount of time (notify window). Feature enabling: - The new vmcs field SECONDARY_EXEC_NOTIFY_VM_EXITING is introduced to enable this feature. VMM can set NOTIFY_WINDOW vmcs field to adjust the expected notify window. - Add a new KVM capability KVM_CAP_X86_NOTIFY_VMEXIT so that user space can query and enable this feature in per-VM scope. The argument is a 64bit value: bits 63:32 are used for notify window, and bits 31:0 are for flags. Current supported flags: - KVM_X86_NOTIFY_VMEXIT_ENABLED: enable the feature with the notify window provided. - KVM_X86_NOTIFY_VMEXIT_USER: exit to userspace once the exits happen. - It's safe to even set notify window to zero since an internal hardware threshold is added to vmcs.notify_window. 
VM exit handling: - Introduce a vcpu state notify_window_exits to records the count of notify VM exits and expose it through the debugfs. - Notify VM exit can happen incident to delivery of a vector event. Allow it in KVM. - Exit to userspace unconditionally for handling when VM_CONTEXT_INVALID bit is set. Nested handling - Nested notify VM exits are not supported yet. Keep the same notify window control in vmcs02 as vmcs01, so that L1 can't escape the restriction of notify VM exits through launching L2 VM. Notify VM exit is defined in latest Intel Architecture Instruction Set Extensions Programming Reference, chapter 9.2. Co-developed-by: Xiaoyao Li Signed-off-by: Xiaoyao Li Signed-off-by: Tao Xu Co-developed-by: Chenyi Qiang Signed-off-by: Chenyi Qiang Message-Id: <20220524135624.22988-5-chenyi.qiang@intel.com> Signed-off-by: Paolo Bonzini --- include/uapi/linux/kvm.h | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index ca799319acfd..7569b4ec199c 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -270,6 +270,7 @@ struct kvm_xen_exit { #define KVM_EXIT_X86_BUS_LOCK 33 #define KVM_EXIT_XEN 34 #define KVM_EXIT_RISCV_SBI 35 +#define KVM_EXIT_NOTIFY 36 /* For KVM_EXIT_INTERNAL_ERROR */ /* Emulate instruction failed. */ @@ -496,6 +497,11 @@ struct kvm_run { unsigned long args[6]; unsigned long ret[2]; } riscv_sbi; + /* KVM_EXIT_NOTIFY */ + struct { +#define KVM_NOTIFY_CONTEXT_INVALID (1 << 0) + __u32 flags; + } notify; /* Fix the size of the union. 
*/ char padding[256]; }; @@ -1159,6 +1165,7 @@ struct kvm_ppc_resize_hpt { #define KVM_CAP_ARM_SYSTEM_SUSPEND 216 #define KVM_CAP_S390_PROTECTED_DUMP 217 #define KVM_CAP_X86_TRIPLE_FAULT_EVENT 218 +#define KVM_CAP_X86_NOTIFY_VMEXIT 219 #ifdef KVM_CAP_IRQ_ROUTING @@ -2174,4 +2181,8 @@ struct kvm_stats_desc { /* Available with KVM_CAP_S390_PROTECTED_DUMP */ #define KVM_S390_PV_CPU_COMMAND _IOWR(KVMIO, 0xd0, struct kvm_pv_cmd) +/* Available with KVM_CAP_X86_NOTIFY_VMEXIT */ +#define KVM_X86_NOTIFY_VMEXIT_ENABLED (1ULL << 0) +#define KVM_X86_NOTIFY_VMEXIT_USER (1ULL << 1) + #endif /* __LINUX_KVM_H */ -- cgit v1.2.3 From 20e10881a043af63f2962a9e6bca64661225b383 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Wed, 8 Jun 2022 10:21:41 -0500 Subject: dma-buf: Add an API for exporting sync files (v14) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Modern userspace APIs like Vulkan are built on an explicit synchronization model. This doesn't always play nicely with the implicit synchronization used in the kernel and assumed by X11 and Wayland. The client -> compositor half of the synchronization isn't too bad, at least on intel, because we can control whether or not i915 synchronizes on the buffer and whether or not it's considered written. The harder part is the compositor -> client synchronization when we get the buffer back from the compositor. We're required to be able to provide the client with a VkSemaphore and VkFence representing the point in time where the window system (compositor and/or display) finished using the buffer. With current APIs, it's very hard to do this in such a way that we don't get confused by the Vulkan driver's access of the buffer. In particular, once we tell the kernel that we're rendering to the buffer again, any CPU waits on the buffer or GPU dependencies will wait on some of the client rendering and not just the compositor. 
This new IOCTL solves this problem by allowing us to get a snapshot of the implicit synchronization state of a given dma-buf in the form of a sync file. It's effectively the same as a poll() or I915_GEM_WAIT only, instead of CPU waiting directly, it encapsulates the wait operation, at the current moment in time, in a sync_file so we can check/wait on it later. As long as the Vulkan driver does the sync_file export from the dma-buf before we re-introduce it for rendering, it will only contain fences from the compositor or display. This allows to accurately turn it into a VkFence or VkSemaphore without any over-synchronization. By making this an ioctl on the dma-buf itself, it allows this new functionality to be used in an entirely driver-agnostic way without having access to a DRM fd. This makes it ideal for use in driver-generic code in Mesa or in a client such as a compositor where the DRM fd may be hard to reach. v2 (Jason Ekstrand): - Use a wrapper dma_fence_array of all fences including the new one when importing an exclusive fence. v3 (Jason Ekstrand): - Lock around setting shared fences as well as exclusive - Mark SIGNAL_SYNC_FILE as a read-write ioctl. - Initialize ret to 0 in dma_buf_wait_sync_file v4 (Jason Ekstrand): - Use the new dma_resv_get_singleton helper v5 (Jason Ekstrand): - Rename the IOCTLs to import/export rather than wait/signal - Drop the WRITE flag and always get/set the exclusive fence v6 (Jason Ekstrand): - Drop the sync_file import as it was all-around sketchy and not nearly as useful as import. 
- Re-introduce READ/WRITE flag support for export - Rework the commit message v7 (Jason Ekstrand): - Require at least one sync flag - Fix a refcounting bug: dma_resv_get_excl() doesn't take a reference - Use _rcu helpers since we're accessing the dma_resv read-only v8 (Jason Ekstrand): - Return -ENOMEM if the sync_file_create fails - Predicate support on IS_ENABLED(CONFIG_SYNC_FILE) v9 (Jason Ekstrand): - Add documentation for the new ioctl v10 (Jason Ekstrand): - Go back to dma_buf_sync_file as the ioctl struct name v11 (Daniel Vetter): - Go back to dma_buf_export_sync_file as the ioctl struct name - Better kerneldoc describing what the read/write flags do v12 (Christian König): - Document why we chose to make it an ioctl on dma-buf v13 (Jason Ekstrand): - Rebase on Christian König's fence rework v14 (Daniel Vetter & Christian König): - Use dma_rev_usage_rw to get the properly inverted usage to pass to dma_resv_get_singleton() - Clean up the sync_file and fd if copy_to_user() fails Signed-off-by: Jason Ekstrand Signed-off-by: Jason Ekstrand Signed-off-by: Jason Ekstrand Acked-by: Simon Ser Reviewed-by: Christian König Reviewed-by: Daniel Vetter Cc: Sumit Semwal Cc: Maarten Lankhorst Signed-off-by: Simon Ser Link: https://patchwork.freedesktop.org/patch/msgid/20220608152142.14495-2-jason@jlekstrand.net --- include/uapi/linux/dma-buf.h | 35 +++++++++++++++++++++++++++++++++++ 1 file changed, 35 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/dma-buf.h b/include/uapi/linux/dma-buf.h index 8e4a2ca0bcbf..46f1e3e98b02 100644 --- a/include/uapi/linux/dma-buf.h +++ b/include/uapi/linux/dma-buf.h @@ -85,6 +85,40 @@ struct dma_buf_sync { #define DMA_BUF_NAME_LEN 32 +/** + * struct dma_buf_export_sync_file - Get a sync_file from a dma-buf + * + * Userspace can perform a DMA_BUF_IOCTL_EXPORT_SYNC_FILE to retrieve the + * current set of fences on a dma-buf file descriptor as a sync_file. 
CPU + * waits via poll() or other driver-specific mechanisms typically wait on + * whatever fences are on the dma-buf at the time the wait begins. This + * is similar except that it takes a snapshot of the current fences on the + * dma-buf for waiting later instead of waiting immediately. This is + * useful for modern graphics APIs such as Vulkan which assume an explicit + * synchronization model but still need to inter-operate with dma-buf. + */ +struct dma_buf_export_sync_file { + /** + * @flags: Read/write flags + * + * Must be DMA_BUF_SYNC_READ, DMA_BUF_SYNC_WRITE, or both. + * + * If DMA_BUF_SYNC_READ is set and DMA_BUF_SYNC_WRITE is not set, + * the returned sync file waits on any writers of the dma-buf to + * complete. Waiting on the returned sync file is equivalent to + * poll() with POLLIN. + * + * If DMA_BUF_SYNC_WRITE is set, the returned sync file waits on + * any users of the dma-buf (read or write) to complete. Waiting + * on the returned sync file is equivalent to poll() with POLLOUT. + * If both DMA_BUF_SYNC_WRITE and DMA_BUF_SYNC_READ are set, this + * is equivalent to just DMA_BUF_SYNC_WRITE. 
+ */ + __u32 flags; + /** @fd: Returned sync file descriptor */ + __s32 fd; +}; + #define DMA_BUF_BASE 'b' #define DMA_BUF_IOCTL_SYNC _IOW(DMA_BUF_BASE, 0, struct dma_buf_sync) @@ -94,5 +128,6 @@ struct dma_buf_sync { #define DMA_BUF_SET_NAME _IOW(DMA_BUF_BASE, 1, const char *) #define DMA_BUF_SET_NAME_A _IOW(DMA_BUF_BASE, 1, u32) #define DMA_BUF_SET_NAME_B _IOW(DMA_BUF_BASE, 1, u64) +#define DMA_BUF_IOCTL_EXPORT_SYNC_FILE _IOWR(DMA_BUF_BASE, 2, struct dma_buf_export_sync_file) #endif -- cgit v1.2.3 From 594740497e998d30477ab26093bfb81c28cd3ff1 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Wed, 8 Jun 2022 10:21:42 -0500 Subject: dma-buf: Add an API for importing sync files (v10) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This patch is analogous to the previous sync file export patch in that it allows you to import a sync_file into a dma-buf. Unlike the previous patch, however, this does add genuinely new functionality to dma-buf. Without this, the only way to attach a sync_file to a dma-buf is to submit a batch to your driver of choice which waits on the sync_file and claims to write to the dma-buf. Even if said batch is a no-op, a submit is typically way more overhead than just attaching a fence. A submit may also imply extra synchronization with other work because it happens on a hardware queue. In the Vulkan world, this is useful for dealing with the out-fence from vkQueuePresent. Current Linux window-systems (X11, Wayland, etc.) all rely on dma-buf implicit sync. Since Vulkan is an explicit sync API, we get a set of fences (VkSemaphores) in vkQueuePresent and have to stash those as an exclusive (write) fence on the dma-buf. We handle it in Mesa today with the above mentioned dummy submit trick. This ioctl would allow us to set it directly without the dummy submit. 
This may also open up possibilities for GPU drivers to move away from implicit sync for their kernel driver uAPI and instead provide sync files and rely on dma-buf import/export for communicating with other implicit sync clients. We make the explicit choice here to only allow setting RW fences which translates to an exclusive fence on the dma_resv. There's no use for read-only fences for communicating with other implicit sync userspace and any such attempts are likely to be racy at best. When we got to insert the RW fence, the actual fence we set as the new exclusive fence is a combination of the sync_file provided by the user and all the other fences on the dma_resv. This ensures that the newly added exclusive fence will never signal before the old one would have and ensures that we don't break any dma_resv contracts. We require userspace to specify RW in the flags for symmetry with the export ioctl and in case we ever want to support read fences in the future. There is one downside here that's worth documenting: If two clients writing to the same dma-buf using this API race with each other, their actions on the dma-buf may happen in parallel or in an undefined order. Both with and without this API, the pattern is the same: Collect all the fences on dma-buf, submit work which depends on said fences, and then set a new exclusive (write) fence on the dma-buf which depends on said work. The difference is that, when it's all handled by the GPU driver's submit ioctl, the three operations happen atomically under the dma_resv lock. If two userspace submits race, one will happen before the other. You aren't guaranteed which but you are guaranteed that they're strictly ordered. If userspace manages the fences itself, then these three operations happen separately and the two render operations may happen genuinely in parallel or get interleaved. However, this is a case of userspace racing with itself. 
As long as we ensure userspace can't back the kernel into a corner, it should be fine. v2 (Jason Ekstrand): - Use a wrapper dma_fence_array of all fences including the new one when importing an exclusive fence. v3 (Jason Ekstrand): - Lock around setting shared fences as well as exclusive - Mark SIGNAL_SYNC_FILE as a read-write ioctl. - Initialize ret to 0 in dma_buf_wait_sync_file v4 (Jason Ekstrand): - Use the new dma_resv_get_singleton helper v5 (Jason Ekstrand): - Rename the IOCTLs to import/export rather than wait/signal - Drop the WRITE flag and always get/set the exclusive fence v6 (Jason Ekstrand): - Split import and export into separate patches - New commit message v7 (Daniel Vetter): - Fix the uapi header to use the right struct in the ioctl - Use a separate dma_buf_import_sync_file struct - Add kerneldoc for dma_buf_import_sync_file v8 (Jason Ekstrand): - Rebase on Christian König's fence rework v9 (Daniel Vetter): - Fix -EINVAL checks for the flags parameter - Add documentation about read/write fences - Add documentation about the expected usage of import/export and specifically call out the possible userspace race. v10 (Simon Ser): - Fix a typo in the docs Signed-off-by: Jason Ekstrand Signed-off-by: Jason Ekstrand Signed-off-by: Jason Ekstrand Reviewed-by: Christian König Reviewed-by: Daniel Vetter Cc: Sumit Semwal Cc: Maarten Lankhorst Signed-off-by: Simon Ser Link: https://patchwork.freedesktop.org/patch/msgid/20220608152142.14495-3-jason@jlekstrand.net --- include/uapi/linux/dma-buf.h | 49 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 49 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/dma-buf.h b/include/uapi/linux/dma-buf.h index 46f1e3e98b02..30fb8834aa3c 100644 --- a/include/uapi/linux/dma-buf.h +++ b/include/uapi/linux/dma-buf.h @@ -96,6 +96,24 @@ struct dma_buf_sync { * dma-buf for waiting later instead of waiting immediately. 
This is * useful for modern graphics APIs such as Vulkan which assume an explicit * synchronization model but still need to inter-operate with dma-buf. + * + * The intended usage pattern is the following: + * + * 1. Export a sync_file with flags corresponding to the expected GPU usage + * via DMA_BUF_IOCTL_EXPORT_SYNC_FILE. + * + * 2. Submit rendering work which uses the dma-buf. The work should wait on + * the exported sync file before rendering and produce another sync_file + * when complete. + * + * 3. Import the rendering-complete sync_file into the dma-buf with flags + * corresponding to the GPU usage via DMA_BUF_IOCTL_IMPORT_SYNC_FILE. + * + * Unlike doing implicit synchronization via a GPU kernel driver's exec ioctl, + * the above is not a single atomic operation. If userspace wants to ensure + * ordering via these fences, it is the respnosibility of userspace to use + * locks or other mechanisms to ensure that no other context adds fences or + * submits work between steps 1 and 3 above. */ struct dma_buf_export_sync_file { /** @@ -119,6 +137,36 @@ struct dma_buf_export_sync_file { __s32 fd; }; +/** + * struct dma_buf_import_sync_file - Insert a sync_file into a dma-buf + * + * Userspace can perform a DMA_BUF_IOCTL_IMPORT_SYNC_FILE to insert a + * sync_file into a dma-buf for the purposes of implicit synchronization + * with other dma-buf consumers. This allows clients using explicitly + * synchronized APIs such as Vulkan to inter-op with dma-buf consumers + * which expect implicit synchronization such as OpenGL or most media + * drivers/video. + */ +struct dma_buf_import_sync_file { + /** + * @flags: Read/write flags + * + * Must be DMA_BUF_SYNC_READ, DMA_BUF_SYNC_WRITE, or both. + * + * If DMA_BUF_SYNC_READ is set and DMA_BUF_SYNC_WRITE is not set, + * this inserts the sync_file as a read-only fence. Any subsequent + * implicitly synchronized writes to this dma-buf will wait on this + * fence but reads will not. 
+ * + * If DMA_BUF_SYNC_WRITE is set, this inserts the sync_file as a + * write fence. All subsequent implicitly synchronized access to + * this dma-buf will wait on this fence. + */ + __u32 flags; + /** @fd: Sync file descriptor */ + __s32 fd; +}; + #define DMA_BUF_BASE 'b' #define DMA_BUF_IOCTL_SYNC _IOW(DMA_BUF_BASE, 0, struct dma_buf_sync) @@ -129,5 +177,6 @@ struct dma_buf_export_sync_file { #define DMA_BUF_SET_NAME_A _IOW(DMA_BUF_BASE, 1, u32) #define DMA_BUF_SET_NAME_B _IOW(DMA_BUF_BASE, 1, u64) #define DMA_BUF_IOCTL_EXPORT_SYNC_FILE _IOWR(DMA_BUF_BASE, 2, struct dma_buf_export_sync_file) +#define DMA_BUF_IOCTL_IMPORT_SYNC_FILE _IOW(DMA_BUF_BASE, 3, struct dma_buf_import_sync_file) #endif -- cgit v1.2.3 From b489a6e5871690735752f8875f411e4d0cd8e5df Mon Sep 17 00:00:00 2001 From: Maxim Mikityanskiy Date: Wed, 8 Jun 2022 18:34:25 +0300 Subject: tls: Rename TLS_INFO_ZC_SENDFILE to TLS_INFO_ZC_TX To embrace possible future optimizations of TLS, rename zerocopy sendfile definitions to more generic ones: * setsockopt: TLS_TX_ZEROCOPY_SENDFILE- > TLS_TX_ZEROCOPY_RO * sock_diag: TLS_INFO_ZC_SENDFILE -> TLS_INFO_ZC_RO_TX RO stands for readonly and emphasizes that the application shouldn't modify the data being transmitted with zerocopy to avoid potential disconnection. 
Fixes: c1318b39c7d3 ("tls: Add opt-in zerocopy mode of sendfile()") Signed-off-by: Maxim Mikityanskiy Link: https://lore.kernel.org/r/20220608153425.3151146-1-maximmi@nvidia.com Signed-off-by: Jakub Kicinski --- include/uapi/linux/tls.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/tls.h b/include/uapi/linux/tls.h index ac39328eabe7..bb8f80812b0b 100644 --- a/include/uapi/linux/tls.h +++ b/include/uapi/linux/tls.h @@ -39,7 +39,7 @@ /* TLS socket options */ #define TLS_TX 1 /* Set transmit parameters */ #define TLS_RX 2 /* Set receive parameters */ -#define TLS_TX_ZEROCOPY_SENDFILE 3 /* transmit zerocopy sendfile */ +#define TLS_TX_ZEROCOPY_RO 3 /* TX zerocopy (only sendfile now) */ /* Supported versions */ #define TLS_VERSION_MINOR(ver) ((ver) & 0xFF) @@ -161,7 +161,7 @@ enum { TLS_INFO_CIPHER, TLS_INFO_TXCONF, TLS_INFO_RXCONF, - TLS_INFO_ZC_SENDFILE, + TLS_INFO_ZC_RO_TX, __TLS_INFO_MAX, }; #define TLS_INFO_MAX (__TLS_INFO_MAX - 1) -- cgit v1.2.3 From 6b2a51ff03bf0c54cbc699ee85a9a49eb203ebfc Mon Sep 17 00:00:00 2001 From: Nathan Huckleberry Date: Fri, 20 May 2022 18:15:01 +0000 Subject: fscrypt: Add HCTR2 support for filename encryption HCTR2 is a tweakable, length-preserving encryption mode that is intended for use on CPUs with dedicated crypto instructions. HCTR2 has the property that a bitflip in the plaintext changes the entire ciphertext. This property fixes a known weakness with filename encryption: when two filenames in the same directory share a prefix of >= 16 bytes, with AES-CTS-CBC their encrypted filenames share a common substring, leaking information. HCTR2 does not have this problem. 
More information on HCTR2 can be found here: "Length-preserving encryption with HCTR2": https://eprint.iacr.org/2021/1441.pdf Signed-off-by: Nathan Huckleberry Reviewed-by: Ard Biesheuvel Acked-by: Eric Biggers Signed-off-by: Herbert Xu --- include/uapi/linux/fscrypt.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/fscrypt.h b/include/uapi/linux/fscrypt.h index 9f4428be3e36..a756b29afcc2 100644 --- a/include/uapi/linux/fscrypt.h +++ b/include/uapi/linux/fscrypt.h @@ -27,7 +27,8 @@ #define FSCRYPT_MODE_AES_128_CBC 5 #define FSCRYPT_MODE_AES_128_CTS 6 #define FSCRYPT_MODE_ADIANTUM 9 -/* If adding a mode number > 9, update FSCRYPT_MODE_MAX in fscrypt_private.h */ +#define FSCRYPT_MODE_AES_256_HCTR2 10 +/* If adding a mode number > 10, update FSCRYPT_MODE_MAX in fscrypt_private.h */ /* * Legacy policy version; ad-hoc KDF and no key verification. -- cgit v1.2.3 From 924b290655c0f17ac84e752addfc9bc3ec361069 Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Tue, 24 May 2022 13:47:40 -0700 Subject: xfrm: convert alg_key to flexible array member MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Iproute2 build generates a warning when built with gcc-12. This is because the alg_key in xfrm.h API has zero size array element instead of flexible array. CC xfrm_state.o In function ‘xfrm_algo_parse’, inlined from ‘xfrm_state_modify.constprop’ at xfrm_state.c:573:5: xfrm_state.c:162:32: warning: writing 1 byte into a region of size 0 [-Wstringop-overflow=] 162 | buf[j] = val; | ~~~~~~~^~~~~ This patch convert the alg_key into flexible array member. There are other zero size arrays here that should be converted as well. This patch is RFC only since it is only compile tested and passes trivial iproute2 tests. 
Signed-off-by: Stephen Hemminger Acked-by: Herbert Xu Signed-off-by: Steffen Klassert --- include/uapi/linux/xfrm.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/xfrm.h b/include/uapi/linux/xfrm.h index 65e13a099b1a..3ed61df9cc91 100644 --- a/include/uapi/linux/xfrm.h +++ b/include/uapi/linux/xfrm.h @@ -102,21 +102,21 @@ struct xfrm_replay_state_esn { struct xfrm_algo { char alg_name[64]; unsigned int alg_key_len; /* in bits */ - char alg_key[0]; + char alg_key[]; }; struct xfrm_algo_auth { char alg_name[64]; unsigned int alg_key_len; /* in bits */ unsigned int alg_trunc_len; /* in bits */ - char alg_key[0]; + char alg_key[]; }; struct xfrm_algo_aead { char alg_name[64]; unsigned int alg_key_len; /* in bits */ unsigned int alg_icv_len; /* in bits */ - char alg_key[0]; + char alg_key[]; }; struct xfrm_stats { -- cgit v1.2.3 From 20646f5b1e798bcc20044ae90ac3702f177bf254 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 6 Jun 2022 17:23:45 +0200 Subject: netfilter: xtables: Bring SPDX identifier back Commit e2be04c7f995 ("License cleanup: add SPDX license identifier to uapi header files with a license") added the correct SPDX identifier to include/uapi/linux/netfilter/xt_IDLETIMER.h. A subsequent commit removed it for no reason and reintroduced the UAPI license incorrectness as the file is now missing the UAPI exception again. Add it back and remove the GPLv2 boilerplate while at it. 
Fixes: 68983a354a65 ("netfilter: xtables: Add snapshot of hardidletimer target") Cc: Manoj Basapathi Cc: Subash Abhinov Kasiviswanathan Cc: Pablo Neira Ayuso Cc: netfilter-devel@vger.kernel.org Signed-off-by: Thomas Gleixner Signed-off-by: Greg Kroah-Hartman --- include/uapi/linux/netfilter/xt_IDLETIMER.h | 17 +---------------- 1 file changed, 1 insertion(+), 16 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/netfilter/xt_IDLETIMER.h b/include/uapi/linux/netfilter/xt_IDLETIMER.h index 49ddcdc61c09..7bfb31a66fc9 100644 --- a/include/uapi/linux/netfilter/xt_IDLETIMER.h +++ b/include/uapi/linux/netfilter/xt_IDLETIMER.h @@ -1,6 +1,5 @@ +/* SPDX-License-Identifier: GPL-2.0-only WITH Linux-syscall-note */ /* - * linux/include/linux/netfilter/xt_IDLETIMER.h - * * Header file for Xtables timer target module. * * Copyright (C) 2004, 2010 Nokia Corporation @@ -10,20 +9,6 @@ * by Luciano Coelho * * Contact: Luciano Coelho - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * version 2 as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA - * 02110-1301 USA */ #ifndef _XT_IDLETIMER_H -- cgit v1.2.3 From 17472bc2c3d00e4ed72c3c7b9af0697edea02fff Mon Sep 17 00:00:00 2001 From: Julia Lawall Date: Sat, 21 May 2022 13:11:27 +0200 Subject: wifi: nl80211: fix typo in comment Spelling mistake (triple letters) in comment. Detected with the help of Coccinelle. 
Signed-off-by: Julia Lawall Link: https://lore.kernel.org/r/20220521111145.81697-77-Julia.Lawall@inria.fr Signed-off-by: Johannes Berg --- include/uapi/linux/nl80211.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index d9490e3062a7..98f905f16411 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -5874,7 +5874,7 @@ enum nl80211_ap_sme_features { * @NL80211_FEATURE_INACTIVITY_TIMER: This driver takes care of freeing up * the connected inactive stations in AP mode. * @NL80211_FEATURE_CELL_BASE_REG_HINTS: This driver has been tested - * to work properly to suppport receiving regulatory hints from + * to work properly to support receiving regulatory hints from * cellular base stations. * @NL80211_FEATURE_P2P_DEVICE_NEEDS_CHANNEL: (no longer available, only * here to reserve the value for API/ABI compatibility) -- cgit v1.2.3 From d884b6498d2f022098502e106d5a45ab635f2e9a Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Tue, 14 Jun 2022 17:51:18 +0100 Subject: io_uring: remove IORING_CLOSE_FD_AND_FILE_SLOT This partially reverts a7c41b4687f5902af70cd559806990930c8a307b. Even though IORING_CLOSE_FD_AND_FILE_SLOT might save cycles for some users, it tries to do two things at a time and it's not clear how to handle errors and what to return in a single result field when one part fails and another completes well. Kill it for now.
Signed-off-by: Pavel Begunkov Link: https://lore.kernel.org/r/837c745019b3795941eee4fcfd7de697886d645b.1655224415.git.asml.silence@gmail.com Signed-off-by: Jens Axboe --- include/uapi/linux/io_uring.h | 6 ------ 1 file changed, 6 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h index 776e0278f9dd..53e7dae92e42 100644 --- a/include/uapi/linux/io_uring.h +++ b/include/uapi/linux/io_uring.h @@ -47,7 +47,6 @@ struct io_uring_sqe { __u32 unlink_flags; __u32 hardlink_flags; __u32 xattr_flags; - __u32 close_flags; }; __u64 user_data; /* data to be passed back at completion time */ /* pack this to avoid bogus arm OABI complaints */ @@ -259,11 +258,6 @@ enum io_uring_op { */ #define IORING_ACCEPT_MULTISHOT (1U << 0) -/* - * close flags, store in sqe->close_flags - */ -#define IORING_CLOSE_FD_AND_FILE_SLOT (1U << 0) - /* * IO completion data structure (Completion Queue Entry) */ -- cgit v1.2.3 From 9731dd4cadc53251ef80b3655c8d841fed52fa3d Mon Sep 17 00:00:00 2001 From: Daniel Phillips Date: Mon, 30 May 2022 11:21:22 -0400 Subject: drm/amdkfd: Add available memory ioctl Add a new KFD ioctl to return the largest possible memory size that can be allocated as a buffer object using kfd_ioctl_alloc_memory_of_gpu. It attempts to use exactly the same accept/reject criteria as that function so that allocating a new buffer object of the size returned by this new ioctl is guaranteed to succeed, barring races with other allocating tasks. 
This IOCTL will be used by libhsakmt: https://www.mail-archive.com/amd-gfx@lists.freedesktop.org/msg75743.html Signed-off-by: Daniel Phillips Signed-off-by: David Yat Sin Reviewed-by: Felix Kuehling Signed-off-by: Alex Deucher --- include/uapi/linux/kfd_ioctl.h | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/kfd_ioctl.h b/include/uapi/linux/kfd_ioctl.h index 42975e940758..231eb010b823 100644 --- a/include/uapi/linux/kfd_ioctl.h +++ b/include/uapi/linux/kfd_ioctl.h @@ -34,9 +34,10 @@ * - 1.6 - Query clear flags in SVM get_attr API * - 1.7 - Checkpoint Restore (CRIU) API * - 1.8 - CRIU - Support for SDMA transfers with GTT BOs + * - 1.9 - Add available memory ioctl */ #define KFD_IOCTL_MAJOR_VERSION 1 -#define KFD_IOCTL_MINOR_VERSION 8 +#define KFD_IOCTL_MINOR_VERSION 9 struct kfd_ioctl_get_version_args { __u32 major_version; /* from KFD */ @@ -100,6 +101,12 @@ struct kfd_ioctl_get_queue_wave_state_args { __u32 pad; }; +struct kfd_ioctl_get_available_memory_args { + __u64 available; /* from KFD */ + __u32 gpu_id; /* to KFD */ + __u32 pad; +}; + /* For kfd_ioctl_set_memory_policy_args.default_policy and alternate_policy */ #define KFD_IOC_CACHE_POLICY_COHERENT 0 #define KFD_IOC_CACHE_POLICY_NONCOHERENT 1 @@ -826,7 +833,10 @@ struct kfd_ioctl_set_xnack_mode_args { #define AMDKFD_IOC_CRIU_OP \ AMDKFD_IOWR(0x22, struct kfd_ioctl_criu_args) +#define AMDKFD_IOC_AVAILABLE_MEMORY \ + AMDKFD_IOWR(0x23, struct kfd_ioctl_get_available_memory_args) + #define AMDKFD_COMMAND_START 0x01 -#define AMDKFD_COMMAND_END 0x23 +#define AMDKFD_COMMAND_END 0x24 #endif -- cgit v1.2.3 From d30dfd490f7dc4cb6a7c11a647bd1ff7a22139e7 Mon Sep 17 00:00:00 2001 From: Justin Stitt Date: Wed, 8 Jun 2022 15:35:39 -0700 Subject: include/uapi/linux/swab.h: move explicit cast outside ternary A cast inside __builtin_constant_p doesn't do anything since it should evaluate as constant at compile time irrespective of this 
cast. Instead, I moved this cast outside the ternary to ensure the return type is as expected. Additionally, if __HAVE_BUILTIN_BSWAP16__ was not defined then __swab16 is actually returning an `int` not a `u16` due to integer promotion. As Al Viro notes: You *can't* get smaller-than-int out of ? :, same as you can't get it out of addition, etc. This also fixes some clang -Wformat warnings involving default argument promotion. Link: https://github.com/ClangBuiltLinux/linux/issues/378 Link: https://lkml.kernel.org/r/20220608223539.470472-1-justinstitt@google.com Signed-off-by: Justin Stitt Suggested-by: Al Viro Suggested-by: Nathan Chancellor Reviewed-by: Nathan Chancellor Suggested-by: Nick Desaulniers Signed-off-by: Andrew Morton --- include/uapi/linux/swab.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/swab.h b/include/uapi/linux/swab.h index 7272f85d6d6a..0723a9cce747 100644 --- a/include/uapi/linux/swab.h +++ b/include/uapi/linux/swab.h @@ -102,7 +102,7 @@ static inline __attribute_const__ __u32 __fswahb32(__u32 val) #define __swab16(x) (__u16)__builtin_bswap16((__u16)(x)) #else #define __swab16(x) \ - (__builtin_constant_p((__u16)(x)) ? \ + (__u16)(__builtin_constant_p(x) ? \ ___constant_swab16(x) : \ __fswab16(x)) #endif @@ -115,7 +115,7 @@ static inline __attribute_const__ __u32 __fswahb32(__u32 val) #define __swab32(x) (__u32)__builtin_bswap32((__u32)(x)) #else #define __swab32(x) \ - (__builtin_constant_p((__u32)(x)) ? \ + (__u32)(__builtin_constant_p(x) ? \ ___constant_swab32(x) : \ __fswab32(x)) #endif @@ -128,7 +128,7 @@ static inline __attribute_const__ __u32 __fswahb32(__u32 val) #define __swab64(x) (__u64)__builtin_bswap64((__u64)(x)) #else #define __swab64(x) \ - (__builtin_constant_p((__u64)(x)) ? \ + (__u64)(__builtin_constant_p(x) ? 
\ ___constant_swab64(x) : \ __fswab64(x)) #endif -- cgit v1.2.3 From ac80287a6af9fc3f3d189d6d1f523889a0a9e1bc Mon Sep 17 00:00:00 2001 From: Maxim Mikityanskiy Date: Wed, 15 Jun 2022 16:48:42 +0300 Subject: bpf: Fix documentation of th_len in bpf_tcp_{gen,check}_syncookie bpf_tcp_gen_syncookie expects the full length of the TCP header (with all options), and bpf_tcp_check_syncookie accepts lengths bigger than sizeof(struct tcphdr). Fix the documentation that says these lengths should be exactly sizeof(struct tcphdr). While at it, fix a typo in the name of struct ipv6hdr. Signed-off-by: Maxim Mikityanskiy Reviewed-by: Tariq Toukan Link: https://lore.kernel.org/r/20220615134847.3753567-2-maximmi@nvidia.com Signed-off-by: Alexei Starovoitov --- include/uapi/linux/bpf.h | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index f4009dbdf62d..f545e39df72a 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -3597,10 +3597,11 @@ union bpf_attr { * * *iph* points to the start of the IPv4 or IPv6 header, while * *iph_len* contains **sizeof**\ (**struct iphdr**) or - * **sizeof**\ (**struct ip6hdr**). + * **sizeof**\ (**struct ipv6hdr**). * * *th* points to the start of the TCP header, while *th_len* - * contains **sizeof**\ (**struct tcphdr**). + * contains the length of the TCP header (at least + * **sizeof**\ (**struct tcphdr**)). * Return * 0 if *iph* and *th* are a valid SYN cookie ACK, or a negative * error otherwise. @@ -3783,10 +3784,11 @@ union bpf_attr { * * *iph* points to the start of the IPv4 or IPv6 header, while * *iph_len* contains **sizeof**\ (**struct iphdr**) or - * **sizeof**\ (**struct ip6hdr**). + * **sizeof**\ (**struct ipv6hdr**). * * *th* points to the start of the TCP header, while *th_len* - * contains the length of the TCP header. 
+ * contains the length of the TCP header with options (at least + * **sizeof**\ (**struct tcphdr**)). * Return * On success, lower 32 bits hold the generated SYN cookie in * followed by 16 bits which hold the MSS value for that cookie, -- cgit v1.2.3 From 33bf9885040c399cf6a95bd33216644126728e14 Mon Sep 17 00:00:00 2001 From: Maxim Mikityanskiy Date: Wed, 15 Jun 2022 16:48:44 +0300 Subject: bpf: Add helpers to issue and check SYN cookies in XDP The new helpers bpf_tcp_raw_{gen,check}_syncookie_ipv{4,6} allow an XDP program to generate SYN cookies in response to TCP SYN packets and to check those cookies upon receiving the first ACK packet (the final packet of the TCP handshake). Unlike bpf_tcp_{gen,check}_syncookie these new helpers don't need a listening socket on the local machine, which allows to use them together with synproxy to accelerate SYN cookie generation. Signed-off-by: Maxim Mikityanskiy Reviewed-by: Tariq Toukan Link: https://lore.kernel.org/r/20220615134847.3753567-4-maximmi@nvidia.com Signed-off-by: Alexei Starovoitov --- include/uapi/linux/bpf.h | 78 ++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 78 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index f545e39df72a..e81362891596 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -5251,6 +5251,80 @@ union bpf_attr { * Pointer to the underlying dynptr data, NULL if the dynptr is * read-only, if the dynptr is invalid, or if the offset and length * is out of bounds. + * + * s64 bpf_tcp_raw_gen_syncookie_ipv4(struct iphdr *iph, struct tcphdr *th, u32 th_len) + * Description + * Try to issue a SYN cookie for the packet with corresponding + * IPv4/TCP headers, *iph* and *th*, without depending on a + * listening socket. + * + * *iph* points to the IPv4 header. 
+ * + * *th* points to the start of the TCP header, while *th_len* + * contains the length of the TCP header (at least + * **sizeof**\ (**struct tcphdr**)). + * Return + * On success, lower 32 bits hold the generated SYN cookie in + * followed by 16 bits which hold the MSS value for that cookie, + * and the top 16 bits are unused. + * + * On failure, the returned value is one of the following: + * + * **-EINVAL** if *th_len* is invalid. + * + * s64 bpf_tcp_raw_gen_syncookie_ipv6(struct ipv6hdr *iph, struct tcphdr *th, u32 th_len) + * Description + * Try to issue a SYN cookie for the packet with corresponding + * IPv6/TCP headers, *iph* and *th*, without depending on a + * listening socket. + * + * *iph* points to the IPv6 header. + * + * *th* points to the start of the TCP header, while *th_len* + * contains the length of the TCP header (at least + * **sizeof**\ (**struct tcphdr**)). + * Return + * On success, lower 32 bits hold the generated SYN cookie in + * followed by 16 bits which hold the MSS value for that cookie, + * and the top 16 bits are unused. + * + * On failure, the returned value is one of the following: + * + * **-EINVAL** if *th_len* is invalid. + * + * **-EPROTONOSUPPORT** if CONFIG_IPV6 is not builtin. + * + * long bpf_tcp_raw_check_syncookie_ipv4(struct iphdr *iph, struct tcphdr *th) + * Description + * Check whether *iph* and *th* contain a valid SYN cookie ACK + * without depending on a listening socket. + * + * *iph* points to the IPv4 header. + * + * *th* points to the TCP header. + * Return + * 0 if *iph* and *th* are a valid SYN cookie ACK. + * + * On failure, the returned value is one of the following: + * + * **-EACCES** if the SYN cookie is not valid. + * + * long bpf_tcp_raw_check_syncookie_ipv6(struct ipv6hdr *iph, struct tcphdr *th) + * Description + * Check whether *iph* and *th* contain a valid SYN cookie ACK + * without depending on a listening socket. + * + * *iph* points to the IPv6 header. + * + * *th* points to the TCP header. 
+ * Return + * 0 if *iph* and *th* are a valid SYN cookie ACK. + * + * On failure, the returned value is one of the following: + * + * **-EACCES** if the SYN cookie is not valid. + * + * **-EPROTONOSUPPORT** if CONFIG_IPV6 is not builtin. */ #define __BPF_FUNC_MAPPER(FN) \ FN(unspec), \ @@ -5457,6 +5531,10 @@ union bpf_attr { FN(dynptr_read), \ FN(dynptr_write), \ FN(dynptr_data), \ + FN(tcp_raw_gen_syncookie_ipv4), \ + FN(tcp_raw_gen_syncookie_ipv6), \ + FN(tcp_raw_check_syncookie_ipv4), \ + FN(tcp_raw_check_syncookie_ipv6), \ /* */ /* integer value in 'imm' field of BPF_CALL instruction selects which helper -- cgit v1.2.3 From 5374d8fb75f313294c7d97e85c22bead34d63f2b Mon Sep 17 00:00:00 2001 From: Benjamin Gaignard Date: Thu, 19 May 2022 08:11:46 +0100 Subject: media: Add P010 video format P010 is a YUV format with 10-bits per component with interleaved UV. Signed-off-by: Benjamin Gaignard Acked-by: Nicolas Dufresne Signed-off-by: Hans Verkuil Signed-off-by: Mauro Carvalho Chehab --- include/uapi/linux/videodev2.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/videodev2.h b/include/uapi/linux/videodev2.h index 343b95107fce..5311ac4fde35 100644 --- a/include/uapi/linux/videodev2.h +++ b/include/uapi/linux/videodev2.h @@ -602,6 +602,7 @@ struct v4l2_pix_format { #define V4L2_PIX_FMT_NV61 v4l2_fourcc('N', 'V', '6', '1') /* 16 Y/CrCb 4:2:2 */ #define V4L2_PIX_FMT_NV24 v4l2_fourcc('N', 'V', '2', '4') /* 24 Y/CbCr 4:4:4 */ #define V4L2_PIX_FMT_NV42 v4l2_fourcc('N', 'V', '4', '2') /* 24 Y/CrCb 4:4:4 */ +#define V4L2_PIX_FMT_P010 v4l2_fourcc('P', '0', '1', '0') /* 24 Y/CbCr 4:2:0 10-bit per component */ /* two non contiguous planes - one Y, one Cr + Cb interleaved */ #define V4L2_PIX_FMT_NV12M v4l2_fourcc('N', 'M', '1', '2') /* 12 Y/CbCr 4:2:0 */ -- cgit v1.2.3 From 7b0a0e3c3a88260b6fcb017e49f198463aa62ed1 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Thu, 14 Apr 2022 16:50:57 +0200 Subject: wifi: cfg80211: do 
some rework towards MLO link APIs In order to support multi-link operation with multiple links, start adding some APIs. The notable addition here is to have the link ID in a new nl80211 attribute, that will be used to differentiate the links in many nl80211 operations. So far, this patch adds the netlink NL80211_ATTR_MLO_LINK_ID attribute (as well as the NL80211_ATTR_MLO_LINKS attribute) and plugs it through the system in some places, checking the validity etc. along with other infrastructure needed for it. For now, I've decided to include only the over-the-air link ID in the API. I know we discussed that we eventually need to have other ways of identifying a link, but for local AP mode and auth/assoc commands as well as set_key etc. we'll use the OTA ID. Also included in this patch is some refactoring of the data structures in struct wireless_dev, splitting for the first time the data into type dependent pieces, to make reasoning about these things easier. Signed-off-by: Johannes Berg --- include/uapi/linux/nl80211.h | 28 ++++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index 98f905f16411..a9a2c9fef295 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -323,6 +323,17 @@ * Once the association is done, the driver cleans the FILS AAD data. */ +/** + * DOC: Multi-Link Operation + * + * In Multi-Link Operation, a connection between two MLDs utilizes multiple + * links. To use this in nl80211, various commands and responses now need + * to or will include the new %NL80211_ATTR_MLO_LINKS attribute. + * Additionally, various commands that need to operate on a specific link + * now need to be given the %NL80211_ATTR_MLO_LINK_ID attribute, e.g. to + * use %NL80211_CMD_START_AP or similar functions.
+ */ + /** * enum nl80211_commands - supported nl80211 commands * @@ -1237,6 +1248,12 @@ * to describe the BSSID address of the AP and %NL80211_ATTR_TIMEOUT to * specify the timeout value. * + * @NL80211_CMD_ADD_LINK: Add a new link to an interface. The + * %NL80211_ATTR_MLO_LINK_ID attribute is used for the new link. + * @NL80211_CMD_REMOVE_LINK: Remove a link from an interface. This may come + * without %NL80211_ATTR_MLO_LINK_ID as an easy way to remove all links + * in preparation for e.g. roaming to a regular (non-MLO) AP. + * * @NL80211_CMD_MAX: highest used command number * @__NL80211_CMD_AFTER_LAST: internal use */ @@ -1481,6 +1498,9 @@ enum nl80211_commands { NL80211_CMD_ASSOC_COMEBACK, + NL80211_CMD_ADD_LINK, + NL80211_CMD_REMOVE_LINK, + /* add new commands above here */ /* used to define NL80211_CMD_MAX below */ @@ -2663,6 +2683,11 @@ enum nl80211_commands { * association request when used with NL80211_CMD_NEW_STATION). Can be set * only if %NL80211_STA_FLAG_WME is set. * + * @NL80211_ATTR_MLO_LINK_ID: A (u8) link ID for use with MLO, to be used with + * various commands that need a link ID to operate. + * @NL80211_ATTR_MLO_LINKS: A nested array of links, each containing some + * per-link information and a link ID. 
+ * * @NUM_NL80211_ATTR: total number of nl80211_attrs available * @NL80211_ATTR_MAX: highest attribute number currently defined * @__NL80211_ATTR_AFTER_LAST: internal use @@ -3177,6 +3202,9 @@ enum nl80211_attrs { NL80211_ATTR_DISABLE_EHT, + NL80211_ATTR_MLO_LINKS, + NL80211_ATTR_MLO_LINK_ID, + /* add attributes here, update the policy in nl80211.c */ __NL80211_ATTR_AFTER_LAST, -- cgit v1.2.3 From d648c23024bd01333acd2fd5e34bcde0ffb66b16 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Tue, 31 May 2022 19:48:33 +0200 Subject: wifi: nl80211: support MLO in auth/assoc For authentication, we need the BSS, the link_id and the AP MLD address to create the link and station, (for now) the driver assigns a link address and sends the frame, the MLD address needs to be the address of the interface. For association, pass the list of BSSes that were selected for the MLO connection, along with extra per-STA profile elements, the AP MLD address and the link ID on which the association request should be sent. Note that for now we don't have a proper way to pass the link address(es) and so the driver/mac80211 will select one, but depending on how that selection works it means that assoc w/o auth data still being around (mac80211 implementation detail) the association won't necessarily work - so this will need to be extended in the future to sort out the link addressing. Signed-off-by: Johannes Berg --- include/uapi/linux/nl80211.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index a9a2c9fef295..60ad9a9f153d 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -2687,6 +2687,8 @@ enum nl80211_commands { * various commands that need a link ID to operate. * @NL80211_ATTR_MLO_LINKS: A nested array of links, each containing some * per-link information and a link ID. 
+ * @NL80211_ATTR_MLD_ADDR: An MLD address, used with various commands such as + * authenticate/associate. * * @NUM_NL80211_ATTR: total number of nl80211_attrs available * @NL80211_ATTR_MAX: highest attribute number currently defined @@ -3204,6 +3206,7 @@ enum nl80211_attrs { NL80211_ATTR_MLO_LINKS, NL80211_ATTR_MLO_LINK_ID, + NL80211_ATTR_MLD_ADDR, /* add attributes here, update the policy in nl80211.c */ -- cgit v1.2.3 From efbabc11650040c64884ff3019b88c7bcc0ceb1d Mon Sep 17 00:00:00 2001 From: Veerendranath Jakkam Date: Wed, 8 Jun 2022 14:46:37 +0530 Subject: cfg80211: Indicate MLO connection info in connect and roam callbacks The MLO links used for connection with an MLD AP are decided by the driver in case of SME offloaded to driver. Add support for the drivers to indicate the information of links used for MLO connection in connect and roam callbacks, update the connected links information in wdev from connect/roam result sent by driver. Also, send the connected links information to userspace. Add a netlink flag attribute to indicate that userspace supports handling of MLO connection. Drivers must not do MLO connection when this flag is not set. This is to maintain backwards compatibility with older supplicant versions which don't have support for MLO connection. Signed-off-by: Veerendranath Jakkam Signed-off-by: Johannes Berg --- include/uapi/linux/nl80211.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index 60ad9a9f153d..89f64f46b98d 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -2690,6 +2690,10 @@ enum nl80211_commands { * @NL80211_ATTR_MLD_ADDR: An MLD address, used with various commands such as * authenticate/associate. * + * @NL80211_ATTR_MLO_SUPPORT: Flag attribute to indicate user space supports MLO + * connection. Used with %NL80211_CMD_CONNECT.
If this attribute is not + * included in NL80211_CMD_CONNECT drivers must not perform MLO connection. + * * @NUM_NL80211_ATTR: total number of nl80211_attrs available * @NL80211_ATTR_MAX: highest attribute number currently defined * @__NL80211_ATTR_AFTER_LAST: internal use @@ -3208,6 +3212,8 @@ enum nl80211_attrs { NL80211_ATTR_MLO_LINK_ID, NL80211_ATTR_MLD_ADDR, + NL80211_ATTR_MLO_SUPPORT, + /* add attributes here, update the policy in nl80211.c */ __NL80211_ATTR_AFTER_LAST, -- cgit v1.2.3 From e6445be4f549a0342cd9b8672b82e3b1d85d017f Mon Sep 17 00:00:00 2001 From: Liu Ying Date: Sat, 11 Jun 2022 22:14:08 +0800 Subject: media: uapi: Add some RGB bus formats for i.MX8qm/qxp pixel combiner This patch adds RGB666_1X30_CPADLO, RGB888_1X30_CPADLO, RGB666_1X36_CPADLO and RGB888_1X36_CPADLO bus formats used by i.MX8qm/qxp pixel combiner. The RGB pixels with padding low per component are transmitted on a 30-bit input bus(10-bit per component) from a display controller or a 36-bit output bus(12-bit per component) to a pixel link. 
Reviewed-by: Robert Foss Reviewed-by: Laurent Pinchart Signed-off-by: Liu Ying Acked-by: Sakari Ailus Signed-off-by: Robert Foss Link: https://patchwork.freedesktop.org/patch/msgid/20220611141421.718743-2-victor.liu@nxp.com --- include/uapi/linux/media-bus-format.h | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/media-bus-format.h b/include/uapi/linux/media-bus-format.h index 0dfc11ee243a..ec3323dbb927 100644 --- a/include/uapi/linux/media-bus-format.h +++ b/include/uapi/linux/media-bus-format.h @@ -34,7 +34,7 @@ #define MEDIA_BUS_FMT_FIXED 0x0001 -/* RGB - next is 0x101e */ +/* RGB - next is 0x1022 */ #define MEDIA_BUS_FMT_RGB444_1X12 0x1016 #define MEDIA_BUS_FMT_RGB444_2X8_PADHI_BE 0x1001 #define MEDIA_BUS_FMT_RGB444_2X8_PADHI_LE 0x1002 @@ -59,9 +59,13 @@ #define MEDIA_BUS_FMT_RGB888_3X8_DELTA 0x101d #define MEDIA_BUS_FMT_RGB888_1X7X4_SPWG 0x1011 #define MEDIA_BUS_FMT_RGB888_1X7X4_JEIDA 0x1012 +#define MEDIA_BUS_FMT_RGB666_1X30_CPADLO 0x101e +#define MEDIA_BUS_FMT_RGB888_1X30_CPADLO 0x101f #define MEDIA_BUS_FMT_ARGB8888_1X32 0x100d #define MEDIA_BUS_FMT_RGB888_1X32_PADHI 0x100f #define MEDIA_BUS_FMT_RGB101010_1X30 0x1018 +#define MEDIA_BUS_FMT_RGB666_1X36_CPADLO 0x1020 +#define MEDIA_BUS_FMT_RGB888_1X36_CPADLO 0x1021 #define MEDIA_BUS_FMT_RGB121212_1X36 0x1019 #define MEDIA_BUS_FMT_RGB161616_1X48 0x101a -- cgit v1.2.3 From 084cc29f8bbb034cf30a7ee07a816c115e0c28df Mon Sep 17 00:00:00 2001 From: Ben Gardon Date: Mon, 13 Jun 2022 21:25:21 +0000 Subject: KVM: x86/MMU: Allow NX huge pages to be disabled on a per-vm basis In some cases, the NX hugepage mitigation for iTLB multihit is not needed for all guests on a host. Allow disabling the mitigation on a per-VM basis to avoid the performance hit of NX hugepages on trusted workloads. In order to disable NX hugepages on a VM, ensure that the userspace actor has permission to reboot the system. 
Since disabling NX hugepages would allow a guest to crash the system, it is similar to reboot permissions. Ideally, KVM would require userspace to prove it has access to KVM's nx_huge_pages module param, e.g. so that userspace can opt out without needing full reboot permissions. But getting access to the module param file info is difficult because it is buried in layers of sysfs and module glue. Requiring CAP_SYS_BOOT is sufficient for all known use cases. Suggested-by: Jim Mattson Reviewed-by: David Matlack Reviewed-by: Peter Xu Signed-off-by: Ben Gardon Message-Id: <20220613212523.3436117-9-bgardon@google.com> Signed-off-by: Paolo Bonzini --- include/uapi/linux/kvm.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index 7569b4ec199c..a36e78710382 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -1166,6 +1166,7 @@ struct kvm_ppc_resize_hpt { #define KVM_CAP_S390_PROTECTED_DUMP 217 #define KVM_CAP_X86_TRIPLE_FAULT_EVENT 218 #define KVM_CAP_X86_NOTIFY_VMEXIT 219 +#define KVM_CAP_VM_DISABLE_NX_HUGE_PAGES 220 #ifdef KVM_CAP_IRQ_ROUTING -- cgit v1.2.3 From 0a2ff7cc8ad48a86939a91bd3457f38e59e741a1 Mon Sep 17 00:00:00 2001 From: Hangbin Liu Date: Tue, 21 Jun 2022 15:49:19 +0800 Subject: Bonding: add per-port priority for failover re-selection Add per port priority support for bonding active slave re-selection during failover. A higher number means higher priority in selection. The primary slave still has the highest priority. This option also follows the primary_reselect rules. This option could only be configured via netlink. Signed-off-by: Hangbin Liu Acked-by: Jonathan Toppins Signed-off-by: David S. 
Miller --- include/uapi/linux/if_link.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h index 5f58dcfe2787..e36d9d2c65a7 100644 --- a/include/uapi/linux/if_link.h +++ b/include/uapi/linux/if_link.h @@ -963,6 +963,7 @@ enum { IFLA_BOND_SLAVE_AD_AGGREGATOR_ID, IFLA_BOND_SLAVE_AD_ACTOR_OPER_PORT_STATE, IFLA_BOND_SLAVE_AD_PARTNER_OPER_PORT_STATE, + IFLA_BOND_SLAVE_PRIO, __IFLA_BOND_SLAVE_MAX, }; -- cgit v1.2.3 From a08d6a6dc82036cbd889fe3d53f9c69dc13436eb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9ment=20L=C3=A9ger?= Date: Fri, 24 Jun 2022 16:39:48 +0200 Subject: net: dsa: add Renesas RZ/N1 switch tag driver MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The switch that is present on the Renesas RZ/N1 SoC uses a specific VLAN value followed by 6 bytes which contains forwarding configuration. Signed-off-by: Clément Léger Reviewed-by: Vladimir Oltean Reviewed-by: Florian Fainelli Signed-off-by: David S. 
Miller --- include/uapi/linux/if_ether.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/if_ether.h b/include/uapi/linux/if_ether.h index 1d0bccc3fa54..d370165bc621 100644 --- a/include/uapi/linux/if_ether.h +++ b/include/uapi/linux/if_ether.h @@ -116,6 +116,7 @@ #define ETH_P_QINQ3 0x9300 /* deprecated QinQ VLAN [ NOT AN OFFICIALLY REGISTERED ID ] */ #define ETH_P_EDSA 0xDADA /* Ethertype DSA [ NOT AN OFFICIALLY REGISTERED ID ] */ #define ETH_P_DSA_8021Q 0xDADB /* Fake VLAN Header for DSA [ NOT AN OFFICIALLY REGISTERED ID ] */ +#define ETH_P_DSA_A5PSW 0xE001 /* A5PSW Tag Value [ NOT AN OFFICIALLY REGISTERED ID ] */ #define ETH_P_IFE 0xED3E /* ForCES inter-FE LFB type */ #define ETH_P_AF_IUCV 0xFBFB /* IBM af_iucv [ NOT AN OFFICIALLY REGISTERED ID ] */ -- cgit v1.2.3 From e23ee9d2c4ccb08fdfee3aea0a04a27bccd77433 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ilpo=20J=C3=A4rvinen?= Date: Fri, 24 Jun 2022 23:54:21 +0300 Subject: serial: Use bits for UART_LSR_BRK_ERROR_BITS/MSR_ANY_DELTA MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Instead of listing the bits for UART_LSR_BRK_ERROR_BITS and UART_MSR_ANY_DELTA in comment, use them to define instead. 
Reviewed-by: Jiri Slaby Signed-off-by: Ilpo Järvinen Link: https://lore.kernel.org/r/20220624205424.12686-4-ilpo.jarvinen@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- include/uapi/linux/serial_reg.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/serial_reg.h b/include/uapi/linux/serial_reg.h index f51bc8f36813..bab3b39266cc 100644 --- a/include/uapi/linux/serial_reg.h +++ b/include/uapi/linux/serial_reg.h @@ -139,7 +139,7 @@ #define UART_LSR_PE 0x04 /* Parity error indicator */ #define UART_LSR_OE 0x02 /* Overrun error indicator */ #define UART_LSR_DR 0x01 /* Receiver data ready */ -#define UART_LSR_BRK_ERROR_BITS 0x1E /* BI, FE, PE, OE bits */ +#define UART_LSR_BRK_ERROR_BITS (UART_LSR_BI|UART_LSR_FE|UART_LSR_PE|UART_LSR_OE) #define UART_MSR 6 /* In: Modem Status Register */ #define UART_MSR_DCD 0x80 /* Data Carrier Detect */ @@ -150,7 +150,7 @@ #define UART_MSR_TERI 0x04 /* Trailing edge ring indicator */ #define UART_MSR_DDSR 0x02 /* Delta DSR */ #define UART_MSR_DCTS 0x01 /* Delta CTS */ -#define UART_MSR_ANY_DELTA 0x0F /* Any of the delta bits! */ +#define UART_MSR_ANY_DELTA (UART_MSR_DDCD|UART_MSR_TERI|UART_MSR_DDSR|UART_MSR_DCTS) #define UART_SCR 7 /* I/O: Scratch Register */ -- cgit v1.2.3 From 4f768e94774c58c9f7f54ebd38dadf172970046a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ilpo=20J=C3=A4rvinen?= Date: Fri, 24 Jun 2022 23:42:09 +0300 Subject: serial: Support for RS-485 multipoint addresses MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add support for RS-485 multipoint addressing using 9th bit [*]. The addressing mode is configured through ->rs485_config(). ADDRB in termios indicates 9th bit addressing mode is enabled. In this mode, 9th bit is used to indicate an address (byte) within the communication line. 
ADDRB can only be enabled/disabled through ->rs485_config() that is also responsible for setting the destination and receiver (filter) addresses. Add traps to detect unwanted changes to struct serial_rs485 layout using static_assert(). [*] Technically, RS485 is just an electronic spec and does not itself specify the 9th bit addressing mode but 9th bit seems at least "semi-standard" way to do addressing with RS485. Signed-off-by: Ilpo Järvinen Link: https://lore.kernel.org/r/20220624204210.11112-6-ilpo.jarvinen@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- include/uapi/linux/serial.h | 20 ++++++++++++++++++-- 1 file changed, 18 insertions(+), 2 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/serial.h b/include/uapi/linux/serial.h index fa6b16e5fdd8..cea06924b295 100644 --- a/include/uapi/linux/serial.h +++ b/include/uapi/linux/serial.h @@ -126,10 +126,26 @@ struct serial_rs485 { #define SER_RS485_TERMINATE_BUS (1 << 5) /* Enable bus termination (if supported) */ + +/* RS-485 addressing mode */ +#define SER_RS485_ADDRB (1 << 6) /* Enable addressing mode */ +#define SER_RS485_ADDR_RECV (1 << 7) /* Receive address filter */ +#define SER_RS485_ADDR_DEST (1 << 8) /* Destination address */ + __u32 delay_rts_before_send; /* Delay before send (milliseconds) */ __u32 delay_rts_after_send; /* Delay after send (milliseconds) */ - __u32 padding[5]; /* Memory is cheap, new structs - are a royal PITA .. */ + + /* The fields below are defined by flags */ + union { + __u32 padding[5]; /* Memory is cheap, new structs are a pain */ + + struct { + __u8 addr_recv; + __u8 addr_dest; + __u8 padding0[2]; + __u32 padding1[4]; + }; + }; }; /* -- cgit v1.2.3 From ec5ad331680c96ef3dd30dc297b206988023b9e1 Mon Sep 17 00:00:00 2001 From: Max Staudt Date: Sat, 18 Jun 2022 20:01:34 +0200 Subject: tty: Add N_CAN327 line discipline ID for ELM327 based CAN driver The actual driver will be added via the CAN tree. 
Acked-by: Marc Kleine-Budde Signed-off-by: Max Staudt Link: https://lore.kernel.org/r/20220618180134.9890-1-max@enpas.org Signed-off-by: Greg Kroah-Hartman --- include/uapi/linux/tty.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/tty.h b/include/uapi/linux/tty.h index 9d0f06bfbac3..68aeae2addec 100644 --- a/include/uapi/linux/tty.h +++ b/include/uapi/linux/tty.h @@ -38,8 +38,9 @@ #define N_NULL 27 /* Null ldisc used for error handling */ #define N_MCTP 28 /* MCTP-over-serial */ #define N_DEVELOPMENT 29 /* Manual out-of-tree testing */ +#define N_CAN327 30 /* ELM327 based OBD-II interfaces */ /* Always the newest line discipline + 1 */ -#define NR_LDISCS 30 +#define NR_LDISCS 31 #endif /* _UAPI_LINUX_TTY_H */ -- cgit v1.2.3 From 9864bb4801331daa48514face9d0f4861e4d485b Mon Sep 17 00:00:00 2001 From: Li Li Date: Thu, 26 May 2022 15:00:18 -0700 Subject: Binder: add TF_UPDATE_TXN to replace outdated txn When the target process is busy, incoming oneway transactions are queued in the async_todo list. If the clients continue sending extra oneway transactions while the target process is frozen, this queue can become too large to accommodate new transactions. That's why binder driver introduced ONEWAY_SPAM_DETECTION to detect this situation. It's helpful to debug the async binder buffer exhausting issue, but the issue itself isn't solved directly. In real cases applications are designed to send oneway transactions repeatedly, delivering updated information to the target process. Typical examples are Wi-Fi signal strength and some real time sensor data. Even if the apps might only care about the latest information, all outdated oneway transactions are still accumulated there until the frozen process is thawed later. For this kind of situations, there's no existing method to skip those outdated transactions and deliver the latest one only. This patch introduces a new transaction flag TF_UPDATE_TXN.
To use it, apps can set this new flag along with TF_ONE_WAY. When such an oneway transaction is to be queued into the async_todo list of a frozen process, binder driver will check if any previous pending transactions can be superseded by comparing their code, flags and target node. If such an outdated pending transaction is found, the latest transaction will supersede that outdated one. This effectively prevents the async binder buffer running out and saves unnecessary binder read workloads. Acked-by: Todd Kjos Signed-off-by: Li Li Link: https://lore.kernel.org/r/20220526220018.3334775-2-dualli@chromium.org Signed-off-by: Greg Kroah-Hartman --- include/uapi/linux/android/binder.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/android/binder.h b/include/uapi/linux/android/binder.h index 986333cf5bbe..e72e4de8f452 100644 --- a/include/uapi/linux/android/binder.h +++ b/include/uapi/linux/android/binder.h @@ -287,6 +287,7 @@ enum transaction_flags { TF_STATUS_CODE = 0x08, /* contents are a 32-bit status code */ TF_ACCEPT_FDS = 0x10, /* allow replies with file descriptors */ TF_CLEAR_BUF = 0x20, /* clear buffer on txn complete */ + TF_UPDATE_TXN = 0x40, /* update the outdated pending async txn */ }; struct binder_transaction_data { -- cgit v1.2.3 From 713eb3c1261a1f89e35bdf233265aa5a2c46e9b2 Mon Sep 17 00:00:00 2001 From: Max Staudt Date: Sat, 18 Jun 2022 20:01:34 +0200 Subject: tty: Add N_CAN327 line discipline ID for ELM327 based CAN driver The actual driver will be added via the CAN tree.
Link: https://lore.kernel.org/all/20220618180134.9890-1-max@enpas.org Link: https://lore.kernel.org/all/Yrm9Ezlw1dLmIxyS@kroah.com Signed-off-by: Max Staudt Acked-by: Marc Kleine-Budde Acked-by: Greg Kroah-Hartman Signed-off-by: Marc Kleine-Budde --- include/uapi/linux/tty.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/tty.h b/include/uapi/linux/tty.h index 9d0f06bfbac3..68aeae2addec 100644 --- a/include/uapi/linux/tty.h +++ b/include/uapi/linux/tty.h @@ -38,8 +38,9 @@ #define N_NULL 27 /* Null ldisc used for error handling */ #define N_MCTP 28 /* MCTP-over-serial */ #define N_DEVELOPMENT 29 /* Manual out-of-tree testing */ +#define N_CAN327 30 /* ELM327 based OBD-II interfaces */ /* Always the newest line discipline + 1 */ -#define NR_LDISCS 30 +#define NR_LDISCS 31 #endif /* _UAPI_LINUX_TTY_H */ -- cgit v1.2.3 From 119a784c81270eb88e573174ed2209225d646656 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Thu, 16 Jun 2022 11:06:23 -0700 Subject: perf/core: Add a new read format to get a number of lost samples Sometimes we want to know an accurate number of samples even if it's lost. Currently PERF_RECORD_LOST is generated for a ring-buffer which might be shared with other events. So it's hard to know per-event lost count. Add event->lost_samples field and PERF_FORMAT_LOST to retrieve it from userspace.
Original-patch-by: Jiri Olsa Signed-off-by: Namhyung Kim Signed-off-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r/20220616180623.1358843-1-namhyung@kernel.org --- include/uapi/linux/perf_event.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/perf_event.h b/include/uapi/linux/perf_event.h index d37629dbad72..0474ee362151 100644 --- a/include/uapi/linux/perf_event.h +++ b/include/uapi/linux/perf_event.h @@ -301,6 +301,7 @@ enum { * { u64 time_enabled; } && PERF_FORMAT_TOTAL_TIME_ENABLED * { u64 time_running; } && PERF_FORMAT_TOTAL_TIME_RUNNING * { u64 id; } && PERF_FORMAT_ID + * { u64 lost; } && PERF_FORMAT_LOST * } && !PERF_FORMAT_GROUP * * { u64 nr; @@ -308,6 +309,7 @@ enum { * { u64 time_running; } && PERF_FORMAT_TOTAL_TIME_RUNNING * { u64 value; * { u64 id; } && PERF_FORMAT_ID + * { u64 lost; } && PERF_FORMAT_LOST * } cntr[nr]; * } && PERF_FORMAT_GROUP * }; @@ -317,8 +319,9 @@ enum perf_event_read_format { PERF_FORMAT_TOTAL_TIME_RUNNING = 1U << 1, PERF_FORMAT_ID = 1U << 2, PERF_FORMAT_GROUP = 1U << 3, + PERF_FORMAT_LOST = 1U << 4, - PERF_FORMAT_MAX = 1U << 4, /* non-ABI */ + PERF_FORMAT_MAX = 1U << 5, /* non-ABI */ }; #define PERF_ATTR_SIZE_VER0 64 /* sizeof first published struct */ -- cgit v1.2.3 From 94dfc73e7cf4a31da66b8843f0b9283ddd6b8381 Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Wed, 6 Apr 2022 19:36:51 -0500 Subject: treewide: uapi: Replace zero-length arrays with flexible-array members MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit There is a regular need in the kernel to provide a way to declare having a dynamically sized set of trailing elements in a structure. Kernel code should always use “flexible array members”[1] for these cases. The older style of one-element or zero-length arrays should no longer be used[2]. 
This code was transformed with the help of Coccinelle: (linux-5.19-rc2$ spatch --jobs $(getconf _NPROCESSORS_ONLN) --sp-file script.cocci --include-headers --dir . > output.patch) @@ identifier S, member, array; type T1, T2; @@ struct S { ... T1 member; T2 array[ - 0 ]; }; -fstrict-flex-arrays=3 is coming and we need to land these changes to prevent issues like these in the short future: ../fs/minix/dir.c:337:3: warning: 'strcpy' will always overflow; destination buffer has size 0, but the source string has length 2 (including NUL byte) [-Wfortify-source] strcpy(de3->name, "."); ^ Since these are all [0] to [] changes, the risk to UAPI is nearly zero. If this breaks anything, we can use a union with a new member name. [1] https://en.wikipedia.org/wiki/Flexible_array_member [2] https://www.kernel.org/doc/html/v5.16/process/deprecated.html#zero-length-and-one-element-arrays Link: https://github.com/KSPP/linux/issues/78 Build-tested-by: kernel test robot Link: https://lore.kernel.org/lkml/62b675ec.wKX6AOZ6cbE71vtF%25lkp@intel.com/ Acked-by: Dan Williams # For ndctl.h Signed-off-by: Gustavo A. R. 
Silva --- include/uapi/linux/blkzoned.h | 2 +- include/uapi/linux/bpf.h | 2 +- include/uapi/linux/btrfs.h | 10 ++++----- include/uapi/linux/btrfs_tree.h | 2 +- include/uapi/linux/can/bcm.h | 2 +- include/uapi/linux/connector.h | 2 +- include/uapi/linux/cycx_cfm.h | 2 +- include/uapi/linux/dm-ioctl.h | 8 +++---- include/uapi/linux/dm-log-userspace.h | 2 +- include/uapi/linux/ethtool.h | 28 ++++++++++++------------- include/uapi/linux/fanotify.h | 2 +- include/uapi/linux/fiemap.h | 2 +- include/uapi/linux/firewire-cdev.h | 12 +++++------ include/uapi/linux/fs.h | 2 +- include/uapi/linux/if_alg.h | 2 +- include/uapi/linux/if_arcnet.h | 6 +++--- include/uapi/linux/if_pppox.h | 4 ++-- include/uapi/linux/if_tun.h | 2 +- include/uapi/linux/igmp.h | 6 +++--- include/uapi/linux/inet_diag.h | 2 +- include/uapi/linux/inotify.h | 2 +- include/uapi/linux/ip.h | 4 ++-- include/uapi/linux/ip_vs.h | 4 ++-- include/uapi/linux/iso_fs.h | 4 ++-- include/uapi/linux/jffs2.h | 8 +++---- include/uapi/linux/kcov.h | 2 +- include/uapi/linux/kvm.h | 8 +++---- include/uapi/linux/minix_fs.h | 4 ++-- include/uapi/linux/mmc/ioctl.h | 2 +- include/uapi/linux/ndctl.h | 10 ++++----- include/uapi/linux/net_dropmon.h | 4 ++-- include/uapi/linux/netfilter/x_tables.h | 4 ++-- include/uapi/linux/netfilter_arp/arp_tables.h | 6 +++--- include/uapi/linux/netfilter_bridge/ebt_among.h | 2 +- include/uapi/linux/netfilter_ipv4/ip_tables.h | 6 +++--- include/uapi/linux/netfilter_ipv6/ip6_tables.h | 4 ++-- include/uapi/linux/perf_event.h | 2 +- include/uapi/linux/pkt_cls.h | 4 ++-- include/uapi/linux/raid/md_p.h | 2 +- include/uapi/linux/random.h | 2 +- include/uapi/linux/romfs_fs.h | 4 ++-- include/uapi/linux/rtnetlink.h | 2 +- include/uapi/linux/sctp.h | 10 ++++----- include/uapi/linux/seg6.h | 2 +- include/uapi/linux/seg6_iptunnel.h | 2 +- include/uapi/linux/stm.h | 2 +- include/uapi/linux/target_core_user.h | 2 +- include/uapi/linux/usb/audio.h | 2 +- include/uapi/linux/usb/cdc.h | 6 +++--- 
include/uapi/linux/usb/ch9.h | 2 +- include/uapi/linux/usb/raw_gadget.h | 4 ++-- include/uapi/linux/usbdevice_fs.h | 4 ++-- include/uapi/linux/vhost_types.h | 4 ++-- include/uapi/linux/virtio_9p.h | 2 +- include/uapi/linux/xfrm.h | 10 ++++----- 55 files changed, 121 insertions(+), 121 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/blkzoned.h b/include/uapi/linux/blkzoned.h index 656a326821a2..b80fcc9ea525 100644 --- a/include/uapi/linux/blkzoned.h +++ b/include/uapi/linux/blkzoned.h @@ -130,7 +130,7 @@ struct blk_zone_report { __u64 sector; __u32 nr_zones; __u32 flags; - struct blk_zone zones[0]; + struct blk_zone zones[]; }; /** diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index f4009dbdf62d..e4b33ba06f00 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -79,7 +79,7 @@ struct bpf_insn { /* Key of an a BPF_MAP_TYPE_LPM_TRIE entry */ struct bpf_lpm_trie_key { __u32 prefixlen; /* up to 32 for AF_INET, 128 for AF_INET6 */ - __u8 data[0]; /* Arbitrary size */ + __u8 data[]; /* Arbitrary size */ }; struct bpf_cgroup_storage_key { diff --git a/include/uapi/linux/btrfs.h b/include/uapi/linux/btrfs.h index d956b2993970..3d0edbe3b991 100644 --- a/include/uapi/linux/btrfs.h +++ b/include/uapi/linux/btrfs.h @@ -93,7 +93,7 @@ struct btrfs_qgroup_inherit { __u64 num_ref_copies; __u64 num_excl_copies; struct btrfs_qgroup_limit lim; - __u64 qgroups[0]; + __u64 qgroups[]; }; struct btrfs_ioctl_qgroup_limit_args { @@ -561,7 +561,7 @@ struct btrfs_ioctl_search_args_v2 { __u64 buf_size; /* in - size of buffer * out - on EOVERFLOW: needed size * to store item */ - __u64 buf[0]; /* out - found items */ + __u64 buf[]; /* out - found items */ }; struct btrfs_ioctl_clone_range_args { @@ -632,7 +632,7 @@ struct btrfs_ioctl_same_args { __u16 dest_count; /* in - total elements in info array */ __u16 reserved1; __u32 reserved2; - struct btrfs_ioctl_same_extent_info info[0]; + struct btrfs_ioctl_same_extent_info info[]; 
}; struct btrfs_ioctl_space_info { @@ -644,7 +644,7 @@ struct btrfs_ioctl_space_info { struct btrfs_ioctl_space_args { __u64 space_slots; __u64 total_spaces; - struct btrfs_ioctl_space_info spaces[0]; + struct btrfs_ioctl_space_info spaces[]; }; struct btrfs_data_container { @@ -652,7 +652,7 @@ struct btrfs_data_container { __u32 bytes_missing; /* out -- additional bytes needed for result */ __u32 elem_cnt; /* out */ __u32 elem_missed; /* out */ - __u64 val[0]; /* out */ + __u64 val[]; /* out */ }; struct btrfs_ioctl_ino_path_args { diff --git a/include/uapi/linux/btrfs_tree.h b/include/uapi/linux/btrfs_tree.h index d4117152d907..5f32a2a495dc 100644 --- a/include/uapi/linux/btrfs_tree.h +++ b/include/uapi/linux/btrfs_tree.h @@ -575,7 +575,7 @@ struct btrfs_inode_extref { __le64 parent_objectid; __le64 index; __le16 name_len; - __u8 name[0]; + __u8 name[]; /* name goes here */ } __attribute__ ((__packed__)); diff --git a/include/uapi/linux/can/bcm.h b/include/uapi/linux/can/bcm.h index dd2b925b09ac..f1e45f533a72 100644 --- a/include/uapi/linux/can/bcm.h +++ b/include/uapi/linux/can/bcm.h @@ -71,7 +71,7 @@ struct bcm_msg_head { struct bcm_timeval ival1, ival2; canid_t can_id; __u32 nframes; - struct can_frame frames[0]; + struct can_frame frames[]; }; enum { diff --git a/include/uapi/linux/connector.h b/include/uapi/linux/connector.h index 3738936149a2..5ae131c3f145 100644 --- a/include/uapi/linux/connector.h +++ b/include/uapi/linux/connector.h @@ -75,7 +75,7 @@ struct cn_msg { __u16 len; /* Length of the following data */ __u16 flags; - __u8 data[0]; + __u8 data[]; }; #endif /* _UAPI__CONNECTOR_H */ diff --git a/include/uapi/linux/cycx_cfm.h b/include/uapi/linux/cycx_cfm.h index 51f541942ff9..91778c8024b1 100644 --- a/include/uapi/linux/cycx_cfm.h +++ b/include/uapi/linux/cycx_cfm.h @@ -91,7 +91,7 @@ struct cycx_firmware { unsigned short reserved[6]; char descr[CFM_DESCR_LEN]; struct cycx_fw_info info; - unsigned char image[0]; + unsigned char image[]; }; struct 
cycx_fw_header { diff --git a/include/uapi/linux/dm-ioctl.h b/include/uapi/linux/dm-ioctl.h index 2e9550fef90f..8c97d75f3104 100644 --- a/include/uapi/linux/dm-ioctl.h +++ b/include/uapi/linux/dm-ioctl.h @@ -182,7 +182,7 @@ struct dm_target_spec { struct dm_target_deps { __u32 count; /* Array size */ __u32 padding; /* unused */ - __u64 dev[0]; /* out */ + __u64 dev[]; /* out */ }; /* @@ -192,7 +192,7 @@ struct dm_name_list { __u64 dev; __u32 next; /* offset to the next record from the _start_ of this */ - char name[0]; + char name[]; /* * The following members can be accessed by taking a pointer that @@ -216,7 +216,7 @@ struct dm_target_versions { __u32 next; __u32 version[3]; - char name[0]; + char name[]; }; /* @@ -225,7 +225,7 @@ struct dm_target_versions { struct dm_target_msg { __u64 sector; /* Device sector */ - char message[0]; + char message[]; }; /* diff --git a/include/uapi/linux/dm-log-userspace.h b/include/uapi/linux/dm-log-userspace.h index 5c47a8603376..23dad9565e46 100644 --- a/include/uapi/linux/dm-log-userspace.h +++ b/include/uapi/linux/dm-log-userspace.h @@ -426,7 +426,7 @@ struct dm_ulog_request { __u32 request_type; /* DM_ULOG_* defined above */ __u32 data_size; /* How much data (not including this struct) */ - char data[0]; + char data[]; }; #endif /* __DM_LOG_USERSPACE_H__ */ diff --git a/include/uapi/linux/ethtool.h b/include/uapi/linux/ethtool.h index e0f0ee9bc89e..2d5741fd44bb 100644 --- a/include/uapi/linux/ethtool.h +++ b/include/uapi/linux/ethtool.h @@ -257,7 +257,7 @@ struct ethtool_tunable { __u32 id; __u32 type_id; __u32 len; - void *data[0]; + void *data[]; }; #define DOWNSHIFT_DEV_DEFAULT_COUNT 0xff @@ -322,7 +322,7 @@ struct ethtool_regs { __u32 cmd; __u32 version; __u32 len; - __u8 data[0]; + __u8 data[]; }; /** @@ -348,7 +348,7 @@ struct ethtool_eeprom { __u32 magic; __u32 offset; __u32 len; - __u8 data[0]; + __u8 data[]; }; /** @@ -752,7 +752,7 @@ struct ethtool_gstrings { __u32 cmd; __u32 string_set; __u32 len; - __u8 data[0]; 
+ __u8 data[]; }; /** @@ -777,7 +777,7 @@ struct ethtool_sset_info { __u32 cmd; __u32 reserved; __u64 sset_mask; - __u32 data[0]; + __u32 data[]; }; /** @@ -817,7 +817,7 @@ struct ethtool_test { __u32 flags; __u32 reserved; __u32 len; - __u64 data[0]; + __u64 data[]; }; /** @@ -834,7 +834,7 @@ struct ethtool_test { struct ethtool_stats { __u32 cmd; __u32 n_stats; - __u64 data[0]; + __u64 data[]; }; /** @@ -851,7 +851,7 @@ struct ethtool_stats { struct ethtool_perm_addr { __u32 cmd; __u32 size; - __u8 data[0]; + __u8 data[]; }; /* boolean flags controlling per-interface behavior characteristics. @@ -1160,7 +1160,7 @@ struct ethtool_rxnfc { struct ethtool_rxfh_indir { __u32 cmd; __u32 size; - __u32 ring_index[0]; + __u32 ring_index[]; }; /** @@ -1201,7 +1201,7 @@ struct ethtool_rxfh { __u8 hfunc; __u8 rsvd8[3]; __u32 rsvd32; - __u32 rss_config[0]; + __u32 rss_config[]; }; #define ETH_RXFH_CONTEXT_ALLOC 0xffffffff #define ETH_RXFH_INDIR_NO_CHANGE 0xffffffff @@ -1286,7 +1286,7 @@ struct ethtool_dump { __u32 version; __u32 flag; __u32 len; - __u8 data[0]; + __u8 data[]; }; #define ETH_FW_DUMP_DISABLE 0 @@ -1318,7 +1318,7 @@ struct ethtool_get_features_block { struct ethtool_gfeatures { __u32 cmd; __u32 size; - struct ethtool_get_features_block features[0]; + struct ethtool_get_features_block features[]; }; /** @@ -1340,7 +1340,7 @@ struct ethtool_set_features_block { struct ethtool_sfeatures { __u32 cmd; __u32 size; - struct ethtool_set_features_block features[0]; + struct ethtool_set_features_block features[]; }; /** @@ -2087,7 +2087,7 @@ struct ethtool_link_settings { __u8 master_slave_state; __u8 reserved1[1]; __u32 reserved[7]; - __u32 link_mode_masks[0]; + __u32 link_mode_masks[]; /* layout of link_mode_masks fields: * __u32 map_supported[link_mode_masks_nwords]; * __u32 map_advertising[link_mode_masks_nwords]; diff --git a/include/uapi/linux/fanotify.h b/include/uapi/linux/fanotify.h index f1f89132d60e..197df344307d 100644 --- a/include/uapi/linux/fanotify.h +++ 
b/include/uapi/linux/fanotify.h @@ -162,7 +162,7 @@ struct fanotify_event_info_fid { * Following is an opaque struct file_handle that can be passed as * an argument to open_by_handle_at(2). */ - unsigned char handle[0]; + unsigned char handle[]; }; /* diff --git a/include/uapi/linux/fiemap.h b/include/uapi/linux/fiemap.h index 07c1cdcb715e..24ca0c00cae3 100644 --- a/include/uapi/linux/fiemap.h +++ b/include/uapi/linux/fiemap.h @@ -34,7 +34,7 @@ struct fiemap { __u32 fm_mapped_extents;/* number of extents that were mapped (out) */ __u32 fm_extent_count; /* size of fm_extents array (in) */ __u32 fm_reserved; - struct fiemap_extent fm_extents[0]; /* array of mapped extents (out) */ + struct fiemap_extent fm_extents[]; /* array of mapped extents (out) */ }; #define FIEMAP_MAX_OFFSET (~0ULL) diff --git a/include/uapi/linux/firewire-cdev.h b/include/uapi/linux/firewire-cdev.h index 5effa9832802..92be3ea3c6e0 100644 --- a/include/uapi/linux/firewire-cdev.h +++ b/include/uapi/linux/firewire-cdev.h @@ -118,7 +118,7 @@ struct fw_cdev_event_response { __u32 type; __u32 rcode; __u32 length; - __u32 data[0]; + __u32 data[]; }; /** @@ -142,7 +142,7 @@ struct fw_cdev_event_request { __u64 offset; __u32 handle; __u32 length; - __u32 data[0]; + __u32 data[]; }; /** @@ -205,7 +205,7 @@ struct fw_cdev_event_request2 { __u32 generation; __u32 handle; __u32 length; - __u32 data[0]; + __u32 data[]; }; /** @@ -265,7 +265,7 @@ struct fw_cdev_event_iso_interrupt { __u32 type; __u32 cycle; __u32 header_length; - __u32 header[0]; + __u32 header[]; }; /** @@ -355,7 +355,7 @@ struct fw_cdev_event_phy_packet { __u32 type; __u32 rcode; __u32 length; - __u32 data[0]; + __u32 data[]; }; /** @@ -803,7 +803,7 @@ struct fw_cdev_set_iso_channels { */ struct fw_cdev_iso_packet { __u32 control; - __u32 header[0]; + __u32 header[]; }; /** diff --git a/include/uapi/linux/fs.h b/include/uapi/linux/fs.h index bdf7b404b3e7..b7b56871029c 100644 --- a/include/uapi/linux/fs.h +++ b/include/uapi/linux/fs.h @@ 
-90,7 +90,7 @@ struct file_dedupe_range { __u16 dest_count; /* in - total elements in info array */ __u16 reserved1; /* must be zero */ __u32 reserved2; /* must be zero */ - struct file_dedupe_range_info info[0]; + struct file_dedupe_range_info info[]; }; /* And dynamically-tunable limits and defaults: */ diff --git a/include/uapi/linux/if_alg.h b/include/uapi/linux/if_alg.h index dc52a11ba6d1..578b18aab821 100644 --- a/include/uapi/linux/if_alg.h +++ b/include/uapi/linux/if_alg.h @@ -42,7 +42,7 @@ struct sockaddr_alg_new { struct af_alg_iv { __u32 ivlen; - __u8 iv[0]; + __u8 iv[]; }; /* Socket options */ diff --git a/include/uapi/linux/if_arcnet.h b/include/uapi/linux/if_arcnet.h index 683878036d76..b122cfac7128 100644 --- a/include/uapi/linux/if_arcnet.h +++ b/include/uapi/linux/if_arcnet.h @@ -60,7 +60,7 @@ struct arc_rfc1201 { __u8 proto; /* protocol ID field - varies */ __u8 split_flag; /* for use with split packets */ __be16 sequence; /* sequence number */ - __u8 payload[0]; /* space remaining in packet (504 bytes)*/ + __u8 payload[]; /* space remaining in packet (504 bytes)*/ }; #define RFC1201_HDR_SIZE 4 @@ -69,7 +69,7 @@ struct arc_rfc1201 { */ struct arc_rfc1051 { __u8 proto; /* ARC_P_RFC1051_ARP/RFC1051_IP */ - __u8 payload[0]; /* 507 bytes */ + __u8 payload[]; /* 507 bytes */ }; #define RFC1051_HDR_SIZE 1 @@ -80,7 +80,7 @@ struct arc_rfc1051 { struct arc_eth_encap { __u8 proto; /* Always ARC_P_ETHER */ struct ethhdr eth; /* standard ethernet header (yuck!) 
*/ - __u8 payload[0]; /* 493 bytes */ + __u8 payload[]; /* 493 bytes */ }; #define ETH_ENCAP_HDR_SIZE 14 diff --git a/include/uapi/linux/if_pppox.h b/include/uapi/linux/if_pppox.h index e7a693c28f16..9abd80dcc46f 100644 --- a/include/uapi/linux/if_pppox.h +++ b/include/uapi/linux/if_pppox.h @@ -122,7 +122,7 @@ struct sockaddr_pppol2tpv3in6 { struct pppoe_tag { __be16 tag_type; __be16 tag_len; - char tag_data[0]; + char tag_data[]; } __attribute__ ((packed)); /* Tag identifiers */ @@ -150,7 +150,7 @@ struct pppoe_hdr { __u8 code; __be16 sid; __be16 length; - struct pppoe_tag tag[0]; + struct pppoe_tag tag[]; } __packed; /* Length of entire PPPoE + PPP header */ diff --git a/include/uapi/linux/if_tun.h b/include/uapi/linux/if_tun.h index 454ae31b93c7..2ec07de1d73b 100644 --- a/include/uapi/linux/if_tun.h +++ b/include/uapi/linux/if_tun.h @@ -108,7 +108,7 @@ struct tun_pi { struct tun_filter { __u16 flags; /* TUN_FLT_ flags see above */ __u16 count; /* Number of addresses */ - __u8 addr[0][ETH_ALEN]; + __u8 addr[][ETH_ALEN]; }; #endif /* _UAPI__IF_TUN_H */ diff --git a/include/uapi/linux/igmp.h b/include/uapi/linux/igmp.h index 90c28bc466c6..5930f2437cd1 100644 --- a/include/uapi/linux/igmp.h +++ b/include/uapi/linux/igmp.h @@ -48,7 +48,7 @@ struct igmpv3_grec { __u8 grec_auxwords; __be16 grec_nsrcs; __be32 grec_mca; - __be32 grec_src[0]; + __be32 grec_src[]; }; struct igmpv3_report { @@ -57,7 +57,7 @@ struct igmpv3_report { __sum16 csum; __be16 resv2; __be16 ngrec; - struct igmpv3_grec grec[0]; + struct igmpv3_grec grec[]; }; struct igmpv3_query { @@ -78,7 +78,7 @@ struct igmpv3_query { #endif __u8 qqic; __be16 nsrcs; - __be32 srcs[0]; + __be32 srcs[]; }; #define IGMP_HOST_MEMBERSHIP_QUERY 0x11 /* From RFC1112 */ diff --git a/include/uapi/linux/inet_diag.h b/include/uapi/linux/inet_diag.h index 20ee93f0f876..50655de04c9b 100644 --- a/include/uapi/linux/inet_diag.h +++ b/include/uapi/linux/inet_diag.h @@ -104,7 +104,7 @@ struct inet_diag_hostcond { __u8 family; __u8 
prefix_len; int port; - __be32 addr[0]; + __be32 addr[]; }; struct inet_diag_markcond { diff --git a/include/uapi/linux/inotify.h b/include/uapi/linux/inotify.h index 884b4846b630..b3e165853d5b 100644 --- a/include/uapi/linux/inotify.h +++ b/include/uapi/linux/inotify.h @@ -23,7 +23,7 @@ struct inotify_event { __u32 mask; /* watch mask */ __u32 cookie; /* cookie to synchronize two events */ __u32 len; /* length (including nulls) of name */ - char name[0]; /* stub for possible name */ + char name[]; /* stub for possible name */ }; /* the following are legal, implemented events that user-space can watch for */ diff --git a/include/uapi/linux/ip.h b/include/uapi/linux/ip.h index e00bbb9c47bb..961ec16a26b8 100644 --- a/include/uapi/linux/ip.h +++ b/include/uapi/linux/ip.h @@ -112,13 +112,13 @@ struct ip_auth_hdr { __be16 reserved; __be32 spi; __be32 seq_no; /* Sequence number */ - __u8 auth_data[0]; /* Variable len but >=4. Mind the 64 bit alignment! */ + __u8 auth_data[]; /* Variable len but >=4. Mind the 64 bit alignment! */ }; struct ip_esp_hdr { __be32 spi; __be32 seq_no; /* Sequence number */ - __u8 enc_data[0]; /* Variable len but >=8. Mind the 64 bit alignment! */ + __u8 enc_data[]; /* Variable len but >=8. Mind the 64 bit alignment! 
*/ }; struct ip_comp_hdr { diff --git a/include/uapi/linux/ip_vs.h b/include/uapi/linux/ip_vs.h index 4102ddcb4e14..1ed234e7f251 100644 --- a/include/uapi/linux/ip_vs.h +++ b/include/uapi/linux/ip_vs.h @@ -254,7 +254,7 @@ struct ip_vs_get_dests { unsigned int num_dests; /* the real servers */ - struct ip_vs_dest_entry entrytable[0]; + struct ip_vs_dest_entry entrytable[]; }; @@ -264,7 +264,7 @@ struct ip_vs_get_services { unsigned int num_services; /* service table */ - struct ip_vs_service_entry entrytable[0]; + struct ip_vs_service_entry entrytable[]; }; diff --git a/include/uapi/linux/iso_fs.h b/include/uapi/linux/iso_fs.h index a2555176f6d1..758178f5b52d 100644 --- a/include/uapi/linux/iso_fs.h +++ b/include/uapi/linux/iso_fs.h @@ -137,7 +137,7 @@ struct iso_path_table{ __u8 name_len[2]; /* 721 */ __u8 extent[4]; /* 731 */ __u8 parent[2]; /* 721 */ - char name[0]; + char name[]; } __attribute__((packed)); /* high sierra is identical to iso, except that the date is only 6 bytes, and @@ -154,7 +154,7 @@ struct iso_directory_record { __u8 interleave [ISODCL (28, 28)]; /* 711 */ __u8 volume_sequence_number [ISODCL (29, 32)]; /* 723 */ __u8 name_len [ISODCL (33, 33)]; /* 711 */ - char name [0]; + char name []; } __attribute__((packed)); #define ISOFS_BLOCK_BITS 11 diff --git a/include/uapi/linux/jffs2.h b/include/uapi/linux/jffs2.h index 784ba0b9690a..637ee4a793cf 100644 --- a/include/uapi/linux/jffs2.h +++ b/include/uapi/linux/jffs2.h @@ -123,7 +123,7 @@ struct jffs2_raw_dirent __u8 unused[2]; jint32_t node_crc; jint32_t name_crc; - __u8 name[0]; + __u8 name[]; }; /* The JFFS2 raw inode structure: Used for storage on physical media. */ @@ -155,7 +155,7 @@ struct jffs2_raw_inode jint16_t flags; /* See JFFS2_INO_FLAG_* */ jint32_t data_crc; /* CRC for the (compressed) data. 
*/ jint32_t node_crc; /* CRC for the raw inode (excluding data) */ - __u8 data[0]; + __u8 data[]; }; struct jffs2_raw_xattr { @@ -170,7 +170,7 @@ struct jffs2_raw_xattr { jint16_t value_len; jint32_t data_crc; jint32_t node_crc; - __u8 data[0]; + __u8 data[]; } __attribute__((packed)); struct jffs2_raw_xref @@ -196,7 +196,7 @@ struct jffs2_raw_summary jint32_t padded; /* sum of the size of padding nodes */ jint32_t sum_crc; /* summary information crc */ jint32_t node_crc; /* node crc */ - jint32_t sum[0]; /* inode summary info */ + jint32_t sum[]; /* inode summary info */ }; union jffs2_node_union diff --git a/include/uapi/linux/kcov.h b/include/uapi/linux/kcov.h index 1d0350e44ae3..ed95dba9fa37 100644 --- a/include/uapi/linux/kcov.h +++ b/include/uapi/linux/kcov.h @@ -13,7 +13,7 @@ struct kcov_remote_arg { __u32 area_size; /* Length of coverage buffer in words */ __u32 num_handles; /* Size of handles array */ __aligned_u64 common_handle; - __aligned_u64 handles[0]; + __aligned_u64 handles[]; }; #define KCOV_REMOTE_MAX_HANDLES 0x100 diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index 5088bd9f1922..74dc8bafcb9e 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -542,7 +542,7 @@ struct kvm_coalesced_mmio { struct kvm_coalesced_mmio_ring { __u32 first, last; - struct kvm_coalesced_mmio coalesced_mmio[0]; + struct kvm_coalesced_mmio coalesced_mmio[]; }; #define KVM_COALESCED_MMIO_MAX \ @@ -621,7 +621,7 @@ struct kvm_clear_dirty_log { /* for KVM_SET_SIGNAL_MASK */ struct kvm_signal_mask { __u32 len; - __u8 sigset[0]; + __u8 sigset[]; }; /* for KVM_TPR_ACCESS_REPORTING */ @@ -1221,7 +1221,7 @@ struct kvm_irq_routing_entry { struct kvm_irq_routing { __u32 nr; __u32 flags; - struct kvm_irq_routing_entry entries[0]; + struct kvm_irq_routing_entry entries[]; }; #endif @@ -1341,7 +1341,7 @@ struct kvm_dirty_tlb { struct kvm_reg_list { __u64 n; /* number of regs */ - __u64 reg[0]; + __u64 reg[]; }; struct kvm_one_reg { diff --git 
a/include/uapi/linux/minix_fs.h b/include/uapi/linux/minix_fs.h index 95dbcb17eacd..8d9ca8b2c357 100644 --- a/include/uapi/linux/minix_fs.h +++ b/include/uapi/linux/minix_fs.h @@ -97,11 +97,11 @@ struct minix3_super_block { struct minix_dir_entry { __u16 inode; - char name[0]; + char name[]; }; struct minix3_dir_entry { __u32 inode; - char name[0]; + char name[]; }; #endif diff --git a/include/uapi/linux/mmc/ioctl.h b/include/uapi/linux/mmc/ioctl.h index 27a39847d55c..e7401ade6822 100644 --- a/include/uapi/linux/mmc/ioctl.h +++ b/include/uapi/linux/mmc/ioctl.h @@ -58,7 +58,7 @@ struct mmc_ioc_cmd { */ struct mmc_ioc_multi_cmd { __u64 num_of_cmds; - struct mmc_ioc_cmd cmds[0]; + struct mmc_ioc_cmd cmds[]; }; #define MMC_IOC_CMD _IOWR(MMC_BLOCK_MAJOR, 0, struct mmc_ioc_cmd) diff --git a/include/uapi/linux/ndctl.h b/include/uapi/linux/ndctl.h index 17e02b64ea2e..73516e263627 100644 --- a/include/uapi/linux/ndctl.h +++ b/include/uapi/linux/ndctl.h @@ -30,25 +30,25 @@ struct nd_cmd_get_config_data_hdr { __u32 in_offset; __u32 in_length; __u32 status; - __u8 out_buf[0]; + __u8 out_buf[]; } __packed; struct nd_cmd_set_config_hdr { __u32 in_offset; __u32 in_length; - __u8 in_buf[0]; + __u8 in_buf[]; } __packed; struct nd_cmd_vendor_hdr { __u32 opcode; __u32 in_length; - __u8 in_buf[0]; + __u8 in_buf[]; } __packed; struct nd_cmd_vendor_tail { __u32 status; __u32 out_length; - __u8 out_buf[0]; + __u8 out_buf[]; } __packed; struct nd_cmd_ars_cap { @@ -86,7 +86,7 @@ struct nd_cmd_ars_status { __u32 reserved; __u64 err_address; __u64 length; - } __packed records[0]; + } __packed records[]; } __packed; struct nd_cmd_clear_error { diff --git a/include/uapi/linux/net_dropmon.h b/include/uapi/linux/net_dropmon.h index 1bbea8f0681e..84f622a66a7a 100644 --- a/include/uapi/linux/net_dropmon.h +++ b/include/uapi/linux/net_dropmon.h @@ -29,12 +29,12 @@ struct net_dm_config_entry { struct net_dm_config_msg { __u32 entries; - struct net_dm_config_entry options[0]; + struct 
net_dm_config_entry options[]; }; struct net_dm_alert_msg { __u32 entries; - struct net_dm_drop_point points[0]; + struct net_dm_drop_point points[]; }; struct net_dm_user_msg { diff --git a/include/uapi/linux/netfilter/x_tables.h b/include/uapi/linux/netfilter/x_tables.h index b8c6bb233ac1..796af83a963a 100644 --- a/include/uapi/linux/netfilter/x_tables.h +++ b/include/uapi/linux/netfilter/x_tables.h @@ -28,7 +28,7 @@ struct xt_entry_match { __u16 match_size; } u; - unsigned char data[0]; + unsigned char data[]; }; struct xt_entry_target { @@ -119,7 +119,7 @@ struct xt_counters_info { unsigned int num_counters; /* The counters (actually `number' of these). */ - struct xt_counters counters[0]; + struct xt_counters counters[]; }; #define XT_INV_PROTO 0x40 /* Invert the sense of PROTO. */ diff --git a/include/uapi/linux/netfilter_arp/arp_tables.h b/include/uapi/linux/netfilter_arp/arp_tables.h index bbf5af2b67a8..a6ac2463f787 100644 --- a/include/uapi/linux/netfilter_arp/arp_tables.h +++ b/include/uapi/linux/netfilter_arp/arp_tables.h @@ -109,7 +109,7 @@ struct arpt_entry struct xt_counters counters; /* The matches (if any), then the target. */ - unsigned char elems[0]; + unsigned char elems[]; }; /* @@ -181,7 +181,7 @@ struct arpt_replace { struct xt_counters __user *counters; /* The entries (hang off end: not really an array). */ - struct arpt_entry entries[0]; + struct arpt_entry entries[]; }; /* The argument to ARPT_SO_GET_ENTRIES. */ @@ -193,7 +193,7 @@ struct arpt_get_entries { unsigned int size; /* The entries. 
*/ - struct arpt_entry entrytable[0]; + struct arpt_entry entrytable[]; }; /* Helper functions */ diff --git a/include/uapi/linux/netfilter_bridge/ebt_among.h b/include/uapi/linux/netfilter_bridge/ebt_among.h index 9acf757bc1f7..73b26a280c4f 100644 --- a/include/uapi/linux/netfilter_bridge/ebt_among.h +++ b/include/uapi/linux/netfilter_bridge/ebt_among.h @@ -40,7 +40,7 @@ struct ebt_mac_wormhash_tuple { struct ebt_mac_wormhash { int table[257]; int poolsize; - struct ebt_mac_wormhash_tuple pool[0]; + struct ebt_mac_wormhash_tuple pool[]; }; #define ebt_mac_wormhash_size(x) ((x) ? sizeof(struct ebt_mac_wormhash) \ diff --git a/include/uapi/linux/netfilter_ipv4/ip_tables.h b/include/uapi/linux/netfilter_ipv4/ip_tables.h index 50c7fee625ae..1485df28b239 100644 --- a/include/uapi/linux/netfilter_ipv4/ip_tables.h +++ b/include/uapi/linux/netfilter_ipv4/ip_tables.h @@ -121,7 +121,7 @@ struct ipt_entry { struct xt_counters counters; /* The matches (if any), then the target. */ - unsigned char elems[0]; + unsigned char elems[]; }; /* @@ -203,7 +203,7 @@ struct ipt_replace { struct xt_counters __user *counters; /* The entries (hang off end: not really an array). */ - struct ipt_entry entries[0]; + struct ipt_entry entries[]; }; /* The argument to IPT_SO_GET_ENTRIES. */ @@ -215,7 +215,7 @@ struct ipt_get_entries { unsigned int size; /* The entries. */ - struct ipt_entry entrytable[0]; + struct ipt_entry entrytable[]; }; /* Helper functions */ diff --git a/include/uapi/linux/netfilter_ipv6/ip6_tables.h b/include/uapi/linux/netfilter_ipv6/ip6_tables.h index d9e364f96a5c..766e8e0bcc68 100644 --- a/include/uapi/linux/netfilter_ipv6/ip6_tables.h +++ b/include/uapi/linux/netfilter_ipv6/ip6_tables.h @@ -243,7 +243,7 @@ struct ip6t_replace { struct xt_counters __user *counters; /* The entries (hang off end: not really an array). */ - struct ip6t_entry entries[0]; + struct ip6t_entry entries[]; }; /* The argument to IP6T_SO_GET_ENTRIES. 
*/ @@ -255,7 +255,7 @@ struct ip6t_get_entries { unsigned int size; /* The entries. */ - struct ip6t_entry entrytable[0]; + struct ip6t_entry entrytable[]; }; /* Helper functions */ diff --git a/include/uapi/linux/perf_event.h b/include/uapi/linux/perf_event.h index d37629dbad72..4653834f078f 100644 --- a/include/uapi/linux/perf_event.h +++ b/include/uapi/linux/perf_event.h @@ -491,7 +491,7 @@ struct perf_event_query_bpf { /* * User provided buffer to store program ids */ - __u32 ids[0]; + __u32 ids[]; }; /* diff --git a/include/uapi/linux/pkt_cls.h b/include/uapi/linux/pkt_cls.h index 9a2ee1e39fad..ffbe230ef90b 100644 --- a/include/uapi/linux/pkt_cls.h +++ b/include/uapi/linux/pkt_cls.h @@ -256,7 +256,7 @@ struct tc_u32_sel { short hoff; __be32 hmask; - struct tc_u32_key keys[0]; + struct tc_u32_key keys[]; }; struct tc_u32_mark { @@ -268,7 +268,7 @@ struct tc_u32_mark { struct tc_u32_pcnt { __u64 rcnt; __u64 rhit; - __u64 kcnts[0]; + __u64 kcnts[]; }; /* Flags */ diff --git a/include/uapi/linux/raid/md_p.h b/include/uapi/linux/raid/md_p.h index e5a98a16f9b0..6c0aa577730f 100644 --- a/include/uapi/linux/raid/md_p.h +++ b/include/uapi/linux/raid/md_p.h @@ -303,7 +303,7 @@ struct mdp_superblock_1 { * into the 'roles' value. If a device is spare or faulty, then it doesn't * have a meaningful role. 
*/ - __le16 dev_roles[0]; /* role in array, or 0xffff for a spare, or 0xfffe for faulty */ + __le16 dev_roles[]; /* role in array, or 0xffff for a spare, or 0xfffe for faulty */ }; /* feature_map bits */ diff --git a/include/uapi/linux/random.h b/include/uapi/linux/random.h index dcc1b3e6106f..e744c23582eb 100644 --- a/include/uapi/linux/random.h +++ b/include/uapi/linux/random.h @@ -41,7 +41,7 @@ struct rand_pool_info { int entropy_count; int buf_size; - __u32 buf[0]; + __u32 buf[]; }; /* diff --git a/include/uapi/linux/romfs_fs.h b/include/uapi/linux/romfs_fs.h index a7f1585accef..6aa05e792454 100644 --- a/include/uapi/linux/romfs_fs.h +++ b/include/uapi/linux/romfs_fs.h @@ -27,7 +27,7 @@ struct romfs_super_block { __be32 word1; __be32 size; __be32 checksum; - char name[0]; /* volume name */ + char name[]; /* volume name */ }; /* On disk inode */ @@ -37,7 +37,7 @@ struct romfs_inode { __be32 spec; __be32 size; __be32 checksum; - char name[0]; + char name[]; }; #define ROMFH_TYPE 7 diff --git a/include/uapi/linux/rtnetlink.h b/include/uapi/linux/rtnetlink.h index 83849a37db5b..eb2747d58a81 100644 --- a/include/uapi/linux/rtnetlink.h +++ b/include/uapi/linux/rtnetlink.h @@ -440,7 +440,7 @@ struct rtnexthop { /* RTA_VIA */ struct rtvia { __kernel_sa_family_t rtvia_family; - __u8 rtvia_addr[0]; + __u8 rtvia_addr[]; }; /* RTM_CACHEINFO */ diff --git a/include/uapi/linux/sctp.h b/include/uapi/linux/sctp.h index c4ff1ebd8bcc..ed7d4ecbf53d 100644 --- a/include/uapi/linux/sctp.h +++ b/include/uapi/linux/sctp.h @@ -365,7 +365,7 @@ struct sctp_assoc_change { __u16 sac_outbound_streams; __u16 sac_inbound_streams; sctp_assoc_t sac_assoc_id; - __u8 sac_info[0]; + __u8 sac_info[]; }; /* @@ -436,7 +436,7 @@ struct sctp_remote_error { __u32 sre_length; __be16 sre_error; sctp_assoc_t sre_assoc_id; - __u8 sre_data[0]; + __u8 sre_data[]; }; @@ -453,7 +453,7 @@ struct sctp_send_failed { __u32 ssf_error; struct sctp_sndrcvinfo ssf_info; sctp_assoc_t ssf_assoc_id; - __u8 ssf_data[0]; + 
__u8 ssf_data[]; }; struct sctp_send_failed_event { @@ -463,7 +463,7 @@ struct sctp_send_failed_event { __u32 ssf_error; struct sctp_sndinfo ssfe_info; sctp_assoc_t ssf_assoc_id; - __u8 ssf_data[0]; + __u8 ssf_data[]; }; /* @@ -1029,7 +1029,7 @@ struct sctp_getaddrs_old { struct sctp_getaddrs { sctp_assoc_t assoc_id; /*input*/ __u32 addr_num; /*output*/ - __u8 addrs[0]; /*output, variable size*/ + __u8 addrs[]; /*output, variable size*/ }; /* A socket user request obtained via SCTP_GET_ASSOC_STATS that retrieves diff --git a/include/uapi/linux/seg6.h b/include/uapi/linux/seg6.h index 286e8d6a8e98..13bcbc8bba32 100644 --- a/include/uapi/linux/seg6.h +++ b/include/uapi/linux/seg6.h @@ -30,7 +30,7 @@ struct ipv6_sr_hdr { __u8 flags; __u16 tag; - struct in6_addr segments[0]; + struct in6_addr segments[]; }; #define SR6_FLAG1_PROTECTED (1 << 6) diff --git a/include/uapi/linux/seg6_iptunnel.h b/include/uapi/linux/seg6_iptunnel.h index eb815e0d0ac3..a74294211290 100644 --- a/include/uapi/linux/seg6_iptunnel.h +++ b/include/uapi/linux/seg6_iptunnel.h @@ -26,7 +26,7 @@ enum { struct seg6_iptunnel_encap { int mode; - struct ipv6_sr_hdr srh[0]; + struct ipv6_sr_hdr srh[]; }; #define SEG6_IPTUN_ENCAP_SIZE(x) ((sizeof(*x)) + (((x)->srh->hdrlen + 1) << 3)) diff --git a/include/uapi/linux/stm.h b/include/uapi/linux/stm.h index 7bac318b4440..de3579c2cff0 100644 --- a/include/uapi/linux/stm.h +++ b/include/uapi/linux/stm.h @@ -36,7 +36,7 @@ struct stp_policy_id { /* padding */ __u16 __reserved_0; __u32 __reserved_1; - char id[0]; + char id[]; }; #define STP_POLICY_ID_SET _IOWR('%', 0, struct stp_policy_id) diff --git a/include/uapi/linux/target_core_user.h b/include/uapi/linux/target_core_user.h index 27ace512babd..fbd8ca67e107 100644 --- a/include/uapi/linux/target_core_user.h +++ b/include/uapi/linux/target_core_user.h @@ -152,7 +152,7 @@ struct tcmu_tmr_entry { __u32 cmd_cnt; __u64 __pad3; __u64 __pad4; - __u16 cmd_ids[0]; + __u16 cmd_ids[]; } __packed; #define 
TCMU_OP_ALIGN_SIZE sizeof(__u64) diff --git a/include/uapi/linux/usb/audio.h b/include/uapi/linux/usb/audio.h index 76b7c3f6cd0d..c917c53070d5 100644 --- a/include/uapi/linux/usb/audio.h +++ b/include/uapi/linux/usb/audio.h @@ -341,7 +341,7 @@ struct uac_feature_unit_descriptor { __u8 bUnitID; __u8 bSourceID; __u8 bControlSize; - __u8 bmaControls[0]; /* variable length */ + __u8 bmaControls[]; /* variable length */ } __attribute__((packed)); static inline __u8 uac_feature_unit_iFeature(struct uac_feature_unit_descriptor *desc) diff --git a/include/uapi/linux/usb/cdc.h b/include/uapi/linux/usb/cdc.h index 6d61550959ef..acf3852bb676 100644 --- a/include/uapi/linux/usb/cdc.h +++ b/include/uapi/linux/usb/cdc.h @@ -171,7 +171,7 @@ struct usb_cdc_mdlm_detail_desc { /* type is associated with mdlm_desc.bGUID */ __u8 bGuidDescriptorType; - __u8 bDetailData[0]; + __u8 bDetailData[]; } __attribute__ ((packed)); /* "OBEX Control Model Functional Descriptor" */ @@ -379,7 +379,7 @@ struct usb_cdc_ncm_ndp16 { __le32 dwSignature; __le16 wLength; __le16 wNextNdpIndex; - struct usb_cdc_ncm_dpe16 dpe16[0]; + struct usb_cdc_ncm_dpe16 dpe16[]; } __attribute__ ((packed)); /* 32-bit NCM Datagram Pointer Entry */ @@ -395,7 +395,7 @@ struct usb_cdc_ncm_ndp32 { __le16 wReserved6; __le32 dwNextNdpIndex; __le32 dwReserved12; - struct usb_cdc_ncm_dpe32 dpe32[0]; + struct usb_cdc_ncm_dpe32 dpe32[]; } __attribute__ ((packed)); /* CDC NCM subclass 3.2.1 and 3.2.2 */ diff --git a/include/uapi/linux/usb/ch9.h b/include/uapi/linux/usb/ch9.h index 17ce56198c9a..31fcfa084e63 100644 --- a/include/uapi/linux/usb/ch9.h +++ b/include/uapi/linux/usb/ch9.h @@ -818,7 +818,7 @@ struct usb_key_descriptor { __u8 tTKID[3]; __u8 bReserved; - __u8 bKeyData[0]; + __u8 bKeyData[]; } __attribute__((packed)); /*-------------------------------------------------------------------------*/ diff --git a/include/uapi/linux/usb/raw_gadget.h b/include/uapi/linux/usb/raw_gadget.h index 0be685272eb1..c7d2199134d7 100644 --- 
a/include/uapi/linux/usb/raw_gadget.h +++ b/include/uapi/linux/usb/raw_gadget.h @@ -60,7 +60,7 @@ enum usb_raw_event_type { struct usb_raw_event { __u32 type; __u32 length; - __u8 data[0]; + __u8 data[]; }; #define USB_RAW_IO_FLAGS_ZERO 0x0001 @@ -90,7 +90,7 @@ struct usb_raw_ep_io { __u16 ep; __u16 flags; __u32 length; - __u8 data[0]; + __u8 data[]; }; /* Maximum number of non-control endpoints in struct usb_raw_eps_info. */ diff --git a/include/uapi/linux/usbdevice_fs.h b/include/uapi/linux/usbdevice_fs.h index cf525cddeb94..74a84e02422a 100644 --- a/include/uapi/linux/usbdevice_fs.h +++ b/include/uapi/linux/usbdevice_fs.h @@ -131,7 +131,7 @@ struct usbdevfs_urb { unsigned int signr; /* signal to be sent on completion, or 0 if none should be sent. */ void __user *usercontext; - struct usbdevfs_iso_packet_desc iso_frame_desc[0]; + struct usbdevfs_iso_packet_desc iso_frame_desc[]; }; /* ioctls for talking directly to drivers */ @@ -176,7 +176,7 @@ struct usbdevfs_disconnect_claim { struct usbdevfs_streams { unsigned int num_streams; /* Not used by USBDEVFS_FREE_STREAMS */ unsigned int num_eps; - unsigned char eps[0]; + unsigned char eps[]; }; /* diff --git a/include/uapi/linux/vhost_types.h b/include/uapi/linux/vhost_types.h index 634cee485abb..391331a10879 100644 --- a/include/uapi/linux/vhost_types.h +++ b/include/uapi/linux/vhost_types.h @@ -107,7 +107,7 @@ struct vhost_memory_region { struct vhost_memory { __u32 nregions; __u32 padding; - struct vhost_memory_region regions[0]; + struct vhost_memory_region regions[]; }; /* VHOST_SCSI specific definitions */ @@ -135,7 +135,7 @@ struct vhost_scsi_target { struct vhost_vdpa_config { __u32 off; __u32 len; - __u8 buf[0]; + __u8 buf[]; }; /* vhost vdpa IOVA range diff --git a/include/uapi/linux/virtio_9p.h b/include/uapi/linux/virtio_9p.h index 441047432258..374b68f8ac6e 100644 --- a/include/uapi/linux/virtio_9p.h +++ b/include/uapi/linux/virtio_9p.h @@ -38,7 +38,7 @@ struct virtio_9p_config { /* length of the tag 
name */ __virtio16 tag_len; /* non-NULL terminated tag name */ - __u8 tag[0]; + __u8 tag[]; } __attribute__((packed)); #endif /* _LINUX_VIRTIO_9P_H */ diff --git a/include/uapi/linux/xfrm.h b/include/uapi/linux/xfrm.h index 65e13a099b1a..e8191e0c3b56 100644 --- a/include/uapi/linux/xfrm.h +++ b/include/uapi/linux/xfrm.h @@ -33,7 +33,7 @@ struct xfrm_sec_ctx { __u8 ctx_alg; __u16 ctx_len; __u32 ctx_sid; - char ctx_str[0]; + char ctx_str[]; }; /* Security Context Domains of Interpretation */ @@ -96,27 +96,27 @@ struct xfrm_replay_state_esn { __u32 oseq_hi; __u32 seq_hi; __u32 replay_window; - __u32 bmp[0]; + __u32 bmp[]; }; struct xfrm_algo { char alg_name[64]; unsigned int alg_key_len; /* in bits */ - char alg_key[0]; + char alg_key[]; }; struct xfrm_algo_auth { char alg_name[64]; unsigned int alg_key_len; /* in bits */ unsigned int alg_trunc_len; /* in bits */ - char alg_key[0]; + char alg_key[]; }; struct xfrm_algo_aead { char alg_name[64]; unsigned int alg_key_len; /* in bits */ unsigned int alg_icv_len; /* in bits */ - char alg_key[0]; + char alg_key[]; }; struct xfrm_stats { -- cgit v1.2.3 From 06e445f740c1a0fe5d16b3dff8a4ef18e124e54e Mon Sep 17 00:00:00 2001 From: Ossama Othman Date: Mon, 27 Jun 2022 18:02:42 -0700 Subject: mptcp: fix conflict with <netinet/in.h> Including <linux/mptcp.h> before the C library <netinet/in.h> header causes symbol redefinition errors at compile-time due to duplicate declarations and definitions in the <linux/in.h> header included by <linux/mptcp.h>. Explicitly include <netinet/in.h> before <linux/in.h> in <linux/mptcp.h> when __KERNEL__ is not defined so that the C library compatibility logic in <linux/libc-compat.h> is enabled when including <linux/mptcp.h> in user space code.
Fixes: c11c5906bc0a ("mptcp: add MPTCP_SUBFLOW_ADDRS getsockopt support") Signed-off-by: Ossama Othman Signed-off-by: Mat Martineau Signed-off-by: Jakub Kicinski --- include/uapi/linux/mptcp.h | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/mptcp.h b/include/uapi/linux/mptcp.h index 921963589904..dfe19bf13f4c 100644 --- a/include/uapi/linux/mptcp.h +++ b/include/uapi/linux/mptcp.h @@ -2,16 +2,17 @@ #ifndef _UAPI_MPTCP_H #define _UAPI_MPTCP_H +#ifndef __KERNEL__ +#include <netinet/in.h> /* for sockaddr_in and sockaddr_in6 */ +#include <sys/socket.h> /* for struct sockaddr */ +#endif + #include <linux/const.h> #include <linux/types.h> #include <linux/in.h> /* for sockaddr_in */ #include <linux/in6.h> /* for sockaddr_in6 */ #include <linux/socket.h> /* for sockaddr_storage and sa_family */ -#ifndef __KERNEL__ -#include <sys/socket.h> /* for struct sockaddr */ -#endif - #define MPTCP_SUBFLOW_FLAG_MCAP_REM _BITUL(0) #define MPTCP_SUBFLOW_FLAG_MCAP_LOC _BITUL(1) #define MPTCP_SUBFLOW_FLAG_JOIN_REM _BITUL(2) -- cgit v1.2.3 From 69fd337a975c7e690dfe49d9cb4fe5ba1e6db44e Mon Sep 17 00:00:00 2001 From: Stanislav Fomichev Date: Tue, 28 Jun 2022 10:43:06 -0700 Subject: bpf: per-cgroup lsm flavor Allow attaching to lsm hooks in the cgroup context. Attaching to per-cgroup LSM works exactly like attaching to other per-cgroup hooks. New BPF_LSM_CGROUP is added to trigger new mode; the actual lsm hook we attach to is signaled via existing attach_btf_id. For the hooks that have 'struct socket' or 'struct sock' as its first argument, we use the cgroup associated with that socket. For the rest, we use 'current' cgroup (this is all on default hierarchy == v2 only). Note that for some hooks that work on 'struct sock' we still take the cgroup from 'current' because some of them work on the socket that hasn't been properly initialized yet. Behind the scenes, we allocate a shim program that is attached to the trampoline and runs cgroup effective BPF programs array.
This shim has some rudimentary ref counting and can be shared between several programs attaching to the same lsm hook from different cgroups. Note that this patch bloats cgroup size because we add 211 cgroup_bpf_attach_type(s) for simplicity sake. This will be addressed in the subsequent patch. Also note that we only add non-sleepable flavor for now. To enable sleepable use-cases, bpf_prog_run_array_cg has to grab trace rcu, shim programs have to be freed via trace rcu, cgroup_bpf.effective should be also trace-rcu-managed + maybe some other changes that I'm not aware of. Reviewed-by: Martin KaFai Lau Signed-off-by: Stanislav Fomichev Link: https://lore.kernel.org/r/20220628174314.1216643-4-sdf@google.com Signed-off-by: Alexei Starovoitov --- include/uapi/linux/bpf.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index e81362891596..b7479898c879 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -998,6 +998,7 @@ enum bpf_attach_type { BPF_SK_REUSEPORT_SELECT_OR_MIGRATE, BPF_PERF_EVENT, BPF_TRACE_KPROBE_MULTI, + BPF_LSM_CGROUP, __MAX_BPF_ATTACH_TYPE }; -- cgit v1.2.3 From b79c9fc9551b45953a94abf550b7bd3b00e3a0f9 Mon Sep 17 00:00:00 2001 From: Stanislav Fomichev Date: Tue, 28 Jun 2022 10:43:08 -0700 Subject: bpf: implement BPF_PROG_QUERY for BPF_LSM_CGROUP We have two options: 1. Treat all BPF_LSM_CGROUP the same, regardless of attach_btf_id 2. 
Treat BPF_LSM_CGROUP+attach_btf_id as a separate hook point I was doing (2) in the original patch, but switching to (1) here: * bpf_prog_query returns all attached BPF_LSM_CGROUP programs regardless of attach_btf_id * attach_btf_id is exported via bpf_prog_info Reviewed-by: Martin KaFai Lau Signed-off-by: Stanislav Fomichev Link: https://lore.kernel.org/r/20220628174314.1216643-6-sdf@google.com Signed-off-by: Alexei Starovoitov --- include/uapi/linux/bpf.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index b7479898c879..ad9e7311c4cf 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -1432,6 +1432,7 @@ union bpf_attr { __u32 attach_flags; __aligned_u64 prog_ids; __u32 prog_cnt; + __aligned_u64 prog_attach_flags; /* output: per-program attach_flags */ } query; struct { /* anonymous struct used by BPF_RAW_TRACEPOINT_OPEN command */ @@ -6076,6 +6077,8 @@ struct bpf_prog_info { __u64 run_cnt; __u64 recursion_misses; __u32 verified_insns; + __u32 attach_btf_obj_id; + __u32 attach_btf_id; } __attribute__((aligned(8))); struct bpf_map_info { -- cgit v1.2.3 From 211da42eaa45db7b0edfde187dd88a85fbd466b5 Mon Sep 17 00:00:00 2001 From: Yuwei Wang Date: Wed, 29 Jun 2022 08:48:32 +0000 Subject: net, neigh: introduce interval_probe_time_ms for periodic probe commit ed6cd6a17896 ("net, neigh: Set lower cap for neigh_managed_work rearming") fixed a case where, when DELAY_PROBE_TIME is configured to 0, the processing of the system work queue hogs the CPU at 100%. Furthermore, we should introduce a new option used by the periodic probe. Signed-off-by: Yuwei Wang Signed-off-by: Paolo Abeni --- include/uapi/linux/neighbour.h | 1 + include/uapi/linux/sysctl.h | 37 +++++++++++++++++++------------------ 2 files changed, 20 insertions(+), 18 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/neighbour.h b/include/uapi/linux/neighbour.h index
39c565e460c7..a998bf761635 100644 --- a/include/uapi/linux/neighbour.h +++ b/include/uapi/linux/neighbour.h @@ -154,6 +154,7 @@ enum { NDTPA_QUEUE_LENBYTES, /* u32 */ NDTPA_MCAST_REPROBES, /* u32 */ NDTPA_PAD, + NDTPA_INTERVAL_PROBE_TIME_MS, /* u64, msecs */ __NDTPA_MAX }; #define NDTPA_MAX (__NDTPA_MAX - 1) diff --git a/include/uapi/linux/sysctl.h b/include/uapi/linux/sysctl.h index 6a3b194c50fe..8981f00204db 100644 --- a/include/uapi/linux/sysctl.h +++ b/include/uapi/linux/sysctl.h @@ -584,24 +584,25 @@ enum { /* /proc/sys/net/<protocol>/neigh/<dev> */ enum { - NET_NEIGH_MCAST_SOLICIT=1, - NET_NEIGH_UCAST_SOLICIT=2, - NET_NEIGH_APP_SOLICIT=3, - NET_NEIGH_RETRANS_TIME=4, - NET_NEIGH_REACHABLE_TIME=5, - NET_NEIGH_DELAY_PROBE_TIME=6, - NET_NEIGH_GC_STALE_TIME=7, - NET_NEIGH_UNRES_QLEN=8, - NET_NEIGH_PROXY_QLEN=9, - NET_NEIGH_ANYCAST_DELAY=10, - NET_NEIGH_PROXY_DELAY=11, - NET_NEIGH_LOCKTIME=12, - NET_NEIGH_GC_INTERVAL=13, - NET_NEIGH_GC_THRESH1=14, - NET_NEIGH_GC_THRESH2=15, - NET_NEIGH_GC_THRESH3=16, - NET_NEIGH_RETRANS_TIME_MS=17, - NET_NEIGH_REACHABLE_TIME_MS=18, + NET_NEIGH_MCAST_SOLICIT = 1, + NET_NEIGH_UCAST_SOLICIT = 2, + NET_NEIGH_APP_SOLICIT = 3, + NET_NEIGH_RETRANS_TIME = 4, + NET_NEIGH_REACHABLE_TIME = 5, + NET_NEIGH_DELAY_PROBE_TIME = 6, + NET_NEIGH_GC_STALE_TIME = 7, + NET_NEIGH_UNRES_QLEN = 8, + NET_NEIGH_PROXY_QLEN = 9, + NET_NEIGH_ANYCAST_DELAY = 10, + NET_NEIGH_PROXY_DELAY = 11, + NET_NEIGH_LOCKTIME = 12, + NET_NEIGH_GC_INTERVAL = 13, + NET_NEIGH_GC_THRESH1 = 14, + NET_NEIGH_GC_THRESH2 = 15, + NET_NEIGH_GC_THRESH3 = 16, + NET_NEIGH_RETRANS_TIME_MS = 17, + NET_NEIGH_REACHABLE_TIME_MS = 18, + NET_NEIGH_INTERVAL_PROBE_TIME_MS = 19, }; /* /proc/sys/net/dccp */ -- cgit v1.2.3 From 29c1ac230e6056b26846c66881802b581a78ad72 Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Thu, 30 Jun 2022 13:25:57 +0100 Subject: io_uring: keep sendrecv flags in ioprio We waste a u64 SQE field for flags even though we don't need as many bits and it can be used for something more useful
later. Store io_uring specific send/recv flags in sqe->ioprio instead of ->addr2. Signed-off-by: Pavel Begunkov Fixes: 0455d4ccec54 ("io_uring: add POLL_FIRST support for send/sendmsg and recv/recvmsg") [axboe: change comment in io_uring.h as well] Signed-off-by: Jens Axboe --- include/uapi/linux/io_uring.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h index 53e7dae92e42..f10b59d6693e 100644 --- a/include/uapi/linux/io_uring.h +++ b/include/uapi/linux/io_uring.h @@ -244,7 +244,7 @@ enum io_uring_op { #define IORING_ASYNC_CANCEL_ANY (1U << 2) /* - * send/sendmsg and recv/recvmsg flags (sqe->addr2) + * send/sendmsg and recv/recvmsg flags (sqe->ioprio) * * IORING_RECVSEND_POLL_FIRST If set, instead of first attempting to send * or receive and arm poll if that yields an -- cgit v1.2.3 From d7cfea332cca483ccd3970ed4de73ed892643e51 Mon Sep 17 00:00:00 2001 From: Philip Yang Date: Thu, 20 Jan 2022 15:06:30 -0500 Subject: drm/amdkfd: Add KFD SMI event IDs and triggers Define new system management interface event IDs for migration, GPU recoverable page fault, user queues eviction, restore and unmap from GPU events and corresponding event triggers, those will be implemented in the following patches. 
Signed-off-by: Philip Yang Reviewed-by: Felix Kuehling Signed-off-by: Alex Deucher --- include/uapi/linux/kfd_ioctl.h | 37 +++++++++++++++++++++++++++++++++++++ 1 file changed, 37 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/kfd_ioctl.h b/include/uapi/linux/kfd_ioctl.h index 231eb010b823..280edda46faf 100644 --- a/include/uapi/linux/kfd_ioctl.h +++ b/include/uapi/linux/kfd_ioctl.h @@ -470,6 +470,43 @@ enum kfd_smi_event { KFD_SMI_EVENT_THERMAL_THROTTLE = 2, KFD_SMI_EVENT_GPU_PRE_RESET = 3, KFD_SMI_EVENT_GPU_POST_RESET = 4, + KFD_SMI_EVENT_MIGRATE_START = 5, + KFD_SMI_EVENT_MIGRATE_END = 6, + KFD_SMI_EVENT_PAGE_FAULT_START = 7, + KFD_SMI_EVENT_PAGE_FAULT_END = 8, + KFD_SMI_EVENT_QUEUE_EVICTION = 9, + KFD_SMI_EVENT_QUEUE_RESTORE = 10, + KFD_SMI_EVENT_UNMAP_FROM_GPU = 11, + + /* + * max event number, as a flag bit to get events from all processes, + * this requires super user permission, otherwise will not be able to + * receive event from any process. Without this flag to receive events + * from same process. + */ + KFD_SMI_EVENT_ALL_PROCESS = 64 +}; + +enum KFD_MIGRATE_TRIGGERS { + KFD_MIGRATE_TRIGGER_PREFETCH, + KFD_MIGRATE_TRIGGER_PAGEFAULT_GPU, + KFD_MIGRATE_TRIGGER_PAGEFAULT_CPU, + KFD_MIGRATE_TRIGGER_TTM_EVICTION +}; + +enum KFD_QUEUE_EVICTION_TRIGGERS { + KFD_QUEUE_EVICTION_TRIGGER_SVM, + KFD_QUEUE_EVICTION_TRIGGER_USERPTR, + KFD_QUEUE_EVICTION_TRIGGER_TTM, + KFD_QUEUE_EVICTION_TRIGGER_SUSPEND, + KFD_QUEUE_EVICTION_CRIU_CHECKPOINT, + KFD_QUEUE_EVICTION_CRIU_RESTORE +}; + +enum KFD_SVM_UNMAP_TRIGGERS { + KFD_SVM_UNMAP_TRIGGER_MMU_NOTIFY, + KFD_SVM_UNMAP_TRIGGER_MMU_NOTIFY_MIGRATE, + KFD_SVM_UNMAP_TRIGGER_UNMAP_FROM_CPU }; #define KFD_SMI_EVENT_MASK_FROM_INDEX(i) (1ULL << ((i) - 1)) -- cgit v1.2.3 From 3db7f894549a54dd6f8dfb7fe213ede83249bb94 Mon Sep 17 00:00:00 2001 From: Philip Yang Date: Wed, 19 Jan 2022 12:57:26 -0500 Subject: drm/amdkfd: Bump KFD API version for SMI profiling event Indicate SMI profiling events available. 
Signed-off-by: Philip Yang Reviewed-by: Felix Kuehling Signed-off-by: Alex Deucher --- include/uapi/linux/kfd_ioctl.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/kfd_ioctl.h b/include/uapi/linux/kfd_ioctl.h index 280edda46faf..d993a0d50994 100644 --- a/include/uapi/linux/kfd_ioctl.h +++ b/include/uapi/linux/kfd_ioctl.h @@ -35,9 +35,10 @@ * - 1.7 - Checkpoint Restore (CRIU) API * - 1.8 - CRIU - Support for SDMA transfers with GTT BOs * - 1.9 - Add available memory ioctl + * - 1.10 - Add SMI profiler event log */ #define KFD_IOCTL_MAJOR_VERSION 1 -#define KFD_IOCTL_MINOR_VERSION 9 +#define KFD_IOCTL_MINOR_VERSION 10 struct kfd_ioctl_get_version_args { __u32 major_version; /* from KFD */ -- cgit v1.2.3 From ecad3b0b99bff7247a11f8c7cb19ac9b0cb28b09 Mon Sep 17 00:00:00 2001 From: Veerendranath Jakkam Date: Mon, 23 May 2022 18:55:58 +0530 Subject: wifi: cfg80211: Increase akm_suites array size in cfg80211_crypto_settings Increase akm_suites array size in struct cfg80211_crypto_settings to 10 and advertise the capability to userspace. This allows userspace to send more than two AKMs to driver in netlink commands such as NL80211_CMD_CONNECT. This capability is needed for implementing WPA3-Personal transition mode correctly with any driver that handles roaming internally. Currently, the possible AKMs for multi-AKM connect can include PSK, PSK-SHA-256, SAE, FT-PSK and FT-SAE. Since the count is already 5, increasing the akm_suites array size to 10 should be reasonable for future usecases. 
Signed-off-by: Veerendranath Jakkam Link: https://lore.kernel.org/r/1653312358-12321-1-git-send-email-quic_vjakkam@quicinc.com Signed-off-by: Johannes Berg --- include/uapi/linux/nl80211.h | 14 ++++++++++++++ 1 file changed, 14 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index 89f64f46b98d..279f9715919e 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -2694,6 +2694,13 @@ enum nl80211_commands { * connection. Used with %NL80211_CMD_CONNECT. If this attribute is not * included in NL80211_CMD_CONNECT drivers must not perform MLO connection. * + * @NL80211_ATTR_MAX_NUM_AKM_SUITES: U16 attribute. Indicates maximum number of + * AKM suites allowed for %NL80211_CMD_CONNECT, %NL80211_CMD_ASSOCIATE and + * %NL80211_CMD_START_AP in %NL80211_CMD_GET_WIPHY response. If this + * attribute is not present userspace shall consider maximum number of AKM + * suites allowed as %NL80211_MAX_NR_AKM_SUITES which is the legacy maximum + * number prior to the introduction of this attribute. + * * @NUM_NL80211_ATTR: total number of nl80211_attrs available * @NL80211_ATTR_MAX: highest attribute number currently defined * @__NL80211_ATTR_AFTER_LAST: internal use @@ -3214,6 +3221,8 @@ enum nl80211_attrs { NL80211_ATTR_MLO_SUPPORT, + NL80211_ATTR_MAX_NUM_AKM_SUITES, + /* add attributes here, update the policy in nl80211.c */ __NL80211_ATTR_AFTER_LAST, @@ -3268,6 +3277,11 @@ enum nl80211_attrs { #define NL80211_HE_MIN_CAPABILITY_LEN 16 #define NL80211_HE_MAX_CAPABILITY_LEN 54 #define NL80211_MAX_NR_CIPHER_SUITES 5 + +/* + * NL80211_MAX_NR_AKM_SUITES is obsolete when %NL80211_ATTR_MAX_NUM_AKM_SUITES + * present in %NL80211_CMD_GET_WIPHY response. 
+ */ #define NL80211_MAX_NR_AKM_SUITES 2 #define NL80211_EHT_MIN_CAPABILITY_LEN 13 #define NL80211_EHT_MAX_CAPABILITY_LEN 51 -- cgit v1.2.3 From e252f2ed1c8c6c3884ab5dd34e003ed21f1fe6e0 Mon Sep 17 00:00:00 2001 From: Amir Goldstein Date: Wed, 29 Jun 2022 17:42:10 +0300 Subject: fanotify: introduce FAN_MARK_IGNORE This flag is a new way to configure ignore mask which allows adding and removing the event flags FAN_ONDIR and FAN_EVENT_ON_CHILD in ignore mask. The legacy FAN_MARK_IGNORED_MASK flag would always ignore events on directories and would ignore events on children depending on whether the FAN_EVENT_ON_CHILD flag was set in the (non ignored) mask. FAN_MARK_IGNORE can be used to ignore events on children without setting FAN_EVENT_ON_CHILD in the mark's mask and will not ignore events on directories unconditionally, only when FAN_ONDIR is set in ignore mask. The new behavior is non-downgradable. After calling fanotify_mark() with FAN_MARK_IGNORE once, calling fanotify_mark() with FAN_MARK_IGNORED_MASK on the same object will return EEXIST error. Setting the event flags with FAN_MARK_IGNORE on a non-dir inode mark has no meaning and will return ENOTDIR error. The meaning of FAN_MARK_IGNORED_SURV_MODIFY is preserved with the new FAN_MARK_IGNORE flag, but with a few semantic differences: 1. FAN_MARK_IGNORED_SURV_MODIFY is required for filesystem and mount marks and on an inode mark on a directory. Omitting this flag will return EINVAL or EISDIR error. 2. An ignore mask on a non-directory inode that survives modify could never be downgraded to an ignore mask that does not survive modify. With new FAN_MARK_IGNORE semantics we make that rule explicit - trying to update a surviving ignore mask without the flag FAN_MARK_IGNORED_SURV_MODIFY will return EEXIST error. The convenience macro FAN_MARK_IGNORE_SURV is added for (FAN_MARK_IGNORE | FAN_MARK_IGNORED_SURV_MODIFY), because the common case should use short constant names.
Link: https://lore.kernel.org/r/20220629144210.2983229-4-amir73il@gmail.com Signed-off-by: Amir Goldstein Signed-off-by: Jan Kara --- include/uapi/linux/fanotify.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/fanotify.h b/include/uapi/linux/fanotify.h index f1f89132d60e..d8536d77fea1 100644 --- a/include/uapi/linux/fanotify.h +++ b/include/uapi/linux/fanotify.h @@ -83,12 +83,20 @@ #define FAN_MARK_FLUSH 0x00000080 /* FAN_MARK_FILESYSTEM is 0x00000100 */ #define FAN_MARK_EVICTABLE 0x00000200 +/* This bit is mutually exclusive with FAN_MARK_IGNORED_MASK bit */ +#define FAN_MARK_IGNORE 0x00000400 /* These are NOT bitwise flags. Both bits can be used togther. */ #define FAN_MARK_INODE 0x00000000 #define FAN_MARK_MOUNT 0x00000010 #define FAN_MARK_FILESYSTEM 0x00000100 +/* + * Convenience macro - FAN_MARK_IGNORE requires FAN_MARK_IGNORED_SURV_MODIFY + * for non-inode mark types. + */ +#define FAN_MARK_IGNORE_SURV (FAN_MARK_IGNORE | FAN_MARK_IGNORED_SURV_MODIFY) + /* Deprecated - do not use this in programs and do not add new flags here! */ #define FAN_ALL_MARK_FLAGS (FAN_MARK_ADD |\ FAN_MARK_REMOVE |\ -- cgit v1.2.3 From 5e25c25aa2c08fb9a79476e029c0b1e3dcd70566 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Petr=20Van=C4=9Bk?= Date: Thu, 30 Jun 2022 16:27:20 +0200 Subject: xfrm: improve wording of comment above XFRM_OFFLOAD flags MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit I have noticed a few minor wording issues in a comment recently added above XFRM_OFFLOAD flags in 7c76ecd9c99b ("xfrm: enforce validity of offload input flags"). 
Signed-off-by: Petr Vaněk Signed-off-by: Steffen Klassert --- include/uapi/linux/xfrm.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/xfrm.h b/include/uapi/linux/xfrm.h index 3ed61df9cc91..7929bf9cbee4 100644 --- a/include/uapi/linux/xfrm.h +++ b/include/uapi/linux/xfrm.h @@ -511,9 +511,9 @@ struct xfrm_user_offload { int ifindex; __u8 flags; }; -/* This flag was exposed without any kernel code that supporting it. - * Unfortunately, strongswan has the code that uses sets this flag, - * which makes impossible to reuse this bit. +/* This flag was exposed without any kernel code that supports it. + * Unfortunately, strongswan has the code that sets this flag, + * which makes it impossible to reuse this bit. * * So leave it here to make sure that it won't be reused by mistake. */ -- cgit v1.2.3 From 3c660a5d86f4c01cf641bfea004a49f5860a5bed Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Daniel=20M=C3=BCller?= Date: Tue, 28 Jun 2022 16:01:18 +0000 Subject: bpf: Introduce TYPE_MATCH related constants/macros MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In order to provide type match support we require a new type of relocation which, in turn, requires toolchain support. Recent LLVM/Clang versions support a new value for the last argument to the __builtin_preserve_type_info builtin, for example. With this change we introduce the necessary constants into relevant header files, mirroring what the compiler may support. 
Signed-off-by: Daniel Müller Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20220628160127.607834-2-deso@posteo.net --- include/uapi/linux/bpf.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index ad9e7311c4cf..379e68fb866f 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -6786,6 +6786,7 @@ enum bpf_core_relo_kind { BPF_CORE_TYPE_SIZE = 9, /* type size in bytes */ BPF_CORE_ENUMVAL_EXISTS = 10, /* enum value existence in target kernel */ BPF_CORE_ENUMVAL_VALUE = 11, /* enum value integer value */ + BPF_CORE_TYPE_MATCHES = 12, /* type match in target kernel */ }; /* -- cgit v1.2.3 From 88527790c079fb1ea41cbcfa4450ee37906a2fb0 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Tue, 5 Jul 2022 16:59:24 -0700 Subject: tls: rx: add sockopt for enabling optimistic decrypt with TLS 1.3 Since optimistic decrypt may add extra load in case of retries, require socket owner to explicitly opt-in. Signed-off-by: Jakub Kicinski Signed-off-by: David S.
Miller --- include/uapi/linux/snmp.h | 1 + include/uapi/linux/tls.h | 2 ++ 2 files changed, 3 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/snmp.h b/include/uapi/linux/snmp.h index 904909d020e2..1c9152add663 100644 --- a/include/uapi/linux/snmp.h +++ b/include/uapi/linux/snmp.h @@ -344,6 +344,7 @@ enum LINUX_MIB_TLSRXDEVICE, /* TlsRxDevice */ LINUX_MIB_TLSDECRYPTERROR, /* TlsDecryptError */ LINUX_MIB_TLSRXDEVICERESYNC, /* TlsRxDeviceResync */ + LINUX_MIN_TLSDECRYPTRETRY, /* TlsDecryptRetry */ __LINUX_MIB_TLSMAX }; diff --git a/include/uapi/linux/tls.h b/include/uapi/linux/tls.h index bb8f80812b0b..f1157d8f4acd 100644 --- a/include/uapi/linux/tls.h +++ b/include/uapi/linux/tls.h @@ -40,6 +40,7 @@ #define TLS_TX 1 /* Set transmit parameters */ #define TLS_RX 2 /* Set receive parameters */ #define TLS_TX_ZEROCOPY_RO 3 /* TX zerocopy (only sendfile now) */ +#define TLS_RX_EXPECT_NO_PAD 4 /* Attempt opportunistic zero-copy */ /* Supported versions */ #define TLS_VERSION_MINOR(ver) ((ver) & 0xFF) @@ -162,6 +163,7 @@ enum { TLS_INFO_TXCONF, TLS_INFO_RXCONF, TLS_INFO_ZC_RO_TX, + TLS_INFO_RX_NO_PAD, __TLS_INFO_MAX, }; #define TLS_INFO_MAX (__TLS_INFO_MAX - 1) -- cgit v1.2.3 From fce7bf30d3309ea325a7ec452456f8f7c079f622 Mon Sep 17 00:00:00 2001 From: Eric Huang Date: Fri, 24 Jun 2022 11:03:37 -0400 Subject: drm/amdkfd: add new flag for svm It is to add new option for always keeping gpu mapping. 
Signed-off-by: Eric Huang Reviewed-by: Felix Kuehling Signed-off-by: Alex Deucher --- include/uapi/linux/kfd_ioctl.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/kfd_ioctl.h b/include/uapi/linux/kfd_ioctl.h index d993a0d50994..d6bda40fbcf7 100644 --- a/include/uapi/linux/kfd_ioctl.h +++ b/include/uapi/linux/kfd_ioctl.h @@ -613,6 +613,8 @@ enum kfd_mmio_remap { #define KFD_IOCTL_SVM_FLAG_GPU_EXEC 0x00000010 /* GPUs mostly read, may allow similar optimizations as RO, but writes fault */ #define KFD_IOCTL_SVM_FLAG_GPU_READ_MOSTLY 0x00000020 +/* Keep GPU memory mapping always valid as if XNACK is disable */ +#define KFD_IOCTL_SVM_FLAG_GPU_ALWAYS_MAPPED 0x00000040 /** * kfd_ioctl_svm_op - SVM ioctl operations -- cgit v1.2.3 From bdb2c48e4b38e6dbe82533b437468999ba3ae498 Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Thu, 7 Jul 2022 15:00:38 +0100 Subject: io_uring: explicit sqe padding for ioctl commands 32 bit sqe->cmd_op is an union with 64 bit values. It's always a good idea to do padding explicitly. Also zero check it in prep, so it can be used in the future if needed without compatibility concerns. 
Signed-off-by: Pavel Begunkov Link: https://lore.kernel.org/r/e6b95a05e970af79000435166185e85b196b2ba2.1657202417.git.asml.silence@gmail.com [axboe: turn bitwise OR into logical variant] Signed-off-by: Jens Axboe --- include/uapi/linux/io_uring.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h index f10b59d6693e..0ad3da28d2fc 100644 --- a/include/uapi/linux/io_uring.h +++ b/include/uapi/linux/io_uring.h @@ -22,7 +22,10 @@ struct io_uring_sqe { union { __u64 off; /* offset into file */ __u64 addr2; - __u32 cmd_op; + struct { + __u32 cmd_op; + __u32 __pad1; + }; }; union { __u64 addr; /* pointer to buffer or iovecs */ -- cgit v1.2.3 From f8d3da4ef8faf027261e06b7864583930dd7c7b9 Mon Sep 17 00:00:00 2001 From: Joanne Koong Date: Wed, 6 Jul 2022 16:25:47 -0700 Subject: bpf: Add flags arg to bpf_dynptr_read and bpf_dynptr_write APIs Commit 13bbbfbea759 ("bpf: Add bpf_dynptr_read and bpf_dynptr_write") added the bpf_dynptr_write() and bpf_dynptr_read() APIs. However, it will be needed for some dynptr types to pass in flags as well (e.g. when writing to a skb, the user may like to invalidate the hash or recompute the checksum). This patch adds a "u64 flags" arg to the bpf_dynptr_read() and bpf_dynptr_write() APIs before their UAPI signature freezes where we then cannot change them anymore with a 5.19.x released kernel. 
Fixes: 13bbbfbea759 ("bpf: Add bpf_dynptr_read and bpf_dynptr_write") Signed-off-by: Joanne Koong Signed-off-by: Alexei Starovoitov Signed-off-by: Daniel Borkmann Link: https://lore.kernel.org/r/20220706232547.4016651-1-joannelkoong@gmail.com --- include/uapi/linux/bpf.h | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index f4009dbdf62d..ef78e0e1a754 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -5222,22 +5222,25 @@ union bpf_attr { * Return * Nothing. Always succeeds. * - * long bpf_dynptr_read(void *dst, u32 len, struct bpf_dynptr *src, u32 offset) + * long bpf_dynptr_read(void *dst, u32 len, struct bpf_dynptr *src, u32 offset, u64 flags) * Description * Read *len* bytes from *src* into *dst*, starting from *offset* * into *src*. + * *flags* is currently unused. * Return * 0 on success, -E2BIG if *offset* + *len* exceeds the length - * of *src*'s data, -EINVAL if *src* is an invalid dynptr. + * of *src*'s data, -EINVAL if *src* is an invalid dynptr or if + * *flags* is not 0. * - * long bpf_dynptr_write(struct bpf_dynptr *dst, u32 offset, void *src, u32 len) + * long bpf_dynptr_write(struct bpf_dynptr *dst, u32 offset, void *src, u32 len, u64 flags) * Description * Write *len* bytes from *src* into *dst*, starting from *offset* * into *dst*. + * *flags* is currently unused. * Return * 0 on success, -E2BIG if *offset* + *len* exceeds the length * of *dst*'s data, -EINVAL if *dst* is an invalid dynptr or if *dst* - * is a read-only dynptr. + * is a read-only dynptr or if *flags* is not 0. 
* * void *bpf_dynptr_data(struct bpf_dynptr *ptr, u32 offset, u32 len) * Description -- cgit v1.2.3 From 3f805f8cc23ba35679dd01446929292911c2b469 Mon Sep 17 00:00:00 2001 From: Matthias Kaehlcke Date: Mon, 27 Jun 2022 08:35:25 -0700 Subject: LoadPin: Enable loading from trusted dm-verity devices Extend LoadPin to allow loading of kernel files from trusted dm-verity [1] devices. This change adds the concept of trusted verity devices to LoadPin. LoadPin maintains a list of root digests of verity devices it considers trusted. Userspace can populate this list through an ioctl on the new LoadPin securityfs entry 'dm-verity'. The ioctl receives a file descriptor of a file with verity digests as parameter. Verity reads the digests from this file after confirming that the file is located on the pinned root. The digest file must contain one digest per line. The list of trusted digests can only be set up once, which is typically done at boot time. When a kernel file is read LoadPin first checks (as usual) whether the file is located on the pinned root, if so the file can be loaded. Otherwise, if the verity extension is enabled, LoadPin determines whether the file is located on a verity backed device and whether the root digest of that device is in the list of trusted digests. The file can be loaded if the verity device has a trusted root digest. Background: As of now LoadPin restricts loading of kernel files to a single pinned filesystem, typically the rootfs. This works for many systems, however it can result in a bloated rootfs (and OTA updates) on platforms where multiple boards with different hardware configurations use the same rootfs image. Especially when 'optional' files are large it may be preferable to download/install them only when they are actually needed by a given board. Chrome OS uses Downloadable Content (DLC) [2] to deploy certain 'packages' at runtime. As an example a DLC package could contain firmware for a peripheral that is not present on all boards. 
DLCs use dm-verity to verify the integrity of the DLC content. [1] https://www.kernel.org/doc/html/latest/admin-guide/device-mapper/verity.html [2] https://chromium.googlesource.com/chromiumos/platform2/+/HEAD/dlcservice/docs/developer.md Signed-off-by: Matthias Kaehlcke Acked-by: Mike Snitzer Link: https://lore.kernel.org/lkml/20220627083512.v7.2.I01c67af41d2f6525c6d023101671d7339a9bc8b5@changeid Signed-off-by: Kees Cook --- include/uapi/linux/loadpin.h | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) create mode 100644 include/uapi/linux/loadpin.h (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/loadpin.h b/include/uapi/linux/loadpin.h new file mode 100644 index 000000000000..daa6dbb8bb02 --- /dev/null +++ b/include/uapi/linux/loadpin.h @@ -0,0 +1,22 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +/* + * Copyright (c) 2022, Google LLC + */ + +#ifndef _UAPI_LINUX_LOOP_LOADPIN_H +#define _UAPI_LINUX_LOOP_LOADPIN_H + +#define LOADPIN_IOC_MAGIC 'L' + +/** + * LOADPIN_IOC_SET_TRUSTED_VERITY_DIGESTS - Set up the root digests of verity devices + * that loadpin should trust. + * + * Takes a file descriptor from which to read the root digests of trusted verity devices. The file + * is expected to contain a list of digests in ASCII format, with one line per digest. The ioctl + * must be issued on the securityfs attribute 'loadpin/dm-verity' (which can be typically found + * under /sys/kernel/security/loadpin/dm-verity). + */ +#define LOADPIN_IOC_SET_TRUSTED_VERITY_DIGESTS _IOW(LOADPIN_IOC_MAGIC, 0x00, unsigned int) + +#endif /* _UAPI_LINUX_LOOP_LOADPIN_H */ -- cgit v1.2.3 From 2a96271fb66c499e4a89d76a89d3d01170c10bef Mon Sep 17 00:00:00 2001 From: Siarhei Vishniakou Date: Fri, 8 Jul 2022 21:59:23 -0700 Subject: Input: document the units for resolution of size axes Today, the resolution of size axes is not documented. As a result, it's not clear what the canonical interpretation of this value should be. 
On Android, there is a need to calculate the size of the touch ellipse in physical units (millimeters). After reviewing linux source, it turned out that most of the existing usages are already interpreting this value as "units/mm". This documentation will make it explicit. This will help device implementations with correctly following the linux specs, and will ensure that the devices will work on Android without needing further customized parameters for scaling of major/minor values. Signed-off-by: Siarhei Vishniakou Reviewed-by: Jeff LaBundy Link: https://lore.kernel.org/r/20220520084514.3451193-1-svv@google.com Signed-off-by: Dmitry Torokhov --- include/uapi/linux/input.h | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/input.h b/include/uapi/linux/input.h index ee3127461ee0..328cf545c029 100644 --- a/include/uapi/linux/input.h +++ b/include/uapi/linux/input.h @@ -78,10 +78,13 @@ struct input_id { * Note that input core does not clamp reported values to the * [minimum, maximum] limits, such task is left to userspace. * - * The default resolution for main axes (ABS_X, ABS_Y, ABS_Z) - * is reported in units per millimeter (units/mm), resolution - * for rotational axes (ABS_RX, ABS_RY, ABS_RZ) is reported - * in units per radian. + * The default resolution for main axes (ABS_X, ABS_Y, ABS_Z, + * ABS_MT_POSITION_X, ABS_MT_POSITION_Y) is reported in units + * per millimeter (units/mm), resolution for rotational axes + * (ABS_RX, ABS_RY, ABS_RZ) is reported in units per radian. + * The resolution for the size axes (ABS_MT_TOUCH_MAJOR, + * ABS_MT_TOUCH_MINOR, ABS_MT_WIDTH_MAJOR, ABS_MT_WIDTH_MINOR) + * is reported in units per millimeter (units/mm). * When INPUT_PROP_ACCELEROMETER is set the resolution changes. 
* The main axes (ABS_X, ABS_Y, ABS_Z) are then reported in * units per g (units/g) and in units per degree per second -- cgit v1.2.3 From faf3bfcb895037ae2a8b89d1048090c9e1291cae Mon Sep 17 00:00:00 2001 From: Matthew Rosato Date: Mon, 6 Jun 2022 16:33:22 -0400 Subject: vfio-pci/zdev: add function handle to clp base capability The function handle is a system-wide unique identifier for a zPCI device. With zPCI instruction interpretation, the host will no longer be executing the zPCI instructions on behalf of the guest. As a result, the guest needs to use the real function handle in order for firmware to associate the instruction with the proper PCI function. Let's provide that handle to the guest. Reviewed-by: Christian Borntraeger Reviewed-by: Pierre Morel Signed-off-by: Matthew Rosato Acked-by: Alex Williamson Link: https://lore.kernel.org/r/20220606203325.110625-19-mjrosato@linux.ibm.com Signed-off-by: Christian Borntraeger --- include/uapi/linux/vfio_zdev.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/vfio_zdev.h b/include/uapi/linux/vfio_zdev.h index b4309397b6b2..78c022af3d29 100644 --- a/include/uapi/linux/vfio_zdev.h +++ b/include/uapi/linux/vfio_zdev.h @@ -29,6 +29,9 @@ struct vfio_device_info_cap_zpci_base { __u16 fmb_length; /* Measurement Block Length (in bytes) */ __u8 pft; /* PCI Function Type */ __u8 gid; /* PCI function group ID */ + /* End of version 1 */ + __u32 fh; /* PCI function handle */ + /* End of version 2 */ }; /** -- cgit v1.2.3 From ba6090ff8ae01b41288be87ed9f6bed3d8cf5961 Mon Sep 17 00:00:00 2001 From: Matthew Rosato Date: Mon, 6 Jun 2022 16:33:23 -0400 Subject: vfio-pci/zdev: different maxstbl for interpreted devices When doing load/store interpretation, the maximum store block length is determined by the underlying firmware, not the host kernel API. 
Reflect that in the associated Query PCI Function Group clp capability and let userspace decide which is appropriate to present to the guest. Reviewed-by: Pierre Morel Signed-off-by: Matthew Rosato Acked-by: Alex Williamson Link: https://lore.kernel.org/r/20220606203325.110625-20-mjrosato@linux.ibm.com Signed-off-by: Christian Borntraeger --- include/uapi/linux/vfio_zdev.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/vfio_zdev.h b/include/uapi/linux/vfio_zdev.h index 78c022af3d29..77f2aff1f27e 100644 --- a/include/uapi/linux/vfio_zdev.h +++ b/include/uapi/linux/vfio_zdev.h @@ -50,6 +50,10 @@ struct vfio_device_info_cap_zpci_group { __u16 noi; /* Maximum number of MSIs */ __u16 maxstbl; /* Maximum Store Block Length */ __u8 version; /* Supported PCI Version */ + /* End of version 1 */ + __u8 reserved; + __u16 imaxstbl; /* Maximum Interpreted Store Block Length */ + /* End of version 2 */ }; /** -- cgit v1.2.3 From db1c875e0539518e3d5fe9876ef50975cf4476bb Mon Sep 17 00:00:00 2001 From: Matthew Rosato Date: Mon, 6 Jun 2022 16:33:24 -0400 Subject: KVM: s390: add KVM_S390_ZPCI_OP to manage guest zPCI devices The KVM_S390_ZPCI_OP ioctl provides a mechanism for managing hardware-assisted virtualization features for s390x zPCI passthrough. Add the first 2 operations, which can be used to enable/disable the specified device for Adapter Event Notification interpretation. 
Signed-off-by: Matthew Rosato Acked-by: Pierre Morel Reviewed-by: Thomas Huth Link: https://lore.kernel.org/r/20220606203325.110625-21-mjrosato@linux.ibm.com Signed-off-by: Christian Borntraeger --- include/uapi/linux/kvm.h | 31 +++++++++++++++++++++++++++++++ 1 file changed, 31 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index 5088bd9f1922..2f302e2287d1 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -1157,6 +1157,7 @@ struct kvm_ppc_resize_hpt { #define KVM_CAP_VM_TSC_CONTROL 214 #define KVM_CAP_SYSTEM_EVENT_DATA 215 #define KVM_CAP_ARM_SYSTEM_SUSPEND 216 +#define KVM_CAP_S390_ZPCI_OP 221 #ifdef KVM_CAP_IRQ_ROUTING @@ -2118,4 +2119,34 @@ struct kvm_stats_desc { /* Available with KVM_CAP_XSAVE2 */ #define KVM_GET_XSAVE2 _IOR(KVMIO, 0xcf, struct kvm_xsave) +/* Available with KVM_CAP_S390_ZPCI_OP */ +#define KVM_S390_ZPCI_OP _IOW(KVMIO, 0xd1, struct kvm_s390_zpci_op) + +struct kvm_s390_zpci_op { + /* in */ + __u32 fh; /* target device */ + __u8 op; /* operation to perform */ + __u8 pad[3]; + union { + /* for KVM_S390_ZPCIOP_REG_AEN */ + struct { + __u64 ibv; /* Guest addr of interrupt bit vector */ + __u64 sb; /* Guest addr of summary bit */ + __u32 flags; + __u32 noi; /* Number of interrupts */ + __u8 isc; /* Guest interrupt subclass */ + __u8 sbo; /* Offset of guest summary bit vector */ + __u16 pad; + } reg_aen; + __u64 reserved[8]; + } u; +}; + +/* types for kvm_s390_zpci_op->op */ +#define KVM_S390_ZPCIOP_REG_AEN 0 +#define KVM_S390_ZPCIOP_DEREG_AEN 1 + +/* flags for kvm_s390_zpci_op->u.reg_aen.flags */ +#define KVM_S390_ZPCIOP_REGAEN_HOST (1 << 0) + #endif /* __LINUX_KVM_H */ -- cgit v1.2.3 From 3c512307de4097aaaab3f4741c7a98fe88afa469 Mon Sep 17 00:00:00 2001 From: Veerendranath Jakkam Date: Fri, 8 Jul 2022 17:56:07 +0530 Subject: wifi: nl80211: fix sending link ID info of associated BSS commit dd374f84baec ("wifi: nl80211: expose link ID for associated BSSes") used a 
top-level attribute to send link ID of the associated BSS in the nested attribute NL80211_ATTR_BSS. But since NL80211_ATTR_BSS is a nested attribute of the attributes defined in enum nl80211_bss, define a new attribute in enum nl80211_bss and use it for sending the link ID of the BSS. Fixes: dd374f84baec ("wifi: nl80211: expose link ID for associated BSSes") Signed-off-by: Veerendranath Jakkam Reviewed-by: Jeff Johnson Link: https://lore.kernel.org/r/20220708122607.1836958-1-quic_vjakkam@quicinc.com Signed-off-by: Johannes Berg --- include/uapi/linux/nl80211.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index 279f9715919e..7bb1ae59f3a5 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -4904,6 +4904,7 @@ enum nl80211_bss_scan_width { * Contains a nested array of signal strength attributes (u8, dBm), * using the nesting index as the antenna number. * @NL80211_BSS_FREQUENCY_OFFSET: frequency offset in KHz + * @NL80211_BSS_MLO_LINK_ID: MLO link ID of the BSS (u8). 
* @__NL80211_BSS_AFTER_LAST: internal * @NL80211_BSS_MAX: highest BSS attribute */ @@ -4929,6 +4930,7 @@ enum nl80211_bss { NL80211_BSS_PARENT_BSSID, NL80211_BSS_CHAIN_SIGNAL, NL80211_BSS_FREQUENCY_OFFSET, + NL80211_BSS_MLO_LINK_ID, /* keep last */ __NL80211_BSS_AFTER_LAST, -- cgit v1.2.3 From 1090c1ea2208702a2fe0e3f71d262e3097d939f6 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Fri, 8 Jul 2022 19:52:52 -0700 Subject: tls: fix spelling of MIB MIN -> MIB Fixes: 88527790c079 ("tls: rx: add sockopt for enabling optimistic decrypt with TLS 1.3") Signed-off-by: Jakub Kicinski --- include/uapi/linux/snmp.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/snmp.h b/include/uapi/linux/snmp.h index 1c9152add663..fd83fb9e525a 100644 --- a/include/uapi/linux/snmp.h +++ b/include/uapi/linux/snmp.h @@ -344,7 +344,7 @@ enum LINUX_MIB_TLSRXDEVICE, /* TlsRxDevice */ LINUX_MIB_TLSDECRYPTERROR, /* TlsDecryptError */ LINUX_MIB_TLSRXDEVICERESYNC, /* TlsRxDeviceResync */ - LINUX_MIN_TLSDECRYPTRETRY, /* TlsDecryptRetry */ + LINUX_MIB_TLSDECRYPTRETRY, /* TlsDecryptRetry */ __LINUX_MIB_TLSMAX }; -- cgit v1.2.3 From bb56cea9abd85c22175b31d8f7c44d6c615fe526 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Fri, 8 Jul 2022 19:52:53 -0700 Subject: tls: rx: add counter for NoPad violations As discussed with Maxim add a counter for true NoPad violations. This should help deployments catch unexpected padded records vs just control records which always need re-encryption. 
https://lore.kernel.org/all/b111828e6ac34baad9f4e783127eba8344ac252d.camel@nvidia.com/ Signed-off-by: Jakub Kicinski --- include/uapi/linux/snmp.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/snmp.h b/include/uapi/linux/snmp.h index fd83fb9e525a..4d7470036a8b 100644 --- a/include/uapi/linux/snmp.h +++ b/include/uapi/linux/snmp.h @@ -345,6 +345,7 @@ enum LINUX_MIB_TLSDECRYPTERROR, /* TlsDecryptError */ LINUX_MIB_TLSRXDEVICERESYNC, /* TlsRxDeviceResync */ LINUX_MIB_TLSDECRYPTRETRY, /* TlsDecryptRetry */ + LINUX_MIB_TLSRXNOPADVIOL, /* TlsRxNoPadViolation */ __LINUX_MIB_TLSMAX }; -- cgit v1.2.3 From 50e8ca0b675add8eb0e95938448a4a0f9fb0b6dd Mon Sep 17 00:00:00 2001 From: Eric Huang Date: Mon, 11 Jul 2022 14:10:55 -0400 Subject: drm/amdkfd: bump KFD version for unified ctx save/restore memory To expose unified memory for ctx save/restore area feature availability to libhsakmt. Proposed userspace: https://patchwork.freedesktop.org/series/106218/ Signed-off-by: Eric Huang Reviewed-by: Felix Kuehling Signed-off-by: Alex Deucher --- include/uapi/linux/kfd_ioctl.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/kfd_ioctl.h b/include/uapi/linux/kfd_ioctl.h index d6bda40fbcf7..42b60198b6c5 100644 --- a/include/uapi/linux/kfd_ioctl.h +++ b/include/uapi/linux/kfd_ioctl.h @@ -36,9 +36,10 @@ * - 1.8 - CRIU - Support for SDMA transfers with GTT BOs * - 1.9 - Add available memory ioctl * - 1.10 - Add SMI profiler event log + * - 1.11 - Add unified memory for ctx save/restore area */ #define KFD_IOCTL_MAJOR_VERSION 1 -#define KFD_IOCTL_MINOR_VERSION 10 +#define KFD_IOCTL_MINOR_VERSION 11 struct kfd_ioctl_get_version_args { __u32 major_version; /* from KFD */ -- cgit v1.2.3 From 1b870fa5573e260bc74d19f381ab0dd971a8d8e7 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Thu, 14 Jul 2022 07:27:31 -0400 Subject: kvm: stats: tell userspace which values are
boolean Some of the statistics values exported by KVM are always only 0 or 1. It can be useful to export this fact to userspace so that it can track them specially (for example by polling the value every now and then to compute a % of time spent in a specific state). Therefore, add "boolean value" as a new "unit". While it is not exactly a unit, it walks and quacks like one. In particular, using the type would be wrong because boolean values could be instantaneous or peak values (e.g. "is the rmap allocated?") or even two-bucket histograms (e.g. "number of posted vs. non-posted interrupt injections"). Suggested-by: Amneesh Singh Signed-off-by: Paolo Bonzini --- include/uapi/linux/kvm.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index 5088bd9f1922..811897dadcae 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -2083,6 +2083,7 @@ struct kvm_stats_header { #define KVM_STATS_UNIT_BYTES (0x1 << KVM_STATS_UNIT_SHIFT) #define KVM_STATS_UNIT_SECONDS (0x2 << KVM_STATS_UNIT_SHIFT) #define KVM_STATS_UNIT_CYCLES (0x3 << KVM_STATS_UNIT_SHIFT) +#define KVM_STATS_UNIT_BOOLEAN (0x4 << KVM_STATS_UNIT_SHIFT) #define KVM_STATS_UNIT_MAX KVM_STATS_UNIT_CYCLES #define KVM_STATS_BASE_SHIFT 8 -- cgit v1.2.3 From 71f28f3136aff5890cd56de78abc673f8393cad9 Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Wed, 13 Jul 2022 22:07:10 +0800 Subject: ublk_drv: add io_uring based userspace block driver This is the driver part of userspace block driver(ublk driver), the other part is userspace daemon part(ublksrv)[1]. The two parts communicate by io_uring's IORING_OP_URING_CMD with one shared cmd buffer for storing io command, and the buffer is read only for ublksrv, each io command is indexed by io request tag directly, and is written by ublk driver. 
For example, when one READ io request is submitted to ublk block driver, ublk driver stores the io command into cmd buffer first, then completes one IORING_OP_URING_CMD for notifying ublksrv, and the URING_CMD is issued to ublk driver beforehand by ublksrv for getting notification of any new io request, and each URING_CMD is associated with one io request by tag. After ublksrv gets the io command, it translates and handles the ublk io request, such as, for the ublk-loop target, ublksrv translates the request into same request on another file or disk, like the kernel loop block driver. In ublksrv's implementation, the io is still handled by io_uring, and shares the same ring with IORING_OP_URING_CMD command. When the target io request is done, the same IORING_OP_URING_CMD is issued to ublk driver for both committing io request result and getting future notification of new io request. Another thing done by ublk driver is to copy data between kernel io request and ublksrv's io buffer: 1) before ublksrv handles WRITE request, copy the request's data into ublksrv's userspace io buffer, so that ublksrv can handle the write request; 2) after ublksrv handles READ request, copy ublksrv's userspace io buffer into this READ request, then ublk driver can complete the READ request. Zero copy may be switched if mm is ready to support it. ublk driver doesn't handle any logic of the specific user space driver, so it is small/simple enough.
[1] ublksrv https://github.com/ming1/ubdsrv Signed-off-by: Ming Lei Link: https://lore.kernel.org/r/20220713140711.97356-2-ming.lei@redhat.com Signed-off-by: Jens Axboe --- include/uapi/linux/ublk_cmd.h | 156 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 156 insertions(+) create mode 100644 include/uapi/linux/ublk_cmd.h (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/ublk_cmd.h b/include/uapi/linux/ublk_cmd.h new file mode 100644 index 000000000000..4f0c16ec875e --- /dev/null +++ b/include/uapi/linux/ublk_cmd.h @@ -0,0 +1,156 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +#ifndef USER_BLK_DRV_CMD_INC_H +#define USER_BLK_DRV_CMD_INC_H + +#include + +/* ublk server command definition */ + +/* + * Admin commands, issued by ublk server, and handled by ublk driver. + */ +#define UBLK_CMD_GET_QUEUE_AFFINITY 0x01 +#define UBLK_CMD_GET_DEV_INFO 0x02 +#define UBLK_CMD_ADD_DEV 0x04 +#define UBLK_CMD_DEL_DEV 0x05 +#define UBLK_CMD_START_DEV 0x06 +#define UBLK_CMD_STOP_DEV 0x07 + +/* + * IO commands, issued by ublk server, and handled by ublk driver. + * + * FETCH_REQ: issued via sqe(URING_CMD) beforehand for fetching IO request + * from ublk driver, should be issued only when starting device. After + * the associated cqe is returned, request's tag can be retrieved via + * cqe->userdata. + * + * COMMIT_AND_FETCH_REQ: issued via sqe(URING_CMD) after ublkserver handled + * this IO request, request's handling result is committed to ublk + * driver, meantime FETCH_REQ is piggyback, and FETCH_REQ has to be + * handled before completing io request. 
+ */ +#define UBLK_IO_FETCH_REQ 0x20 +#define UBLK_IO_COMMIT_AND_FETCH_REQ 0x21 + +/* only ABORT means that no re-fetch */ +#define UBLK_IO_RES_OK 0 +#define UBLK_IO_RES_ABORT (-ENODEV) + +#define UBLKSRV_CMD_BUF_OFFSET 0 +#define UBLKSRV_IO_BUF_OFFSET 0x80000000 + +/* tag bit is 12bit, so at most 4096 IOs for each queue */ +#define UBLK_MAX_QUEUE_DEPTH 4096 + +/* + * zero copy requires 4k block size, and can remap ublk driver's io + * request into ublksrv's vm space + */ +#define UBLK_F_SUPPORT_ZERO_COPY (1UL << 0) + +/* device state */ +#define UBLK_S_DEV_DEAD 0 +#define UBLK_S_DEV_LIVE 1 + +/* shipped via sqe->cmd of io_uring command */ +struct ublksrv_ctrl_cmd { + /* sent to which device, must be valid */ + __u32 dev_id; + + /* sent to which queue, must be -1 if the cmd isn't for queue */ + __u16 queue_id; + /* + * cmd specific buffer, can be IN or OUT. + */ + __u16 len; + __u64 addr; + + /* inline data */ + __u64 data[2]; +}; + +struct ublksrv_ctrl_dev_info { + __u16 nr_hw_queues; + __u16 queue_depth; + __u16 block_size; + __u16 state; + + __u32 rq_max_blocks; + __u32 dev_id; + + __u64 dev_blocks; + + __s32 ublksrv_pid; + __s32 reserved0; + __u64 flags[2]; + + /* For ublksrv internal use, invisible to ublk driver */ + __u64 ublksrv_flags; + __u64 reserved1[9]; +}; + +#define UBLK_IO_OP_READ 0 +#define UBLK_IO_OP_WRITE 1 +#define UBLK_IO_OP_FLUSH 2 +#define UBLK_IO_OP_DISCARD 3 +#define UBLK_IO_OP_WRITE_SAME 4 +#define UBLK_IO_OP_WRITE_ZEROES 5 + +#define UBLK_IO_F_FAILFAST_DEV (1U << 8) +#define UBLK_IO_F_FAILFAST_TRANSPORT (1U << 9) +#define UBLK_IO_F_FAILFAST_DRIVER (1U << 10) +#define UBLK_IO_F_META (1U << 11) +#define UBLK_IO_F_INTEGRITY (1U << 12) +#define UBLK_IO_F_FUA (1U << 13) +#define UBLK_IO_F_PREFLUSH (1U << 14) +#define UBLK_IO_F_NOUNMAP (1U << 15) +#define UBLK_IO_F_SWAP (1U << 16) + +/* + * io cmd is described by this structure, and stored in share memory, indexed + * by request tag. 
+ * + * The data is stored by ublk driver, and read by ublksrv after one fetch command + * returns. + */ +struct ublksrv_io_desc { + /* op: bit 0-7, flags: bit 8-31 */ + __u32 op_flags; + + __u32 nr_sectors; + + /* start sector for this io */ + __u64 start_sector; + + /* buffer address in ublksrv daemon vm space, from ublk driver */ + __u64 addr; +}; + +static inline __u8 ublksrv_get_op(const struct ublksrv_io_desc *iod) +{ + return iod->op_flags & 0xff; +} + +static inline __u32 ublksrv_get_flags(const struct ublksrv_io_desc *iod) +{ + return iod->op_flags >> 8; +} + +/* issued to ublk driver via /dev/ublkcN */ +struct ublksrv_io_cmd { + __u16 q_id; + + /* for fetch/commit which result */ + __u16 tag; + + /* io result, it is valid for COMMIT* command only */ + __s32 result; + + /* + * userspace buffer address in ublksrv daemon process, valid for + * FETCH* command only + */ + __u64 addr; +}; + +#endif -- cgit v1.2.3 From 0edb3696c1713c42f52acbd8355b545e58f782b1 Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Wed, 13 Jul 2022 22:07:11 +0800 Subject: ublk_drv: support to complete io command via task_work_add Use task_work_add if it is available, since task_work_add can bring up better performance, especially batching signaling ->ubq_daemon can be done. It is observed that task_work_add() can boost iops by +4% on random 4k io test. Also except for completing io command, all other code paths are same with completing io command via io_uring_cmd_complete_in_task. Meantime add one flag of UBLK_F_URING_CMD_COMP_IN_TASK for comparing the mode easily. 
Signed-off-by: Ming Lei Link: https://lore.kernel.org/r/20220713140711.97356-3-ming.lei@redhat.com Signed-off-by: Jens Axboe --- include/uapi/linux/ublk_cmd.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/ublk_cmd.h b/include/uapi/linux/ublk_cmd.h index 4f0c16ec875e..a3f5e7c21807 100644 --- a/include/uapi/linux/ublk_cmd.h +++ b/include/uapi/linux/ublk_cmd.h @@ -48,6 +48,12 @@ */ #define UBLK_F_SUPPORT_ZERO_COPY (1UL << 0) +/* + * Force to complete io cmd via io_uring_cmd_complete_in_task so that + * performance comparison is done easily with using task_work_add + */ +#define UBLK_F_URING_CMD_COMP_IN_TASK (1UL << 1) + /* device state */ #define UBLK_S_DEV_DEAD 0 #define UBLK_S_DEV_LIVE 1 -- cgit v1.2.3 From 577e5b8c3924539c7a09e3e00477534f39e61829 Mon Sep 17 00:00:00 2001 From: Shaul Triebitz Date: Wed, 8 Jun 2022 12:01:12 +0300 Subject: wifi: cfg80211: add API to add/modify/remove a link station Add an API for adding/modifying/removing a link of a station. Signed-off-by: Shaul Triebitz Signed-off-by: Johannes Berg --- include/uapi/linux/nl80211.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index 7bb1ae59f3a5..37bfc934325a 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -1254,6 +1254,10 @@ * without %NL80211_ATTR_MLO_LINK_ID as an easy way to remove all links * in preparation for e.g. roaming to a regular (non-MLO) AP. 
* + * @NL80211_CMD_ADD_LINK_STA: Add a link to an MLD station + * @NL80211_CMD_MODIFY_LINK_STA: Modify a link of an MLD station + * @NL80211_CMD_REMOVE_LINK_STA: Remove a link of an MLD station + * * @NL80211_CMD_MAX: highest used command number * @__NL80211_CMD_AFTER_LAST: internal use */ @@ -1501,6 +1505,10 @@ enum nl80211_commands { NL80211_CMD_ADD_LINK, NL80211_CMD_REMOVE_LINK, + NL80211_CMD_ADD_LINK_STA, + NL80211_CMD_MODIFY_LINK_STA, + NL80211_CMD_REMOVE_LINK_STA, + /* add new commands above here */ /* used to define NL80211_CMD_MAX below */ -- cgit v1.2.3 From 4e9c3af398207d95957ae6c25290891574f2d7e8 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Mon, 4 Jul 2022 15:02:33 +0200 Subject: wifi: nl80211: add EML/MLD capabilities to per-iftype capabilities We have the per-interface type capabilities, currently for extended capabilities, add the EML/MLD capabilities there to have this advertised by the driver. Signed-off-by: Johannes Berg --- include/uapi/linux/nl80211.h | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index 37bfc934325a..3fa586e38f88 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -2368,8 +2368,10 @@ enum nl80211_commands { * * @NL80211_ATTR_IFTYPE_EXT_CAPA: Nested attribute of the following attributes: * %NL80211_ATTR_IFTYPE, %NL80211_ATTR_EXT_CAPA, - * %NL80211_ATTR_EXT_CAPA_MASK, to specify the extended capabilities per - * interface type. + * %NL80211_ATTR_EXT_CAPA_MASK, to specify the extended capabilities and + * other interface-type specific capabilities per interface type. For MLO, + * %NL80211_ATTR_EML_CAPABILITY and %NL80211_ATTR_MLD_CAPA_AND_OPS are + * present. * * @NL80211_ATTR_MU_MIMO_GROUP_DATA: array of 24 bytes that defines a MU-MIMO * groupID for monitor mode. 
@@ -2709,6 +2711,9 @@ enum nl80211_commands { * suites allowed as %NL80211_MAX_NR_AKM_SUITES which is the legacy maximum * number prior to the introduction of this attribute. * + * @NL80211_ATTR_EML_CAPABILITY: EML Capability information (u16) + * @NL80211_ATTR_MLD_CAPA_AND_OPS: MLD Capabilities and Operations (u16) + * * @NUM_NL80211_ATTR: total number of nl80211_attrs available * @NL80211_ATTR_MAX: highest attribute number currently defined * @__NL80211_ATTR_AFTER_LAST: internal use @@ -3231,6 +3236,9 @@ enum nl80211_attrs { NL80211_ATTR_MAX_NUM_AKM_SUITES, + NL80211_ATTR_EML_CAPABILITY, + NL80211_ATTR_MLD_CAPA_AND_OPS, + /* add attributes here, update the policy in nl80211.c */ __NL80211_ATTR_AFTER_LAST, -- cgit v1.2.3 From 00f6842ef41d90cc335ae4dbb00d71f4f642c712 Mon Sep 17 00:00:00 2001 From: Laurent Pinchart Date: Mon, 7 Mar 2022 16:32:00 +0000 Subject: media: v4l: Add packed YUV 4:4:4 YUVA and YUVX pixel formats The new YUVA and YUVX are permutations of the existing AYUV and XYUV formats. They are used by the NXP i.MX8 ISI hardware.
Signed-off-by: Laurent Pinchart Reviewed-by: Nicolas Dufresne Reviewed-by: Jacopo Mondi Reviewed-by: Hans Verkuil Signed-off-by: Mauro Carvalho Chehab --- include/uapi/linux/videodev2.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/videodev2.h b/include/uapi/linux/videodev2.h index 5311ac4fde35..0028ab74ca7c 100644 --- a/include/uapi/linux/videodev2.h +++ b/include/uapi/linux/videodev2.h @@ -593,6 +593,8 @@ struct v4l2_pix_format { #define V4L2_PIX_FMT_XYUV32 v4l2_fourcc('X', 'Y', 'U', 'V') /* 32 XYUV-8-8-8-8 */ #define V4L2_PIX_FMT_VUYA32 v4l2_fourcc('V', 'U', 'Y', 'A') /* 32 VUYA-8-8-8-8 */ #define V4L2_PIX_FMT_VUYX32 v4l2_fourcc('V', 'U', 'Y', 'X') /* 32 VUYX-8-8-8-8 */ +#define V4L2_PIX_FMT_YUVA32 v4l2_fourcc('Y', 'U', 'V', 'A') /* 32 YUVA-8-8-8-8 */ +#define V4L2_PIX_FMT_YUVX32 v4l2_fourcc('Y', 'U', 'V', 'X') /* 32 YUVX-8-8-8-8 */ #define V4L2_PIX_FMT_M420 v4l2_fourcc('M', '4', '2', '0') /* 12 YUV 4:2:0 2 lines y, 1 line uv interleaved */ /* two planes -- one Y, one Cr + Cb interleaved */ -- cgit v1.2.3 From 718d2153ad0de0c7c0b6891eaa7f9918d68b6d5e Mon Sep 17 00:00:00 2001 From: Laurent Pinchart Date: Thu, 17 Mar 2022 12:37:12 +0000 Subject: media: v4l2: Make colorspace validity checks more future-proof The helper functions that test validity of colorspace-related fields use the last value of the corresponding enums. This isn't very future-proof, as there's a high chance someone adding a new value may forget to update the helpers. Add new "LAST" entries to the enumerations to improve this, and keep them private to the kernel. 
Signed-off-by: Laurent Pinchart Acked-by: Sakari Ailus Reviewed-by: Hans Verkuil Signed-off-by: Mauro Carvalho Chehab --- include/uapi/linux/videodev2.h | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/videodev2.h b/include/uapi/linux/videodev2.h index 0028ab74ca7c..e32b9e25258d 100644 --- a/include/uapi/linux/videodev2.h +++ b/include/uapi/linux/videodev2.h @@ -245,6 +245,14 @@ enum v4l2_colorspace { /* DCI-P3 colorspace, used by cinema projectors */ V4L2_COLORSPACE_DCI_P3 = 12, + +#ifdef __KERNEL__ + /* + * Largest supported colorspace value, assigned by the compiler, used + * by the framework to check for invalid values. + */ + V4L2_COLORSPACE_LAST, +#endif }; /* @@ -283,6 +291,13 @@ enum v4l2_xfer_func { V4L2_XFER_FUNC_NONE = 5, V4L2_XFER_FUNC_DCI_P3 = 6, V4L2_XFER_FUNC_SMPTE2084 = 7, +#ifdef __KERNEL__ + /* + * Largest supported transfer function value, assigned by the compiler, + * used by the framework to check for invalid values. + */ + V4L2_XFER_FUNC_LAST, +#endif }; /* @@ -343,6 +358,13 @@ enum v4l2_ycbcr_encoding { /* SMPTE 240M -- Obsolete HDTV */ V4L2_YCBCR_ENC_SMPTE240M = 8, +#ifdef __KERNEL__ + /* + * Largest supported encoding value, assigned by the compiler, used by + * the framework to check for invalid values. + */ + V4L2_YCBCR_ENC_LAST, +#endif }; /* -- cgit v1.2.3 From 64fe675e999c2c7d753ecaaa1349693c59ce6c11 Mon Sep 17 00:00:00 2001 From: Hans Verkuil Date: Fri, 8 Jul 2022 17:21:40 +0100 Subject: media: videodev2.h: add V4L2_CTRL_FLAG_DYNAMIC_ARRAY Add a new flag that indicates that this control is a dynamically sized array. Also document this flag. Currently dynamically sized arrays are limited to one dimensional arrays, but that might change in the future if there is a need for it. The initial use-case of dynamic arrays are stateless codecs. A frame can be divided in many slices, so you want to provide an array containing slice information for each slice. 
Typically the number of slices is small, but the standard allows for hundreds or thousands of slices. Dynamic arrays are a good solution since sizing the array for the worst case would waste substantial amounts of memory. Acked-by: Nicolas Dufresne Tested-by: Benjamin Gaignard Tested-by: Jernej Skrabec Reviewed-by: Laurent Pinchart Signed-off-by: Hans Verkuil Signed-off-by: Mauro Carvalho Chehab --- include/uapi/linux/videodev2.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/videodev2.h b/include/uapi/linux/videodev2.h index e32b9e25258d..87ebc6baafb6 100644 --- a/include/uapi/linux/videodev2.h +++ b/include/uapi/linux/videodev2.h @@ -1914,6 +1914,7 @@ struct v4l2_querymenu { #define V4L2_CTRL_FLAG_HAS_PAYLOAD 0x0100 #define V4L2_CTRL_FLAG_EXECUTE_ON_WRITE 0x0200 #define V4L2_CTRL_FLAG_MODIFY_LAYOUT 0x0400 +#define V4L2_CTRL_FLAG_DYNAMIC_ARRAY 0x0800 /* Query flags, to be ORed with the control ID */ #define V4L2_CTRL_FLAG_NEXT_CTRL 0x80000000 -- cgit v1.2.3 From 01dcfd53335cfb5c66a6c023ac0f789a5b87ace5 Mon Sep 17 00:00:00 2001 From: Benjamin Gaignard Date: Fri, 8 Jul 2022 17:21:49 +0100 Subject: media: uapi: Move parsed HEVC pixel format out of staging Move HEVC pixel format since we are ready to stabilize the uAPI Signed-off-by: Benjamin Gaignard Reviewed-by: Ezequiel Garcia Acked-by: Nicolas Dufresne Tested-by: Jernej Skrabec Signed-off-by: Hans Verkuil Signed-off-by: Mauro Carvalho Chehab --- include/uapi/linux/videodev2.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/videodev2.h b/include/uapi/linux/videodev2.h index 87ebc6baafb6..06e3f81ad855 100644 --- a/include/uapi/linux/videodev2.h +++ b/include/uapi/linux/videodev2.h @@ -736,6 +736,7 @@ struct v4l2_pix_format { #define V4L2_PIX_FMT_FWHT v4l2_fourcc('F', 'W', 'H', 'T') /* Fast Walsh Hadamard Transform (vicodec) */ #define V4L2_PIX_FMT_FWHT_STATELESS v4l2_fourcc('S', 'F', 'W', 'H') /* Stateless
FWHT (vicodec) */ #define V4L2_PIX_FMT_H264_SLICE v4l2_fourcc('S', '2', '6', '4') /* H264 parsed slices */ +#define V4L2_PIX_FMT_HEVC_SLICE v4l2_fourcc('S', '2', '6', '5') /* HEVC parsed slices */ /* Vendor-specific formats */ #define V4L2_PIX_FMT_CPIA1 v4l2_fourcc('C', 'P', 'I', 'A') /* cpia1 YUV */ -- cgit v1.2.3 From 16e2d220cdc64a1518878172dc7b50d4f60e5aac Mon Sep 17 00:00:00 2001 From: Benjamin Gaignard Date: Fri, 8 Jul 2022 17:21:51 +0100 Subject: media: uapi: Move the HEVC stateless control type out of staging Move the HEVC stateless controls types out of staging, and re-number them. Signed-off-by: Benjamin Gaignard Reviewed-by: Ezequiel Garcia Acked-by: Nicolas Dufresne Tested-by: Jernej Skrabec Signed-off-by: Hans Verkuil Signed-off-by: Mauro Carvalho Chehab --- include/uapi/linux/videodev2.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/videodev2.h b/include/uapi/linux/videodev2.h index 06e3f81ad855..cff2bb78b2cc 100644 --- a/include/uapi/linux/videodev2.h +++ b/include/uapi/linux/videodev2.h @@ -1860,6 +1860,12 @@ enum v4l2_ctrl_type { V4L2_CTRL_TYPE_VP9_COMPRESSED_HDR = 0x0260, V4L2_CTRL_TYPE_VP9_FRAME = 0x0261, + + V4L2_CTRL_TYPE_HEVC_SPS = 0x0270, + V4L2_CTRL_TYPE_HEVC_PPS = 0x0271, + V4L2_CTRL_TYPE_HEVC_SLICE_PARAMS = 0x0272, + V4L2_CTRL_TYPE_HEVC_SCALING_MATRIX = 0x0273, + V4L2_CTRL_TYPE_HEVC_DECODE_PARAMS = 0x0274, }; /* Used in the VIDIOC_QUERYCTRL ioctl for querying controls */ -- cgit v1.2.3 From ca24fef0f2c857b0533f21f9a8a756f9e73d60fb Mon Sep 17 00:00:00 2001 From: Benjamin Gaignard Date: Fri, 8 Jul 2022 17:21:56 +0100 Subject: media: uapi: move HEVC stateless controls out of staging HEVC uAPI is used by 2 mainline drivers (Hantro, Cedrus) and at least 2 out-of-tree drivers (rkvdec, RPi). The uAPI has been reviewed so it is time to make it 'public' by un-staging it. 
Signed-off-by: Benjamin Gaignard Reviewed-by: Ezequiel Garcia Acked-by: Nicolas Dufresne Tested-by: Jernej Skrabec Signed-off-by: Hans Verkuil Signed-off-by: Mauro Carvalho Chehab --- include/uapi/linux/v4l2-controls.h | 459 +++++++++++++++++++++++++++++++++++++ include/uapi/linux/videodev2.h | 5 + 2 files changed, 464 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/v4l2-controls.h b/include/uapi/linux/v4l2-controls.h index dfff69ed88f7..5f46bf4a570c 100644 --- a/include/uapi/linux/v4l2-controls.h +++ b/include/uapi/linux/v4l2-controls.h @@ -1997,6 +1997,465 @@ struct v4l2_ctrl_mpeg2_quantisation { __u8 chroma_non_intra_quantiser_matrix[64]; }; +#define V4L2_CID_STATELESS_HEVC_SPS (V4L2_CID_CODEC_STATELESS_BASE + 400) +#define V4L2_CID_STATELESS_HEVC_PPS (V4L2_CID_CODEC_STATELESS_BASE + 401) +#define V4L2_CID_STATELESS_HEVC_SLICE_PARAMS (V4L2_CID_CODEC_STATELESS_BASE + 402) +#define V4L2_CID_STATELESS_HEVC_SCALING_MATRIX (V4L2_CID_CODEC_STATELESS_BASE + 403) +#define V4L2_CID_STATELESS_HEVC_DECODE_PARAMS (V4L2_CID_CODEC_STATELESS_BASE + 404) +#define V4L2_CID_STATELESS_HEVC_DECODE_MODE (V4L2_CID_CODEC_STATELESS_BASE + 405) +#define V4L2_CID_STATELESS_HEVC_START_CODE (V4L2_CID_CODEC_STATELESS_BASE + 406) +#define V4L2_CID_STATELESS_HEVC_ENTRY_POINT_OFFSETS (V4L2_CID_CODEC_STATELESS_BASE + 407) + +enum v4l2_stateless_hevc_decode_mode { + V4L2_STATELESS_HEVC_DECODE_MODE_SLICE_BASED, + V4L2_STATELESS_HEVC_DECODE_MODE_FRAME_BASED, +}; + +enum v4l2_stateless_hevc_start_code { + V4L2_STATELESS_HEVC_START_CODE_NONE, + V4L2_STATELESS_HEVC_START_CODE_ANNEX_B, +}; + +#define V4L2_HEVC_SLICE_TYPE_B 0 +#define V4L2_HEVC_SLICE_TYPE_P 1 +#define V4L2_HEVC_SLICE_TYPE_I 2 + +#define V4L2_HEVC_SPS_FLAG_SEPARATE_COLOUR_PLANE (1ULL << 0) +#define V4L2_HEVC_SPS_FLAG_SCALING_LIST_ENABLED (1ULL << 1) +#define V4L2_HEVC_SPS_FLAG_AMP_ENABLED (1ULL << 2) +#define V4L2_HEVC_SPS_FLAG_SAMPLE_ADAPTIVE_OFFSET (1ULL << 3) +#define V4L2_HEVC_SPS_FLAG_PCM_ENABLED 
(1ULL << 4) +#define V4L2_HEVC_SPS_FLAG_PCM_LOOP_FILTER_DISABLED (1ULL << 5) +#define V4L2_HEVC_SPS_FLAG_LONG_TERM_REF_PICS_PRESENT (1ULL << 6) +#define V4L2_HEVC_SPS_FLAG_SPS_TEMPORAL_MVP_ENABLED (1ULL << 7) +#define V4L2_HEVC_SPS_FLAG_STRONG_INTRA_SMOOTHING_ENABLED (1ULL << 8) + +/** + * struct v4l2_ctrl_hevc_sps - ITU-T Rec. H.265: Sequence parameter set + * + * @video_parameter_set_id: specifies the value of the + * vps_video_parameter_set_id of the active VPS + * @seq_parameter_set_id: provides an identifier for the SPS for + * reference by other syntax elements + * @pic_width_in_luma_samples: specifies the width of each decoded picture + * in units of luma samples + * @pic_height_in_luma_samples: specifies the height of each decoded picture + * in units of luma samples + * @bit_depth_luma_minus8: this value plus 8 specifies the bit depth of the + * samples of the luma array + * @bit_depth_chroma_minus8: this value plus 8 specifies the bit depth of the + * samples of the chroma arrays + * @log2_max_pic_order_cnt_lsb_minus4: this value plus 4 specifies the value of + * the variable MaxPicOrderCntLsb + * @sps_max_dec_pic_buffering_minus1: this value plus 1 specifies the maximum + * required size of the decoded picture + * buffer for the coded video sequence + * @sps_max_num_reorder_pics: indicates the maximum allowed number of pictures + * @sps_max_latency_increase_plus1: not equal to 0 is used to compute the + * value of SpsMaxLatencyPictures array + * @log2_min_luma_coding_block_size_minus3: plus 3 specifies the minimum + * luma coding block size + * @log2_diff_max_min_luma_coding_block_size: specifies the difference between + * the maximum and minimum luma + * coding block size + * @log2_min_luma_transform_block_size_minus2: plus 2 specifies the minimum luma + * transform block size + * @log2_diff_max_min_luma_transform_block_size: specifies the difference between + * the maximum and minimum luma + * transform block size + *
@max_transform_hierarchy_depth_inter: specifies the maximum hierarchy + * depth for transform units of + * coding units coded in inter + * prediction mode + * @max_transform_hierarchy_depth_intra: specifies the maximum hierarchy + * depth for transform units of + * coding units coded in intra + * prediction mode + * @pcm_sample_bit_depth_luma_minus1: this value plus 1 specifies the number of + * bits used to represent each of PCM sample + * values of the luma component + * @pcm_sample_bit_depth_chroma_minus1: this value plus 1 specifies the number + * of bits used to represent each of PCM + * sample values of the chroma components + * @log2_min_pcm_luma_coding_block_size_minus3: this value plus 3 specifies the + * minimum size of coding blocks + * @log2_diff_max_min_pcm_luma_coding_block_size: specifies the difference between + * the maximum and minimum size of + * coding blocks + * @num_short_term_ref_pic_sets: specifies the number of st_ref_pic_set() + * syntax structures included in the SPS + * @num_long_term_ref_pics_sps: specifies the number of candidate long-term + * reference pictures that are specified in the SPS + * @chroma_format_idc: specifies the chroma sampling + * @sps_max_sub_layers_minus1: this value plus 1 specifies the maximum number + * of temporal sub-layers + * @reserved: padding field. Should be zeroed by applications. 
+ * @flags: see V4L2_HEVC_SPS_FLAG_{} + */ +struct v4l2_ctrl_hevc_sps { + __u8 video_parameter_set_id; + __u8 seq_parameter_set_id; + __u16 pic_width_in_luma_samples; + __u16 pic_height_in_luma_samples; + __u8 bit_depth_luma_minus8; + __u8 bit_depth_chroma_minus8; + __u8 log2_max_pic_order_cnt_lsb_minus4; + __u8 sps_max_dec_pic_buffering_minus1; + __u8 sps_max_num_reorder_pics; + __u8 sps_max_latency_increase_plus1; + __u8 log2_min_luma_coding_block_size_minus3; + __u8 log2_diff_max_min_luma_coding_block_size; + __u8 log2_min_luma_transform_block_size_minus2; + __u8 log2_diff_max_min_luma_transform_block_size; + __u8 max_transform_hierarchy_depth_inter; + __u8 max_transform_hierarchy_depth_intra; + __u8 pcm_sample_bit_depth_luma_minus1; + __u8 pcm_sample_bit_depth_chroma_minus1; + __u8 log2_min_pcm_luma_coding_block_size_minus3; + __u8 log2_diff_max_min_pcm_luma_coding_block_size; + __u8 num_short_term_ref_pic_sets; + __u8 num_long_term_ref_pics_sps; + __u8 chroma_format_idc; + __u8 sps_max_sub_layers_minus1; + + __u8 reserved[6]; + __u64 flags; +}; + +#define V4L2_HEVC_PPS_FLAG_DEPENDENT_SLICE_SEGMENT_ENABLED (1ULL << 0) +#define V4L2_HEVC_PPS_FLAG_OUTPUT_FLAG_PRESENT (1ULL << 1) +#define V4L2_HEVC_PPS_FLAG_SIGN_DATA_HIDING_ENABLED (1ULL << 2) +#define V4L2_HEVC_PPS_FLAG_CABAC_INIT_PRESENT (1ULL << 3) +#define V4L2_HEVC_PPS_FLAG_CONSTRAINED_INTRA_PRED (1ULL << 4) +#define V4L2_HEVC_PPS_FLAG_TRANSFORM_SKIP_ENABLED (1ULL << 5) +#define V4L2_HEVC_PPS_FLAG_CU_QP_DELTA_ENABLED (1ULL << 6) +#define V4L2_HEVC_PPS_FLAG_PPS_SLICE_CHROMA_QP_OFFSETS_PRESENT (1ULL << 7) +#define V4L2_HEVC_PPS_FLAG_WEIGHTED_PRED (1ULL << 8) +#define V4L2_HEVC_PPS_FLAG_WEIGHTED_BIPRED (1ULL << 9) +#define V4L2_HEVC_PPS_FLAG_TRANSQUANT_BYPASS_ENABLED (1ULL << 10) +#define V4L2_HEVC_PPS_FLAG_TILES_ENABLED (1ULL << 11) +#define V4L2_HEVC_PPS_FLAG_ENTROPY_CODING_SYNC_ENABLED (1ULL << 12) +#define V4L2_HEVC_PPS_FLAG_LOOP_FILTER_ACROSS_TILES_ENABLED (1ULL << 13) +#define 
V4L2_HEVC_PPS_FLAG_PPS_LOOP_FILTER_ACROSS_SLICES_ENABLED (1ULL << 14) +#define V4L2_HEVC_PPS_FLAG_DEBLOCKING_FILTER_OVERRIDE_ENABLED (1ULL << 15) +#define V4L2_HEVC_PPS_FLAG_PPS_DISABLE_DEBLOCKING_FILTER (1ULL << 16) +#define V4L2_HEVC_PPS_FLAG_LISTS_MODIFICATION_PRESENT (1ULL << 17) +#define V4L2_HEVC_PPS_FLAG_SLICE_SEGMENT_HEADER_EXTENSION_PRESENT (1ULL << 18) +#define V4L2_HEVC_PPS_FLAG_DEBLOCKING_FILTER_CONTROL_PRESENT (1ULL << 19) +#define V4L2_HEVC_PPS_FLAG_UNIFORM_SPACING (1ULL << 20) + +/** + * struct v4l2_ctrl_hevc_pps - ITU-T Rec. H.265: Picture parameter set + * + * @pic_parameter_set_id: identifies the PPS for reference by other + * syntax elements + * @num_extra_slice_header_bits: specifies the number of extra slice header + * bits that are present in the slice header RBSP + * for coded pictures referring to the PPS. + * @num_ref_idx_l0_default_active_minus1: this value plus 1 specifies the + * inferred value of num_ref_idx_l0_active_minus1 + * @num_ref_idx_l1_default_active_minus1: this value plus 1 specifies the + * inferred value of num_ref_idx_l1_active_minus1 + * @init_qp_minus26: this value plus 26 specifies the initial value of SliceQp Y for + * each slice referring to the PPS + * @diff_cu_qp_delta_depth: specifies the difference between the luma coding + * tree block size and the minimum luma coding block + * size of coding units that convey cu_qp_delta_abs + * and cu_qp_delta_sign_flag + * @pps_cb_qp_offset: specify the offsets to the luma quantization parameter Cb + * @pps_cr_qp_offset: specify the offsets to the luma quantization parameter Cr + * @num_tile_columns_minus1: this value plus 1 specifies the number of tile columns + * partitioning the picture + * @num_tile_rows_minus1: this value plus 1 specifies the number of tile rows partitioning + * the picture + * @column_width_minus1: this value plus 1 specifies the width of each tile column in + * units of coding tree blocks + * @row_height_minus1: this value plus 1 specifies the
height of each tile row in + * units of coding tree blocks + * @pps_beta_offset_div2: specify the default deblocking parameter offsets for + * beta divided by 2 + * @pps_tc_offset_div2: specify the default deblocking parameter offsets for tC + * divided by 2 + * @log2_parallel_merge_level_minus2: this value plus 2 specifies the value of + * the variable Log2ParMrgLevel + * @reserved: padding field. Should be zeroed by applications. + * @flags: see V4L2_HEVC_PPS_FLAG_{} + */ +struct v4l2_ctrl_hevc_pps { + __u8 pic_parameter_set_id; + __u8 num_extra_slice_header_bits; + __u8 num_ref_idx_l0_default_active_minus1; + __u8 num_ref_idx_l1_default_active_minus1; + __s8 init_qp_minus26; + __u8 diff_cu_qp_delta_depth; + __s8 pps_cb_qp_offset; + __s8 pps_cr_qp_offset; + __u8 num_tile_columns_minus1; + __u8 num_tile_rows_minus1; + __u8 column_width_minus1[20]; + __u8 row_height_minus1[22]; + __s8 pps_beta_offset_div2; + __s8 pps_tc_offset_div2; + __u8 log2_parallel_merge_level_minus2; + __u8 reserved; + __u64 flags; +}; + +#define V4L2_HEVC_DPB_ENTRY_LONG_TERM_REFERENCE 0x01 + +#define V4L2_HEVC_SEI_PIC_STRUCT_FRAME 0 +#define V4L2_HEVC_SEI_PIC_STRUCT_TOP_FIELD 1 +#define V4L2_HEVC_SEI_PIC_STRUCT_BOTTOM_FIELD 2 +#define V4L2_HEVC_SEI_PIC_STRUCT_TOP_BOTTOM 3 +#define V4L2_HEVC_SEI_PIC_STRUCT_BOTTOM_TOP 4 +#define V4L2_HEVC_SEI_PIC_STRUCT_TOP_BOTTOM_TOP 5 +#define V4L2_HEVC_SEI_PIC_STRUCT_BOTTOM_TOP_BOTTOM 6 +#define V4L2_HEVC_SEI_PIC_STRUCT_FRAME_DOUBLING 7 +#define V4L2_HEVC_SEI_PIC_STRUCT_FRAME_TRIPLING 8 +#define V4L2_HEVC_SEI_PIC_STRUCT_TOP_PAIRED_PREVIOUS_BOTTOM 9 +#define V4L2_HEVC_SEI_PIC_STRUCT_BOTTOM_PAIRED_PREVIOUS_TOP 10 +#define V4L2_HEVC_SEI_PIC_STRUCT_TOP_PAIRED_NEXT_BOTTOM 11 +#define V4L2_HEVC_SEI_PIC_STRUCT_BOTTOM_PAIRED_NEXT_TOP 12 + +#define V4L2_HEVC_DPB_ENTRIES_NUM_MAX 16 + +/** + * struct v4l2_hevc_dpb_entry - HEVC decoded picture buffer entry + * + * @timestamp: timestamp of the V4L2 capture buffer to use as reference.
+ * @flags: long term flag for the reference frame + * @field_pic: whether the reference is a field picture or a frame. + * @reserved: padding field. Should be zeroed by applications. + * @pic_order_cnt_val: the picture order count of the current picture. + */ +struct v4l2_hevc_dpb_entry { + __u64 timestamp; + __u8 flags; + __u8 field_pic; + __u16 reserved; + __s32 pic_order_cnt_val; +}; + +/** + * struct v4l2_hevc_pred_weight_table - HEVC weighted prediction parameters + * + * @delta_luma_weight_l0: the difference of the weighting factor applied + * to the luma prediction value for list 0 + * @luma_offset_l0: the additive offset applied to the luma prediction value + * for list 0 + * @delta_chroma_weight_l0: the difference of the weighting factor applied + * to the chroma prediction values for list 0 + * @chroma_offset_l0: the difference of the additive offset applied to + * the chroma prediction values for list 0 + * @delta_luma_weight_l1: the difference of the weighting factor applied + * to the luma prediction value for list 1 + * @luma_offset_l1: the additive offset applied to the luma prediction value + * for list 1 + * @delta_chroma_weight_l1: the difference of the weighting factor applied + * to the chroma prediction values for list 1 + * @chroma_offset_l1: the difference of the additive offset applied to + * the chroma prediction values for list 1 + * @luma_log2_weight_denom: the base 2 logarithm of the denominator for + * all luma weighting factors + * @delta_chroma_log2_weight_denom: the difference of the base 2 logarithm + * of the denominator for all chroma + * weighting factors + */ +struct v4l2_hevc_pred_weight_table { + __s8 delta_luma_weight_l0[V4L2_HEVC_DPB_ENTRIES_NUM_MAX]; + __s8 luma_offset_l0[V4L2_HEVC_DPB_ENTRIES_NUM_MAX]; + __s8 delta_chroma_weight_l0[V4L2_HEVC_DPB_ENTRIES_NUM_MAX][2]; + __s8 chroma_offset_l0[V4L2_HEVC_DPB_ENTRIES_NUM_MAX][2]; + + __s8 delta_luma_weight_l1[V4L2_HEVC_DPB_ENTRIES_NUM_MAX]; + __s8 
luma_offset_l1[V4L2_HEVC_DPB_ENTRIES_NUM_MAX]; + __s8 delta_chroma_weight_l1[V4L2_HEVC_DPB_ENTRIES_NUM_MAX][2]; + __s8 chroma_offset_l1[V4L2_HEVC_DPB_ENTRIES_NUM_MAX][2]; + + __u8 luma_log2_weight_denom; + __s8 delta_chroma_log2_weight_denom; +}; + +#define V4L2_HEVC_SLICE_PARAMS_FLAG_SLICE_SAO_LUMA (1ULL << 0) +#define V4L2_HEVC_SLICE_PARAMS_FLAG_SLICE_SAO_CHROMA (1ULL << 1) +#define V4L2_HEVC_SLICE_PARAMS_FLAG_SLICE_TEMPORAL_MVP_ENABLED (1ULL << 2) +#define V4L2_HEVC_SLICE_PARAMS_FLAG_MVD_L1_ZERO (1ULL << 3) +#define V4L2_HEVC_SLICE_PARAMS_FLAG_CABAC_INIT (1ULL << 4) +#define V4L2_HEVC_SLICE_PARAMS_FLAG_COLLOCATED_FROM_L0 (1ULL << 5) +#define V4L2_HEVC_SLICE_PARAMS_FLAG_USE_INTEGER_MV (1ULL << 6) +#define V4L2_HEVC_SLICE_PARAMS_FLAG_SLICE_DEBLOCKING_FILTER_DISABLED (1ULL << 7) +#define V4L2_HEVC_SLICE_PARAMS_FLAG_SLICE_LOOP_FILTER_ACROSS_SLICES_ENABLED (1ULL << 8) +#define V4L2_HEVC_SLICE_PARAMS_FLAG_DEPENDENT_SLICE_SEGMENT (1ULL << 9) + +/** + * struct v4l2_ctrl_hevc_slice_params - HEVC slice parameters + * + * This control is a dynamically sized 1-dimensional array, + * V4L2_CTRL_FLAG_DYNAMIC_ARRAY flag must be set when using it. + * + * @bit_size: size (in bits) of the current slice data + * @data_byte_offset: offset (in bytes) to the video data in the current slice data + * @num_entry_point_offsets: specifies the number of entry point offset syntax + * elements in the slice header. 
+ * @nal_unit_type: specifies the coding type of the slice (B, P or I) + * @nuh_temporal_id_plus1: minus 1 specifies a temporal identifier for the NAL unit + * @slice_type: see V4L2_HEVC_SLICE_TYPE_{} + * @colour_plane_id: specifies the colour plane associated with the current slice + * @slice_pic_order_cnt: specifies the picture order count + * @num_ref_idx_l0_active_minus1: this value plus 1 specifies the maximum + * reference index for reference picture list 0 + * that may be used to decode the slice + * @num_ref_idx_l1_active_minus1: this value plus 1 specifies the maximum + * reference index for reference picture list 1 + * that may be used to decode the slice + * @collocated_ref_idx: specifies the reference index of the collocated picture used + * for temporal motion vector prediction + * @five_minus_max_num_merge_cand: specifies the maximum number of merging + * motion vector prediction candidates supported in + * the slice subtracted from 5 + * @slice_qp_delta: specifies the initial value of QpY to be used for the coding + * blocks in the slice + * @slice_cb_qp_offset: specifies a difference to be added to the value of pps_cb_qp_offset + * @slice_cr_qp_offset: specifies a difference to be added to the value of pps_cr_qp_offset + * @slice_act_y_qp_offset: screen content extension parameters + * @slice_act_cb_qp_offset: screen content extension parameters + * @slice_act_cr_qp_offset: screen content extension parameters + * @slice_beta_offset_div2: specify the deblocking parameter offsets for beta divided by 2 + * @slice_tc_offset_div2: specify the deblocking parameter offsets for tC divided by 2 + * @pic_struct: indicates whether a picture should be displayed as a frame or as one or + * more fields + * @reserved0: padding field. Should be zeroed by applications. 
+ * @slice_segment_addr: specifies the address of the first coding tree block in + * the slice segment + * @ref_idx_l0: the list of L0 reference elements as indices in the DPB + * @ref_idx_l1: the list of L1 reference elements as indices in the DPB + * @short_term_ref_pic_set_size: specifies the size of short-term reference + * pictures set included in the SPS + * @long_term_ref_pic_set_size: specifies the size of long-term reference + * pictures set included in the SPS + * @pred_weight_table: the prediction weight coefficients for inter-picture + * prediction + * @reserved1: padding field. Should be zeroed by applications. + * @flags: see V4L2_HEVC_SLICE_PARAMS_FLAG_{} + */ +struct v4l2_ctrl_hevc_slice_params { + __u32 bit_size; + __u32 data_byte_offset; + __u32 num_entry_point_offsets; + + /* ISO/IEC 23008-2, ITU-T Rec. H.265: NAL unit header */ + __u8 nal_unit_type; + __u8 nuh_temporal_id_plus1; + + /* ISO/IEC 23008-2, ITU-T Rec. H.265: General slice segment header */ + __u8 slice_type; + __u8 colour_plane_id; + __s32 slice_pic_order_cnt; + __u8 num_ref_idx_l0_active_minus1; + __u8 num_ref_idx_l1_active_minus1; + __u8 collocated_ref_idx; + __u8 five_minus_max_num_merge_cand; + __s8 slice_qp_delta; + __s8 slice_cb_qp_offset; + __s8 slice_cr_qp_offset; + __s8 slice_act_y_qp_offset; + __s8 slice_act_cb_qp_offset; + __s8 slice_act_cr_qp_offset; + __s8 slice_beta_offset_div2; + __s8 slice_tc_offset_div2; + + /* ISO/IEC 23008-2, ITU-T Rec. H.265: Picture timing SEI message */ + __u8 pic_struct; + + __u8 reserved0[3]; + /* ISO/IEC 23008-2, ITU-T Rec. H.265: General slice segment header */ + __u32 slice_segment_addr; + __u8 ref_idx_l0[V4L2_HEVC_DPB_ENTRIES_NUM_MAX]; + __u8 ref_idx_l1[V4L2_HEVC_DPB_ENTRIES_NUM_MAX]; + __u16 short_term_ref_pic_set_size; + __u16 long_term_ref_pic_set_size; + + /* ISO/IEC 23008-2, ITU-T Rec.
H.265: Weighted prediction parameter */ + struct v4l2_hevc_pred_weight_table pred_weight_table; + + __u8 reserved1[2]; + __u64 flags; +}; + +#define V4L2_HEVC_DECODE_PARAM_FLAG_IRAP_PIC 0x1 +#define V4L2_HEVC_DECODE_PARAM_FLAG_IDR_PIC 0x2 +#define V4L2_HEVC_DECODE_PARAM_FLAG_NO_OUTPUT_OF_PRIOR 0x4 + +/** + * struct v4l2_ctrl_hevc_decode_params - HEVC decode parameters + * + * @pic_order_cnt_val: picture order count + * @short_term_ref_pic_set_size: specifies the size of short-term reference + * pictures set included in the SPS of the first slice + * @long_term_ref_pic_set_size: specifies the size of long-term reference + * pictures set included in the SPS of the first slice + * @num_active_dpb_entries: the number of entries in dpb + * @num_poc_st_curr_before: the number of reference pictures in the short-term + * set that come before the current frame + * @num_poc_st_curr_after: the number of reference pictures in the short-term + * set that come after the current frame + * @num_poc_lt_curr: the number of reference pictures in the long-term set + * @poc_st_curr_before: provides the index of the short term before references + * in DPB array + * @poc_st_curr_after: provides the index of the short term after references + * in DPB array + * @poc_lt_curr: provides the index of the long term references in DPB array + * @reserved: padding field. Should be zeroed by applications.
+ * @dpb: the decoded picture buffer, for meta-data about reference frames + * @flags: see V4L2_HEVC_DECODE_PARAM_FLAG_{} + */ +struct v4l2_ctrl_hevc_decode_params { + __s32 pic_order_cnt_val; + __u16 short_term_ref_pic_set_size; + __u16 long_term_ref_pic_set_size; + __u8 num_active_dpb_entries; + __u8 num_poc_st_curr_before; + __u8 num_poc_st_curr_after; + __u8 num_poc_lt_curr; + __u8 poc_st_curr_before[V4L2_HEVC_DPB_ENTRIES_NUM_MAX]; + __u8 poc_st_curr_after[V4L2_HEVC_DPB_ENTRIES_NUM_MAX]; + __u8 poc_lt_curr[V4L2_HEVC_DPB_ENTRIES_NUM_MAX]; + __u8 reserved[4]; + struct v4l2_hevc_dpb_entry dpb[V4L2_HEVC_DPB_ENTRIES_NUM_MAX]; + __u64 flags; +}; + +/** + * struct v4l2_ctrl_hevc_scaling_matrix - HEVC scaling lists parameters + * + * @scaling_list_4x4: scaling list is used for the scaling process for + * transform coefficients. The values on each scaling + * list are expected in raster scan order + * @scaling_list_8x8: scaling list is used for the scaling process for + * transform coefficients. The values on each scaling + * list are expected in raster scan order + * @scaling_list_16x16: scaling list is used for the scaling process for + * transform coefficients. The values on each scaling + * list are expected in raster scan order + * @scaling_list_32x32: scaling list is used for the scaling process for + * transform coefficients. The values on each scaling + * list are expected in raster scan order + * @scaling_list_dc_coef_16x16: scaling list is used for the scaling process + * for transform coefficients. The values on each + * scaling list are expected in raster scan order. + * @scaling_list_dc_coef_32x32: scaling list is used for the scaling process + * for transform coefficients. The values on each + * scaling list are expected in raster scan order. 
+ */ +struct v4l2_ctrl_hevc_scaling_matrix { + __u8 scaling_list_4x4[6][16]; + __u8 scaling_list_8x8[6][64]; + __u8 scaling_list_16x16[6][64]; + __u8 scaling_list_32x32[2][64]; + __u8 scaling_list_dc_coef_16x16[6]; + __u8 scaling_list_dc_coef_32x32[2]; +}; + #define V4L2_CID_COLORIMETRY_CLASS_BASE (V4L2_CTRL_CLASS_COLORIMETRY | 0x900) #define V4L2_CID_COLORIMETRY_CLASS (V4L2_CTRL_CLASS_COLORIMETRY | 1) diff --git a/include/uapi/linux/videodev2.h b/include/uapi/linux/videodev2.h index cff2bb78b2cc..d6fac2344033 100644 --- a/include/uapi/linux/videodev2.h +++ b/include/uapi/linux/videodev2.h @@ -1793,6 +1793,11 @@ struct v4l2_ext_control { struct v4l2_ctrl_mpeg2_quantisation __user *p_mpeg2_quantisation; struct v4l2_ctrl_vp9_compressed_hdr __user *p_vp9_compressed_hdr_probs; struct v4l2_ctrl_vp9_frame __user *p_vp9_frame; + struct v4l2_ctrl_hevc_sps __user *p_hevc_sps; + struct v4l2_ctrl_hevc_pps __user *p_hevc_pps; + struct v4l2_ctrl_hevc_slice_params __user *p_hevc_slice_params; + struct v4l2_ctrl_hevc_scaling_matrix __user *p_hevc_scaling_matrix; + struct v4l2_ctrl_hevc_decode_params __user *p_hevc_decode_params; void __user *ptr; }; } __attribute__ ((packed)); -- cgit v1.2.3 From ceefa81e6e69b020997205e5c30a42d43aa5ae63 Mon Sep 17 00:00:00 2001 From: Thomas Bogendoerfer Date: Fri, 15 Jul 2022 16:03:22 +0200 Subject: serial: remove VR41XX serial driver Commit d3164e2f3b0a ("MIPS: Remove VR41xx support") removed support for MIPS VR41xx platform, so remove exclusive drivers for this platform, too. 
Signed-off-by: Thomas Bogendoerfer Link: https://lore.kernel.org/r/20220715140322.135825-1-tsbogend@alpha.franken.de Signed-off-by: Greg Kroah-Hartman --- include/uapi/linux/serial_core.h | 4 ---- 1 file changed, 4 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/serial_core.h b/include/uapi/linux/serial_core.h index 6faf502b7860..3ba34d8378bd 100644 --- a/include/uapi/linux/serial_core.h +++ b/include/uapi/linux/serial_core.h @@ -124,10 +124,6 @@ /* TXX9 type number */ #define PORT_TXX9 64 -/* NEC VR4100 series SIU/DSIU */ -#define PORT_VR41XX_SIU 65 -#define PORT_VR41XX_DSIU 66 - /* Samsung S3C2400 SoC */ #define PORT_S3C2400 67 -- cgit v1.2.3 From 3c8e19d3d3f9a20cde987fa73fd83b13dcc8604f Mon Sep 17 00:00:00 2001 From: Ezequiel Garcia Date: Wed, 6 Jul 2022 19:28:55 +0100 Subject: media: Add P010 tiled format Add P010 tiled format [rebased, updated pixel format name and added description] Tested-by: Benjamin Gaignard Signed-off-by: Ezequiel Garcia Signed-off-by: Jernej Skrabec Signed-off-by: Hans Verkuil Signed-off-by: Mauro Carvalho Chehab --- include/uapi/linux/videodev2.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/videodev2.h b/include/uapi/linux/videodev2.h index d6fac2344033..01e630f2ec78 100644 --- a/include/uapi/linux/videodev2.h +++ b/include/uapi/linux/videodev2.h @@ -654,6 +654,7 @@ struct v4l2_pix_format { #define V4L2_PIX_FMT_NV12_4L4 v4l2_fourcc('V', 'T', '1', '2') /* 12 Y/CbCr 4:2:0 4x4 tiles */ #define V4L2_PIX_FMT_NV12_16L16 v4l2_fourcc('H', 'M', '1', '2') /* 12 Y/CbCr 4:2:0 16x16 tiles */ #define V4L2_PIX_FMT_NV12_32L32 v4l2_fourcc('S', 'T', '1', '2') /* 12 Y/CbCr 4:2:0 32x32 tiles */ +#define V4L2_PIX_FMT_P010_4L4 v4l2_fourcc('T', '0', '1', '0') /* 12 Y/CbCr 4:2:0 10-bit 4x4 macroblocks */ /* Tiled YUV formats, non contiguous planes */ #define V4L2_PIX_FMT_NV12MT v4l2_fourcc('T', 'M', '1', '2') /* 12 Y/CbCr 4:2:0 64x32 tiles */ -- cgit v1.2.3 From 
ddefb2d205539418f3c3851a3e06fac9624f257d Mon Sep 17 00:00:00 2001 From: Wen Gu Date: Thu, 14 Jul 2022 17:44:05 +0800 Subject: net/smc: Extend SMC-R link group netlink attribute Extend SMC-R link group netlink attribute SMC_GEN_LGR_SMCR. Introduce SMC_NLA_LGR_R_BUF_TYPE to show the buffer type of SMC-R link group. Signed-off-by: Wen Gu Signed-off-by: David S. Miller --- include/uapi/linux/smc.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/smc.h b/include/uapi/linux/smc.h index 693f549f6966..bb4dacca31e7 100644 --- a/include/uapi/linux/smc.h +++ b/include/uapi/linux/smc.h @@ -124,6 +124,7 @@ enum { SMC_NLA_LGR_R_V2, /* nest */ SMC_NLA_LGR_R_NET_COOKIE, /* u64 */ SMC_NLA_LGR_R_PAD, /* flag */ + SMC_NLA_LGR_R_BUF_TYPE, /* u8 */ __SMC_NLA_LGR_R_MAX, SMC_NLA_LGR_R_MAX = __SMC_NLA_LGR_R_MAX - 1 }; -- cgit v1.2.3 From d276a22314c2bad9136c5e0b09eb3c8a560e1161 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 18 Jul 2022 08:30:13 +0200 Subject: ublk: remove UBLK_IO_F_INTEGRITY The ublk protocol has no mechanism to actually transfer the integrity metadata, so don't define this flag, which requires that an integrity payload is attached to a bio. 
Signed-off-by: Christoph Hellwig Link: https://lore.kernel.org/r/20220718063013.335531-1-hch@lst.de Signed-off-by: Jens Axboe --- include/uapi/linux/ublk_cmd.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/ublk_cmd.h b/include/uapi/linux/ublk_cmd.h index a3f5e7c21807..d6879eea2fde 100644 --- a/include/uapi/linux/ublk_cmd.h +++ b/include/uapi/linux/ublk_cmd.h @@ -106,7 +106,6 @@ struct ublksrv_ctrl_dev_info { #define UBLK_IO_F_FAILFAST_TRANSPORT (1U << 9) #define UBLK_IO_F_FAILFAST_DRIVER (1U << 10) #define UBLK_IO_F_META (1U << 11) -#define UBLK_IO_F_INTEGRITY (1U << 12) #define UBLK_IO_F_FUA (1U << 13) #define UBLK_IO_F_PREFLUSH (1U << 14) #define UBLK_IO_F_NOUNMAP (1U << 15) -- cgit v1.2.3 From 450a563924ae9437758bd468c5b7cee9468ce749 Mon Sep 17 00:00:00 2001 From: Oliver Upton Date: Tue, 19 Jul 2022 12:52:29 +0000 Subject: KVM: stats: Fix value for KVM_STATS_UNIT_MAX for boolean stats commit 1b870fa5573e ("kvm: stats: tell userspace which values are boolean") added a new stat unit (boolean) but failed to raise KVM_STATS_UNIT_MAX. Fix by pointing UNIT_MAX at the new max value of UNIT_BOOLEAN. 
Fixes: 1b870fa5573e ("kvm: stats: tell userspace which values are boolean") Reported-by: Janis Schoetterl-Glausch Signed-off-by: Oliver Upton Message-Id: <20220719125229.2934273-1-oupton@google.com> Signed-off-by: Paolo Bonzini --- include/uapi/linux/kvm.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index 811897dadcae..860f867c50c0 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -2084,7 +2084,7 @@ struct kvm_stats_header { #define KVM_STATS_UNIT_SECONDS (0x2 << KVM_STATS_UNIT_SHIFT) #define KVM_STATS_UNIT_CYCLES (0x3 << KVM_STATS_UNIT_SHIFT) #define KVM_STATS_UNIT_BOOLEAN (0x4 << KVM_STATS_UNIT_SHIFT) -#define KVM_STATS_UNIT_MAX KVM_STATS_UNIT_CYCLES +#define KVM_STATS_UNIT_MAX KVM_STATS_UNIT_BOOLEAN #define KVM_STATS_BASE_SHIFT 8 #define KVM_STATS_BASE_MASK (0xF << KVM_STATS_BASE_SHIFT) -- cgit v1.2.3 From bdb2bc7599298ebb677e40fc92b1fa9e69e05098 Mon Sep 17 00:00:00 2001 From: Joanne Koong Date: Fri, 15 Jul 2022 12:38:00 -0700 Subject: bpf: fix bpf_skb_pull_data documentation Fix documentation for bpf_skb_pull_data() helper for when len == 0. Fixes: fa15601ab31e ("bpf: add documentation for eBPF helpers (33-41)") Signed-off-by: Joanne Koong Acked-by: Quentin Monnet Acked-by: Martin KaFai Lau Link: https://lore.kernel.org/r/20220715193800.3940070-1-joannelkoong@gmail.com Signed-off-by: Alexei Starovoitov --- include/uapi/linux/bpf.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 379e68fb866f..ffcbf79a556b 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -2361,7 +2361,8 @@ union bpf_attr { * Pull in non-linear data in case the *skb* is non-linear and not * all of *len* are part of the linear section. Make *len* bytes * from *skb* readable and writable. 
If a zero value is passed for - * *len*, then the whole length of the *skb* is pulled. + * *len*, then all bytes in the linear part of *skb* will be made + * readable and writable. * * This helper is only needed for reading and writing with direct * packet access. -- cgit v1.2.3 From 9d24322e887b6a3d3f9f9c3e76937a646102c8c1 Mon Sep 17 00:00:00 2001 From: Jonathan Cameron Date: Tue, 19 Jul 2022 13:52:46 -0700 Subject: PCI/DOE: Add DOE mailbox support functions Introduced in a PCIe r6.0, sec 6.30, DOE provides a config space based mailbox with standard protocol discovery. Each mailbox is accessed through a DOE Extended Capability. Each DOE mailbox must support the DOE discovery protocol in addition to any number of additional protocols. Define core PCIe functionality to manage a single PCIe DOE mailbox at a defined config space offset. Functionality includes iterating, creating, query of supported protocol, and task submission. Destruction of the mailboxes is device managed. Cc: "Li, Ming" Cc: Bjorn Helgaas Cc: Matthew Wilcox Acked-by: Bjorn Helgaas Signed-off-by: Jonathan Cameron Co-developed-by: Ira Weiny Signed-off-by: Ira Weiny Link: https://lore.kernel.org/r/20220719205249.566684-4-ira.weiny@intel.com Signed-off-by: Dan Williams --- include/uapi/linux/pci_regs.h | 29 ++++++++++++++++++++++++++++- 1 file changed, 28 insertions(+), 1 deletion(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/pci_regs.h b/include/uapi/linux/pci_regs.h index 108f8523fa04..57b8e2ffb1dd 100644 --- a/include/uapi/linux/pci_regs.h +++ b/include/uapi/linux/pci_regs.h @@ -737,7 +737,8 @@ #define PCI_EXT_CAP_ID_DVSEC 0x23 /* Designated Vendor-Specific */ #define PCI_EXT_CAP_ID_DLF 0x25 /* Data Link Feature */ #define PCI_EXT_CAP_ID_PL_16GT 0x26 /* Physical Layer 16.0 GT/s */ -#define PCI_EXT_CAP_ID_MAX PCI_EXT_CAP_ID_PL_16GT +#define PCI_EXT_CAP_ID_DOE 0x2E /* Data Object Exchange */ +#define PCI_EXT_CAP_ID_MAX PCI_EXT_CAP_ID_DOE #define PCI_EXT_CAP_DSN_SIZEOF 12 #define 
PCI_EXT_CAP_MCAST_ENDPOINT_SIZEOF 40 @@ -1103,4 +1104,30 @@ #define PCI_PL_16GT_LE_CTRL_USP_TX_PRESET_MASK 0x000000F0 #define PCI_PL_16GT_LE_CTRL_USP_TX_PRESET_SHIFT 4 +/* Data Object Exchange */ +#define PCI_DOE_CAP 0x04 /* DOE Capabilities Register */ +#define PCI_DOE_CAP_INT_SUP 0x00000001 /* Interrupt Support */ +#define PCI_DOE_CAP_INT_MSG_NUM 0x00000ffe /* Interrupt Message Number */ +#define PCI_DOE_CTRL 0x08 /* DOE Control Register */ +#define PCI_DOE_CTRL_ABORT 0x00000001 /* DOE Abort */ +#define PCI_DOE_CTRL_INT_EN 0x00000002 /* DOE Interrupt Enable */ +#define PCI_DOE_CTRL_GO 0x80000000 /* DOE Go */ +#define PCI_DOE_STATUS 0x0c /* DOE Status Register */ +#define PCI_DOE_STATUS_BUSY 0x00000001 /* DOE Busy */ +#define PCI_DOE_STATUS_INT_STATUS 0x00000002 /* DOE Interrupt Status */ +#define PCI_DOE_STATUS_ERROR 0x00000004 /* DOE Error */ +#define PCI_DOE_STATUS_DATA_OBJECT_READY 0x80000000 /* Data Object Ready */ +#define PCI_DOE_WRITE 0x10 /* DOE Write Data Mailbox Register */ +#define PCI_DOE_READ 0x14 /* DOE Read Data Mailbox Register */ + +/* DOE Data Object - note not actually registers */ +#define PCI_DOE_DATA_OBJECT_HEADER_1_VID 0x0000ffff +#define PCI_DOE_DATA_OBJECT_HEADER_1_TYPE 0x00ff0000 +#define PCI_DOE_DATA_OBJECT_HEADER_2_LENGTH 0x0003ffff + +#define PCI_DOE_DATA_OBJECT_DISC_REQ_3_INDEX 0x000000ff +#define PCI_DOE_DATA_OBJECT_DISC_RSP_3_VID 0x0000ffff +#define PCI_DOE_DATA_OBJECT_DISC_RSP_3_PROTOCOL 0x00ff0000 +#define PCI_DOE_DATA_OBJECT_DISC_RSP_3_NEXT_INDEX 0xff000000 + #endif /* LINUX_PCI_REGS_H */ -- cgit v1.2.3 From e70a3263a7eed768d5f947b8f2aff8d2a79c9d97 Mon Sep 17 00:00:00 2001 From: Vincent Mailhol Date: Tue, 19 Jul 2022 23:35:48 +0900 Subject: can: error: specify the values of data[5..7] of CAN error frames Currently, data[5..7] of struct can_frame, when used as a CAN error frame, are defined as being "controller specific". 
Device specific behaviours are problematic because they prevent someone from writing code which is portable between devices. As a matter of fact, data[5] is never used, data[6] is always used to report TX error counter and data[7] is always used to report RX error counter. can-utils also relies on this. This patch updates the comment in the uapi header to specify that data[5] is reserved (and thus should not be used) and that data[6..7] are used for error counters. Fixes: 0d66548a10cb ("[CAN]: Add PF_CAN core module") Link: https://lore.kernel.org/all/20220719143550.3681-11-mailhol.vincent@wanadoo.fr Signed-off-by: Vincent Mailhol Signed-off-by: Marc Kleine-Budde --- include/uapi/linux/can/error.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/can/error.h b/include/uapi/linux/can/error.h index 34633283de64..a1000cb63063 100644 --- a/include/uapi/linux/can/error.h +++ b/include/uapi/linux/can/error.h @@ -120,6 +120,9 @@ #define CAN_ERR_TRX_CANL_SHORT_TO_GND 0x70 /* 0111 0000 */ #define CAN_ERR_TRX_CANL_SHORT_TO_CANH 0x80 /* 1000 0000 */ -/* controller specific additional information / data[5..7] */ +/* data[5] is reserved (do not use) */ + +/* TX error counter / data[6] */ +/* RX error counter / data[7] */ #endif /* _UAPI_CAN_ERROR_H */ -- cgit v1.2.3 From 3e5c291c7942d0909a48bc5ec1b9bba136465166 Mon Sep 17 00:00:00 2001 From: Vincent Mailhol Date: Tue, 19 Jul 2022 23:35:49 +0900 Subject: can: add CAN_ERR_CNT flag to notify availability of error counter Add a dedicated flag in uapi/linux/can/error.h to notify the userland that fields data[6] and data[7] of the CAN error frame were respectively populated with the tx and rx error counters. For all drivers tree-wide, set this flag whenever needed.
Link: https://lore.kernel.org/all/20220719143550.3681-12-mailhol.vincent@wanadoo.fr Signed-off-by: Vincent Mailhol Signed-off-by: Marc Kleine-Budde --- include/uapi/linux/can/error.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/can/error.h b/include/uapi/linux/can/error.h index a1000cb63063..b7c3efd9ff99 100644 --- a/include/uapi/linux/can/error.h +++ b/include/uapi/linux/can/error.h @@ -57,6 +57,8 @@ #define CAN_ERR_BUSOFF 0x00000040U /* bus off */ #define CAN_ERR_BUSERROR 0x00000080U /* bus error (may flood!) */ #define CAN_ERR_RESTARTED 0x00000100U /* controller restarted */ +#define CAN_ERR_CNT 0x00000200U /* TX error counter / data[6] */ + /* RX error counter / data[7] */ /* arbitration lost in bit ... / data[0] */ #define CAN_ERR_LOSTARB_UNSPEC 0x00 /* unspecified */ -- cgit v1.2.3 From 3f9c26210cf80ea8cb5dd901aba5feb77200b085 Mon Sep 17 00:00:00 2001 From: Vincent Mailhol Date: Tue, 19 Jul 2022 23:35:50 +0900 Subject: can: error: add definitions for the different CAN error thresholds Currently, drivers are using magic numbers to derive the CAN error states from the error counter. Add three macro declarations to remediate this. For reference, the error-active, error-passive and bus-off are defined in ISO 11898, section 12.1.4.2 "Error counting". Although ISO 11898 does not define error-warning state, this extra value is also commonly used and is thus also added. 
Link: https://lore.kernel.org/all/20220719143550.3681-13-mailhol.vincent@wanadoo.fr Signed-off-by: Vincent Mailhol Signed-off-by: Marc Kleine-Budde --- include/uapi/linux/can/error.h | 13 +++++++++++++ 1 file changed, 13 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/can/error.h b/include/uapi/linux/can/error.h index b7c3efd9ff99..acc1ac393d2a 100644 --- a/include/uapi/linux/can/error.h +++ b/include/uapi/linux/can/error.h @@ -127,4 +127,17 @@ /* TX error counter / data[6] */ /* RX error counter / data[7] */ +/* CAN state thresholds + * + * Error counter Error state + * ----------------------------------- + * 0 - 95 Error-active + * 96 - 127 Error-warning + * 128 - 255 Error-passive + * 256 and greater Bus-off + */ +#define CAN_ERROR_WARNING_THRESHOLD 96 +#define CAN_ERROR_PASSIVE_THRESHOLD 128 +#define CAN_BUS_OFF_THRESHOLD 256 + #endif /* _UAPI_CAN_ERROR_H */ -- cgit v1.2.3 From f5ecfee94493475783074e86ded10a0499d779fc Mon Sep 17 00:00:00 2001 From: Pierre Morel Date: Thu, 14 Jul 2022 21:43:34 +0200 Subject: KVM: s390: resetting the Topology-Change-Report During a subsystem reset the Topology-Change-Report is cleared. Let's give userland the possibility to clear the MTCR in the case of a subsystem reset. To migrate the MTCR, we give userland the possibility to query the MTCR state. We indicate KVM support for the CPU topology facility with a new KVM capability: KVM_CAP_S390_CPU_TOPOLOGY. 
Signed-off-by: Pierre Morel Reviewed-by: Janis Schoetterl-Glausch Reviewed-by: Janosch Frank Message-Id: <20220714194334.127812-1-pmorel@linux.ibm.com> Link: https://lore.kernel.org/all/20220714194334.127812-1-pmorel@linux.ibm.com/ [frankja@linux.ibm.com: Simple conflict resolution in Documentation/virt/kvm/api.rst] Signed-off-by: Janosch Frank --- include/uapi/linux/kvm.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index 20817dd7f2f1..7e06194129e3 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -1168,6 +1168,7 @@ struct kvm_ppc_resize_hpt { #define KVM_CAP_X86_NOTIFY_VMEXIT 219 #define KVM_CAP_VM_DISABLE_NX_HUGE_PAGES 220 #define KVM_CAP_S390_ZPCI_OP 221 +#define KVM_CAP_S390_CPU_TOPOLOGY 222 #ifdef KVM_CAP_IRQ_ROUTING -- cgit v1.2.3 From d0b55afa47694f6f61b40f578ede7bde1648fe48 Mon Sep 17 00:00:00 2001 From: Fenghua Yu Date: Wed, 6 Jul 2022 17:20:52 -0700 Subject: dmaengine: idxd: Correct IAX operation code names Some IAX operation code nomenclatures are misleading or don't match with others: 1. Operation code 0x4c is Zero Compress 32. IAX_OPCODE_DECOMP_32 is a misleading name. Change it to IAX_OPCODE_ZERO_COMP_32. 2. Operation code 0x4d is Zero Compress 16. IAX_OPCODE_DECOMP_16 is a misleading name. Change it to IAX_OPCODE_ZERO_COMP_16. 3. IAX_OPCDE_FIND_UNIQUE is corrected to match with other nomenclatures. 
Co-developed-by: Li Zhang Signed-off-by: Li Zhang Signed-off-by: Fenghua Yu Reviewed-by: Dave Jiang Link: https://lore.kernel.org/r/20220707002052.1546361-1-fenghua.yu@intel.com Signed-off-by: Vinod Koul --- include/uapi/linux/idxd.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/idxd.h b/include/uapi/linux/idxd.h index bce7c43657d5..095299c75828 100644 --- a/include/uapi/linux/idxd.h +++ b/include/uapi/linux/idxd.h @@ -89,14 +89,14 @@ enum iax_opcode { IAX_OPCODE_CRC64, IAX_OPCODE_ZERO_DECOMP_32 = 0x48, IAX_OPCODE_ZERO_DECOMP_16, - IAX_OPCODE_DECOMP_32 = 0x4c, - IAX_OPCODE_DECOMP_16, + IAX_OPCODE_ZERO_COMP_32 = 0x4c, + IAX_OPCODE_ZERO_COMP_16, IAX_OPCODE_SCAN = 0x50, IAX_OPCODE_SET_MEMBER, IAX_OPCODE_EXTRACT, IAX_OPCODE_SELECT, IAX_OPCODE_RLE_BURST, - IAX_OPCDE_FIND_UNIQUE, + IAX_OPCODE_FIND_UNIQUE, IAX_OPCODE_EXPAND, }; -- cgit v1.2.3 From 5f8bcc837a9640ba4bf5e7b1d7f9b254ea029f47 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 21 Jul 2022 15:09:10 +0200 Subject: ublk: remove UBLK_IO_F_PREFLUSH REQ_PREFLUSH is turned into REQ_OP_FLUSH by the flush state machine and thus never seen by a blk-mq based driver. 
Signed-off-by: Christoph Hellwig Reviewed-by: Ming Lei Link: https://lore.kernel.org/r/20220721130916.1869719-3-hch@lst.de Signed-off-by: Jens Axboe --- include/uapi/linux/ublk_cmd.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/ublk_cmd.h b/include/uapi/linux/ublk_cmd.h index d6879eea2fde..917580b34198 100644 --- a/include/uapi/linux/ublk_cmd.h +++ b/include/uapi/linux/ublk_cmd.h @@ -107,7 +107,6 @@ struct ublksrv_ctrl_dev_info { #define UBLK_IO_F_FAILFAST_DRIVER (1U << 10) #define UBLK_IO_F_META (1U << 11) #define UBLK_IO_F_FUA (1U << 13) -#define UBLK_IO_F_PREFLUSH (1U << 14) #define UBLK_IO_F_NOUNMAP (1U << 15) #define UBLK_IO_F_SWAP (1U << 16) -- cgit v1.2.3 From 9dd1953846c7cd58100a5c6bd90db54e2c60668a Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Tue, 19 Jul 2022 10:26:50 +0200 Subject: wifi: nl80211/mac80211: clarify link ID in control port TX Clarify the link ID behaviour in control port TX, we need it to select the link to transmit on for both MLD and non-MLD receivers, but select the link address as the SA only if the receiver is not an MLD. Fixes: 67207bab9341 ("wifi: cfg80211/mac80211: Support control port TX from specific link") Signed-off-by: Johannes Berg --- include/uapi/linux/nl80211.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index 3fa586e38f88..d4d6ba585b41 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -1119,6 +1119,12 @@ * has been received. %NL80211_ATTR_FRAME is used to specify the * frame contents. The frame is the raw EAPoL data, without ethernet or * 802.11 headers. + * For an MLD transmitter, the %NL80211_ATTR_MLO_LINK_ID may be given and + * its effect will depend on the destination: If the destination is known + * to be an MLD, this will be used as a hint to select the link to transmit + * the frame on. 
If the destination is not an MLD, this will select both + * the link to transmit on and the source address will be set to the link + * address of that link. * When used as an event indication %NL80211_ATTR_CONTROL_PORT_ETHERTYPE, * %NL80211_ATTR_CONTROL_PORT_NO_ENCRYPT and %NL80211_ATTR_MAC are added * indicating the protocol type of the received frame; whether the frame -- cgit v1.2.3 From 80b0ed70a271d375feb2286696ca8af147a035cf Mon Sep 17 00:00:00 2001 From: Avraham Stern Date: Wed, 26 Jan 2022 16:06:35 +0200 Subject: wifi: nl80211: add RX and TX timestamp attributes Add attributes for reporting hardware timestamps for management frames RX and TX. These attributes will be used for reporting hardware timestamps for Timing measurement and Fine Timing Measurement action frames, which will allow userspace applications to measure the path delay between devices and sync clocks. For RX, these attributes are used for reporting the frame RX time and the ack TX time. For TX, they are used for reporting the frame TX time and the ack RX time. Signed-off-by: Avraham Stern Signed-off-by: Johannes Berg --- include/uapi/linux/nl80211.h | 22 +++++++++++++++++++++- 1 file changed, 21 insertions(+), 1 deletion(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index d4d6ba585b41..5275dcbc5ee8 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -764,6 +764,9 @@ * %NL80211_ATTR_CSA_C_OFFSETS_TX is an array of offsets to CSA * counters which will be updated to the current value. This attribute * is used during CSA period. + * For RX notification, %NL80211_ATTR_RX_HW_TIMESTAMP may be included to + * indicate the frame RX timestamp and %NL80211_ATTR_TX_HW_TIMESTAMP may + * be included to indicate the ack TX timestamp.
* @NL80211_CMD_FRAME_WAIT_CANCEL: When an off-channel TX was requested, this * command may be used with the corresponding cookie to cancel the wait * time if it is known that it is no longer necessary. This command is @@ -774,7 +777,9 @@ * transmitted with %NL80211_CMD_FRAME. %NL80211_ATTR_COOKIE identifies * the TX command and %NL80211_ATTR_FRAME includes the contents of the * frame. %NL80211_ATTR_ACK flag is included if the recipient acknowledged - * the frame. + * the frame. %NL80211_ATTR_TX_HW_TIMESTAMP may be included to indicate the + * tx timestamp and %NL80211_ATTR_RX_HW_TIMESTAMP may be included to + * indicate the ack RX timestamp. * @NL80211_CMD_ACTION_TX_STATUS: Alias for @NL80211_CMD_FRAME_TX_STATUS for * backward compatibility. * @@ -2720,6 +2725,18 @@ enum nl80211_commands { * @NL80211_ATTR_EML_CAPABILITY: EML Capability information (u16) * @NL80211_ATTR_MLD_CAPA_AND_OPS: MLD Capabilities and Operations (u16) * + * @NL80211_ATTR_TX_HW_TIMESTAMP: Hardware timestamp for TX operation in + * nanoseconds (u64). This is the device clock timestamp so it will + * probably reset when the device is stopped or the firmware is reset. + * When used with %NL80211_CMD_FRAME_TX_STATUS, indicates the frame TX + * timestamp. When used with %NL80211_CMD_FRAME RX notification, indicates + * the ack TX timestamp. + * @NL80211_ATTR_RX_HW_TIMESTAMP: Hardware timestamp for RX operation in + * nanoseconds (u64). This is the device clock timestamp so it will + * probably reset when the device is stopped or the firmware is reset. + * When used with %NL80211_CMD_FRAME_TX_STATUS, indicates the ack RX + * timestamp. When used with %NL80211_CMD_FRAME RX notification, indicates + * the incoming frame RX timestamp. 
* @NUM_NL80211_ATTR: total number of nl80211_attrs available * @NL80211_ATTR_MAX: highest attribute number currently defined * @__NL80211_ATTR_AFTER_LAST: internal use @@ -3245,6 +3262,9 @@ enum nl80211_attrs { NL80211_ATTR_EML_CAPABILITY, NL80211_ATTR_MLD_CAPA_AND_OPS, + NL80211_ATTR_TX_HW_TIMESTAMP, + NL80211_ATTR_RX_HW_TIMESTAMP, + /* add attributes here, update the policy in nl80211.c */ __NL80211_ATTR_AFTER_LAST, -- cgit v1.2.3 From 95f498bb49f7030c1f40236107e5241e50f79ade Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Mon, 18 Jul 2022 12:13:46 +0200 Subject: wifi: nl80211: add MLO link ID to the NL80211_CMD_FRAME TX API Allow optionally specifying the link ID to transmit on, which can be done instead of the link frequency, on an MLD addressed frame. Both can also be omitted in which case the frame must be MLD addressed and link selection (and address translation) will be done on lower layers. Signed-off-by: Johannes Berg --- include/uapi/linux/nl80211.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index 5275dcbc5ee8..ffb7c573e299 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -764,6 +764,10 @@ * %NL80211_ATTR_CSA_C_OFFSETS_TX is an array of offsets to CSA * counters which will be updated to the current value. This attribute * is used during CSA period. + * For TX on an MLD, the frequency can be omitted and the link ID be + * specified, or if transmitting to a known peer MLD (with MLD addresses + * in the frame) both can be omitted and the link will be selected by + * lower layers. * For RX notification, %NL80211_ATTR_RX_HW_TIMESTAMP may be included to * indicate the frame RX timestamp and %NL80211_ATTR_TX_HW_TIMESTAMP may * be included to indicate the ack TX timestamp. 
-- cgit v1.2.3 From 6d8c5afc9ab14595707ff25d971dde45728eba3e Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Fri, 22 Jul 2022 18:38:17 +0800 Subject: ublk_drv: make sure that correct flags(features) returned to userspace Userspace may support more features or newly added flags, but the driver side can be old, so make sure correct flags(features) returned to userspace, then userspace can work as expected. Also mark the 2nd flags as reserved, just use the 1st one. When we run out of flags, the reserved one can be handled at that time. Reviewed-by: Christoph Hellwig Reviewed-by: ZiyangZhang Signed-off-by: Ming Lei Link: https://lore.kernel.org/r/20220722103817.631258-3-ming.lei@redhat.com Signed-off-by: Jens Axboe --- include/uapi/linux/ublk_cmd.h | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/ublk_cmd.h b/include/uapi/linux/ublk_cmd.h index 917580b34198..ca33092354ab 100644 --- a/include/uapi/linux/ublk_cmd.h +++ b/include/uapi/linux/ublk_cmd.h @@ -46,13 +46,13 @@ * zero copy requires 4k block size, and can remap ublk driver's io * request into ublksrv's vm space */ -#define UBLK_F_SUPPORT_ZERO_COPY (1UL << 0) +#define UBLK_F_SUPPORT_ZERO_COPY (1ULL << 0) /* * Force to complete io cmd via io_uring_cmd_complete_in_task so that * performance comparison is done easily with using task_work_add */ -#define UBLK_F_URING_CMD_COMP_IN_TASK (1UL << 1) +#define UBLK_F_URING_CMD_COMP_IN_TASK (1ULL << 1) /* device state */ #define UBLK_S_DEV_DEAD 0 @@ -88,7 +88,8 @@ struct ublksrv_ctrl_dev_info { __s32 ublksrv_pid; __s32 reserved0; - __u64 flags[2]; + __u64 flags; + __u64 flags_reserved; /* For ublksrv internal use, invisible to ublk driver */ __u64 ublksrv_flags; -- cgit v1.2.3 From b9ba8a4463cd78d0aee520c4bf2569820ac29929 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Fri, 27 May 2022 10:55:07 -0600 Subject: io_uring: add support for level triggered poll By default, the POLL_ADD command does edge
triggered poll - if we get a non-zero mask on the initial poll attempt, we complete the request successfully. Support level triggered by always waiting for a notification, regardless of whether or not the initial mask matches the file state. Signed-off-by: Jens Axboe --- include/uapi/linux/io_uring.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h index 0ad3da28d2fc..4927bb69387a 100644 --- a/include/uapi/linux/io_uring.h +++ b/include/uapi/linux/io_uring.h @@ -229,10 +229,13 @@ enum io_uring_op { * * IORING_POLL_UPDATE Update existing poll request, matching * sqe->addr as the old user_data field. + * + * IORING_POLL_ADD_LEVEL Level triggered poll. */ #define IORING_POLL_ADD_MULTI (1U << 0) #define IORING_POLL_UPDATE_EVENTS (1U << 1) #define IORING_POLL_UPDATE_USER_DATA (1U << 2) +#define IORING_POLL_ADD_LEVEL (1U << 3) /* * ASYNC_CANCEL flags. -- cgit v1.2.3 From 97bbdc06a4446bc69d8ba71d722abae542a6b70c Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Thu, 16 Jun 2022 10:22:08 +0100 Subject: io_uring: add IORING_SETUP_SINGLE_ISSUER Add a new IORING_SETUP_SINGLE_ISSUER flag and the userspace visible part of it, i.e. put limitations on submitters. Also, don't allow it together with IOPOLL as we're not going to put it to good use. Signed-off-by: Pavel Begunkov Link: https://lore.kernel.org/r/4bcc41ee467fdf04c8aab8baf6ce3ba21858c3d4.1655371007.git.asml.silence@gmail.com Reviewed-by: Hao Xu Signed-off-by: Jens Axboe --- include/uapi/linux/io_uring.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h index 4927bb69387a..d7ae81b10893 100644 --- a/include/uapi/linux/io_uring.h +++ b/include/uapi/linux/io_uring.h @@ -140,9 +140,12 @@ enum { * IORING_SQ_TASKRUN in the sq ring flags. Not valid with COOP_TASKRUN.
*/ #define IORING_SETUP_TASKRUN_FLAG (1U << 9) - #define IORING_SETUP_SQE128 (1U << 10) /* SQEs are 128 byte */ #define IORING_SETUP_CQE32 (1U << 11) /* CQEs are 32 byte */ +/* + * Only one task is allowed to submit requests + */ +#define IORING_SETUP_SINGLE_ISSUER (1U << 12) enum io_uring_op { IORING_OP_NOP, -- cgit v1.2.3 From 7d8ca7250197096bfa9f432c1d99b0555504bbba Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Sat, 18 Jun 2022 09:47:04 -0600 Subject: io_uring: add IORING_ASYNC_CANCEL_FD_FIXED cancel flag In preparation for not having a request to pass in that carries this state, add a separate cancelation flag that allows the caller to ask for a fixed file for cancelation. Signed-off-by: Jens Axboe --- include/uapi/linux/io_uring.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h index d7ae81b10893..a09a78bd7556 100644 --- a/include/uapi/linux/io_uring.h +++ b/include/uapi/linux/io_uring.h @@ -247,10 +247,12 @@ enum io_uring_op { * IORING_ASYNC_CANCEL_FD Key off 'fd' for cancelation rather than the * request 'user_data' * IORING_ASYNC_CANCEL_ANY Match any request + * IORING_ASYNC_CANCEL_FD_FIXED 'fd' passed in is a fixed descriptor */ #define IORING_ASYNC_CANCEL_ALL (1U << 0) #define IORING_ASYNC_CANCEL_FD (1U << 1) #define IORING_ASYNC_CANCEL_ANY (1U << 2) +#define IORING_ASYNC_CANCEL_FD_FIXED (1U << 3) /* * send/sendmsg and recv/recvmsg flags (sqe->ioprio) -- cgit v1.2.3 From 78a861b9495920f8609dee5b670dacbff09d359f Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Sat, 18 Jun 2022 10:00:50 -0600 Subject: io_uring: add sync cancelation API through io_uring_register() The io_uring cancelation API is async, like any other API that we expose there. For the case of finding a request to cancel, or not finding one, it is fully sync in that when submission returns, the CQE for both the cancelation request and the targeted request have been posted to the CQ ring. 
However, if the targeted work is being executed by io-wq, the API can only start the act of canceling it. This makes it difficult to use in some circumstances, as the caller then has to wait for the CQEs to come in and match on the same cancelation data there. Provide a IORING_REGISTER_SYNC_CANCEL command for io_uring_register() that does sync cancelations, always. For the io-wq case, it'll wait for the cancelation to come in before returning. The only expected returns from this API is: 0 Request found and canceled fine. > 0 Requests found and canceled. Only happens if asked to cancel multiple requests, and if the work wasn't in progress. -ENOENT Request not found. -ETIME A timeout on the operation was requested, but the timeout expired before we could cancel. and we won't get -EALREADY via this API. If the timeout value passed in is -1 (tv_sec and tv_nsec), then that means that no timeout is requested. Otherwise, the timespec passed in is the amount of time the sync cancel will wait for a successful cancelation. 
Link: https://github.com/axboe/liburing/discussions/608 Signed-off-by: Jens Axboe --- include/uapi/linux/io_uring.h | 15 +++++++++++++++ 1 file changed, 15 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h index a09a78bd7556..094f706c93e0 100644 --- a/include/uapi/linux/io_uring.h +++ b/include/uapi/linux/io_uring.h @@ -10,6 +10,7 @@ #include #include +#include /* * IO submission data structure (Submission Queue Entry) @@ -428,6 +429,9 @@ enum { IORING_REGISTER_PBUF_RING = 22, IORING_UNREGISTER_PBUF_RING = 23, + /* sync cancelation API */ + IORING_REGISTER_SYNC_CANCEL = 24, + /* this goes last */ IORING_REGISTER_LAST }; @@ -563,4 +567,15 @@ struct io_uring_getevents_arg { __u64 ts; }; +/* + * Argument for IORING_REGISTER_SYNC_CANCEL + */ +struct io_uring_sync_cancel_reg { + __u64 addr; + __s32 fd; + __u32 flags; + struct __kernel_timespec timeout; + __u64 pad[4]; +}; + #endif -- cgit v1.2.3 From 8fcf4c48f44bd7b1b75db139f56ff1ad6477379e Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Tue, 28 Jun 2022 21:33:20 +0200 Subject: io_uring: replace zero-length array with flexible-array member MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit There is a regular need in the kernel to provide a way to declare having a dynamically sized set of trailing elements in a structure. Kernel code should always use “flexible array members”[1] for these cases. The older style of one-element or zero-length arrays should no longer be used[2]. [1] https://en.wikipedia.org/wiki/Flexible_array_member [2] https://www.kernel.org/doc/html/v5.16/process/deprecated.html#zero-length-and-one-element-arrays Link: https://github.com/KSPP/linux/issues/78 Signed-off-by: Gustavo A. R. 
Silva Signed-off-by: Jens Axboe --- include/uapi/linux/io_uring.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h index 094f706c93e0..8fe0275cdaf3 100644 --- a/include/uapi/linux/io_uring.h +++ b/include/uapi/linux/io_uring.h @@ -495,7 +495,7 @@ struct io_uring_probe { __u8 ops_len; /* length of ops[] array below */ __u16 resv; __u32 resv2[3]; - struct io_uring_probe_op ops[0]; + struct io_uring_probe_op ops[]; }; struct io_uring_restriction { -- cgit v1.2.3 From e6130eba8a848a7a6ba6c534bd8f6d60749ae1a9 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Mon, 13 Jun 2022 04:47:02 -0600 Subject: io_uring: add support for passing fixed file descriptors With IORING_OP_MSG_RING, one ring can send a message to another ring. Extend that support to also allow sending a fixed file descriptor to that ring, enabling one ring to pass a registered descriptor to another one. Arguments are extended to pass in: sqe->addr3 fixed file slot in source ring sqe->file_index fixed file slot in destination ring IORING_OP_MSG_RING is extended to take a command argument in sqe->addr. If set to zero (or IORING_MSG_DATA), it sends just a message like before. If set to IORING_MSG_SEND_FD, a fixed file descriptor is sent according to the above arguments. Two common use cases for this are: 1) Server needs to be shutdown or restarted, pass file descriptors to another one 2) Backend is split, and one accepts connections, while others then get the fd passed and handle the actual connection. Both of those are classic SCM_RIGHTS use cases, and it's not possible to support them with direct descriptors today. By default, this will post a CQE to the target ring, similarly to how IORING_MSG_DATA does it. If IORING_MSG_RING_CQE_SKIP is set, no message is posted to the target ring. The issuer is expected to notify the receiver side separately.
Signed-off-by: Jens Axboe --- include/uapi/linux/io_uring.h | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h index 8fe0275cdaf3..f378eabbff21 100644 --- a/include/uapi/linux/io_uring.h +++ b/include/uapi/linux/io_uring.h @@ -51,6 +51,7 @@ struct io_uring_sqe { __u32 unlink_flags; __u32 hardlink_flags; __u32 xattr_flags; + __u32 msg_ring_flags; }; __u64 user_data; /* data to be passed back at completion time */ /* pack this to avoid bogus arm OABI complaints */ @@ -270,6 +271,22 @@ enum io_uring_op { */ #define IORING_ACCEPT_MULTISHOT (1U << 0) +/* + * IORING_OP_MSG_RING command types, stored in sqe->addr + */ +enum { + IORING_MSG_DATA, /* pass sqe->len as 'res' and off as user_data */ + IORING_MSG_SEND_FD, /* send a registered fd to another ring */ +}; + +/* + * IORING_OP_MSG_RING flags (sqe->msg_ring_flags) + * + * IORING_MSG_RING_CQE_SKIP Don't post a CQE to the target ring. Not + * applicable for IORING_MSG_DATA, obviously. + */ +#define IORING_MSG_RING_CQE_SKIP (1U << 0) + /* * IO completion data structure (Completion Queue Entry) */ -- cgit v1.2.3 From 6e73dffbb93cb8797cd4e42e98d837edf0f1a967 Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Sat, 25 Jun 2022 11:55:38 +0100 Subject: io_uring: let to set a range for file slot allocation From recently io_uring provides an option to allocate a file index for operation registering fixed files. However, it's utterly unusable with mixed approaches when for a part of files the userspace knows better where to place it, as it may race and users don't have any sane way to pick a slot and hoping it will not be taken. Let the userspace to register a range of fixed file slots in which the auto-allocation happens. The use case is splitting the fixed table in two parts, where one of them is used for auto-allocation and another for slot-specified operations.
Signed-off-by: Pavel Begunkov Link: https://lore.kernel.org/r/66ab0394e436f38437cf7c44676e1920d09687ad.1656154403.git.asml.silence@gmail.com Signed-off-by: Jens Axboe --- include/uapi/linux/io_uring.h | 13 +++++++++++++ 1 file changed, 13 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h index f378eabbff21..cf95354198a3 100644 --- a/include/uapi/linux/io_uring.h +++ b/include/uapi/linux/io_uring.h @@ -449,6 +449,9 @@ enum { /* sync cancelation API */ IORING_REGISTER_SYNC_CANCEL = 24, + /* register a range of fixed file slots for automatic slot allocation */ + IORING_REGISTER_FILE_ALLOC_RANGE = 25, + /* this goes last */ IORING_REGISTER_LAST }; @@ -595,4 +598,14 @@ struct io_uring_sync_cancel_reg { __u64 pad[4]; }; +/* + * Argument for IORING_REGISTER_FILE_ALLOC_RANGE + * The range is specified as [off, off + len) + */ +struct io_uring_file_index_range { + __u32 off; + __u32 len; + __u64 resv; +}; + #endif -- cgit v1.2.3 From b3fdea6ecb55c3ceea866ff66486927e51a982b3 Mon Sep 17 00:00:00 2001 From: Dylan Yudaken Date: Thu, 30 Jun 2022 02:12:29 -0700 Subject: io_uring: multishot recv Support multishot receive for io_uring. Typical server applications will run a loop where for each recv CQE it requeues another recv/recvmsg. This can be simplified by using the existing multishot functionality combined with io_uring's provided buffers. The API is to add the IORING_RECV_MULTISHOT flag to the SQE. CQEs will then be posted (with IORING_CQE_F_MORE flag set) when data is available and is read. Once an error occurs or the socket ends, the multishot will be removed and a completion without IORING_CQE_F_MORE will be posted. The benefit to this is that the recv is much more performant. * Subsequent receives are queued up straight away without requiring the application to finish a processing loop. 
* If there are more data in the socket (say the provided buffer size is smaller than the socket buffer) then the data is immediately returned, improving batching. * Poll is only armed once and reused, saving CPU cycles Signed-off-by: Dylan Yudaken Link: https://lore.kernel.org/r/20220630091231.1456789-11-dylany@fb.com Signed-off-by: Jens Axboe --- include/uapi/linux/io_uring.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h index cf95354198a3..499679134961 100644 --- a/include/uapi/linux/io_uring.h +++ b/include/uapi/linux/io_uring.h @@ -263,8 +263,13 @@ enum io_uring_op { * or receive and arm poll if that yields an * -EAGAIN result, arm poll upfront and skip * the initial transfer attempt. + * + * IORING_RECV_MULTISHOT Multishot recv. Sets IORING_CQE_F_MORE if + * the handler will continue to report + * CQEs on behalf of the same SQE. */ #define IORING_RECVSEND_POLL_FIRST (1U << 0) +#define IORING_RECV_MULTISHOT (1U << 1) /* * accept flags stored in sqe->ioprio -- cgit v1.2.3 From 9bb66906f23e50d6db1e11f7498b72dfca1982a2 Mon Sep 17 00:00:00 2001 From: Dylan Yudaken Date: Thu, 14 Jul 2022 04:02:58 -0700 Subject: io_uring: support multishot in recvmsg Similar to multishot recv, this will require provided buffers to be used. However recvmsg is much more complex than recv as it has multiple outputs. Specifically flags, name, and control messages. Support this by introducing a new struct io_uring_recvmsg_out with 4 fields. namelen, controllen and flags match the similar out fields in msghdr from standard recvmsg(2), payloadlen is the length of the payload following the header. This struct is placed at the start of the returned buffer. Based on what the user specifies in struct msghdr, the next bytes of the buffer will be name (the next msg_namelen bytes), and then control (the next msg_controllen bytes). The payload will come at the end.
The return value in the CQE is the total used size of the provided buffer. Signed-off-by: Dylan Yudaken Link: https://lore.kernel.org/r/20220714110258.1336200-4-dylany@fb.com [axboe: style fixups, see link] Signed-off-by: Jens Axboe --- include/uapi/linux/io_uring.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h index 499679134961..4c9b11e2e991 100644 --- a/include/uapi/linux/io_uring.h +++ b/include/uapi/linux/io_uring.h @@ -613,4 +613,11 @@ struct io_uring_file_index_range { __u64 resv; }; +struct io_uring_recvmsg_out { + __u32 namelen; + __u32 controllen; + __u32 payloadlen; + __u32 flags; +}; + #endif -- cgit v1.2.3 From bc24d6bd32df0be19df3d30e74be4ba56493c0e2 Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Tue, 12 Jul 2022 21:52:42 +0100 Subject: io_uring: add notification slot registration Let the userspace to register and unregister notification slots. Signed-off-by: Pavel Begunkov Link: https://lore.kernel.org/r/a0aa8161fe3ebb2a4cc6e5dbd0cffb96e6881cf5.1657643355.git.asml.silence@gmail.com Signed-off-by: Jens Axboe --- include/uapi/linux/io_uring.h | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h index 4c9b11e2e991..dcfc7a0bda0c 100644 --- a/include/uapi/linux/io_uring.h +++ b/include/uapi/linux/io_uring.h @@ -457,6 +457,10 @@ enum { /* register a range of fixed file slots for automatic slot allocation */ IORING_REGISTER_FILE_ALLOC_RANGE = 25, + /* zerocopy notification API */ + IORING_REGISTER_NOTIFIERS = 26, + IORING_UNREGISTER_NOTIFIERS = 27, + /* this goes last */ IORING_REGISTER_LAST }; @@ -503,6 +507,19 @@ struct io_uring_rsrc_update2 { __u32 resv2; }; +struct io_uring_notification_slot { + __u64 tag; + __u64 resv[3]; +}; + +struct io_uring_notification_register { + __u32 nr_slots; + __u32 resv; + __u64 resv2; + __u64 data; + 
__u64 resv3; +}; + /* Skip updating fd indexes set to this value in the fd table */ #define IORING_REGISTER_FILES_SKIP (-2) -- cgit v1.2.3 From 06a5464be84e4ae48394d34441baf34bf9706827 Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Tue, 12 Jul 2022 21:52:43 +0100 Subject: io_uring: wire send zc request type Add a new io_uring opcode IORING_OP_SENDZC. The main distinction from IORING_OP_SEND is that the user should specify a notification slot index in sqe::notification_idx and the buffers are safe to reuse only when the used notification is flushed and completes. Signed-off-by: Pavel Begunkov Link: https://lore.kernel.org/r/a80387c6a68ce9cf99b3b6ef6f71068468761fb7.1657643355.git.asml.silence@gmail.com Signed-off-by: Jens Axboe --- include/uapi/linux/io_uring.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h index dcfc7a0bda0c..82bf2991e9bd 100644 --- a/include/uapi/linux/io_uring.h +++ b/include/uapi/linux/io_uring.h @@ -66,6 +66,10 @@ struct io_uring_sqe { union { __s32 splice_fd_in; __u32 file_index; + struct { + __u16 notification_idx; + __u16 __pad; + }; }; union { struct { @@ -197,6 +201,7 @@ enum io_uring_op { IORING_OP_GETXATTR, IORING_OP_SOCKET, IORING_OP_URING_CMD, + IORING_OP_SENDZC_NOTIF, /* this goes last, obviously */ IORING_OP_LAST, -- cgit v1.2.3 From 092aeedb750a9fad0f0252d6067fc91d76ca44bd Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Tue, 12 Jul 2022 21:52:45 +0100 Subject: io_uring: allow to pass addr into sendzc Allow to specify an address to zerocopy sends making it more like sendto(2). 
Signed-off-by: Pavel Begunkov Link: https://lore.kernel.org/r/70417a8f7c5b51ab454690bae08adc0c187f89e8.1657643355.git.asml.silence@gmail.com Signed-off-by: Jens Axboe --- include/uapi/linux/io_uring.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h index 82bf2991e9bd..0736e2773a5d 100644 --- a/include/uapi/linux/io_uring.h +++ b/include/uapi/linux/io_uring.h @@ -68,7 +68,7 @@ struct io_uring_sqe { __u32 file_index; struct { __u16 notification_idx; - __u16 __pad; + __u16 addr_len; }; }; union { -- cgit v1.2.3 From 10c7d33ecd51619e453cf6aeee8e326f8ba5cfea Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Tue, 12 Jul 2022 21:52:46 +0100 Subject: io_uring: sendzc with fixed buffers Allow zerocopy sends to use fixed buffers. There is an optimisation for this case, the network layer don't need to reference the pages, see SKBFL_MANAGED_FRAG_REFS, so io_uring have to ensure validity of fixed buffers until the notifier is released. Signed-off-by: Pavel Begunkov Link: https://lore.kernel.org/r/e1d8bd1b5934e541d90c1824eb4020ae3f5f43f3.1657643355.git.asml.silence@gmail.com [axboe: fold in 32-bit pointer cast warning fix] Signed-off-by: Jens Axboe --- include/uapi/linux/io_uring.h | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h index 0736e2773a5d..f1a9ff9b9ea7 100644 --- a/include/uapi/linux/io_uring.h +++ b/include/uapi/linux/io_uring.h @@ -272,9 +272,13 @@ enum io_uring_op { * IORING_RECV_MULTISHOT Multishot recv. Sets IORING_CQE_F_MORE if * the handler will continue to report * CQEs on behalf of the same SQE. + * + * IORING_RECVSEND_FIXED_BUF Use registered buffers, the index is stored in + * the buf_index field. 
*/ #define IORING_RECVSEND_POLL_FIRST (1U << 0) -#define IORING_RECV_MULTISHOT (1U << 1) +#define IORING_RECV_MULTISHOT (1U << 1) +#define IORING_RECVSEND_FIXED_BUF (1U << 2) /* * accept flags stored in sqe->ioprio -- cgit v1.2.3 From 63809137ebb58f0aa2ce359117422686e3304f45 Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Tue, 12 Jul 2022 21:52:47 +0100 Subject: io_uring: flush notifiers after sendzc Allow to flush notifiers as a part of sendzc request by setting IORING_SENDZC_FLUSH flag. When the sendzc request succeeds it will flush the used [active] notifier. Signed-off-by: Pavel Begunkov Link: https://lore.kernel.org/r/e0b4d9a6797e2fd6092824fe42953db7a519bbc8.1657643355.git.asml.silence@gmail.com Signed-off-by: Jens Axboe --- include/uapi/linux/io_uring.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h index f1a9ff9b9ea7..45272eb37d10 100644 --- a/include/uapi/linux/io_uring.h +++ b/include/uapi/linux/io_uring.h @@ -275,10 +275,14 @@ enum io_uring_op { * * IORING_RECVSEND_FIXED_BUF Use registered buffers, the index is stored in * the buf_index field. + * + * IORING_RECVSEND_NOTIF_FLUSH Flush a notification after a successful + * successful. Only for zerocopy sends. */ #define IORING_RECVSEND_POLL_FIRST (1U << 0) #define IORING_RECV_MULTISHOT (1U << 1) #define IORING_RECVSEND_FIXED_BUF (1U << 2) +#define IORING_RECVSEND_NOTIF_FLUSH (1U << 3) /* * accept flags stored in sqe->ioprio -- cgit v1.2.3 From 4379d5f15b3fd4224c37841029178aa8082a242e Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Tue, 12 Jul 2022 21:52:48 +0100 Subject: io_uring: rename IORING_OP_FILES_UPDATE IORING_OP_FILES_UPDATE will be a more generic opcode serving different resource types, rename it into IORING_OP_RSRC_UPDATE and add subtype handling.
Signed-off-by: Pavel Begunkov Link: https://lore.kernel.org/r/0a907133907d9af3415a8a7aa1802c6aa97c03c6.1657643355.git.asml.silence@gmail.com Signed-off-by: Jens Axboe --- include/uapi/linux/io_uring.h | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h index 45272eb37d10..210a00ab6301 100644 --- a/include/uapi/linux/io_uring.h +++ b/include/uapi/linux/io_uring.h @@ -174,7 +174,8 @@ enum io_uring_op { IORING_OP_FALLOCATE, IORING_OP_OPENAT, IORING_OP_CLOSE, - IORING_OP_FILES_UPDATE, + IORING_OP_RSRC_UPDATE, + IORING_OP_FILES_UPDATE = IORING_OP_RSRC_UPDATE, IORING_OP_STATX, IORING_OP_READ, IORING_OP_WRITE, @@ -223,6 +224,7 @@ enum io_uring_op { #define IORING_TIMEOUT_ETIME_SUCCESS (1U << 5) #define IORING_TIMEOUT_CLOCK_MASK (IORING_TIMEOUT_BOOTTIME | IORING_TIMEOUT_REALTIME) #define IORING_TIMEOUT_UPDATE_MASK (IORING_TIMEOUT_UPDATE | IORING_LINK_TIMEOUT_UPDATE) + /* * sqe->splice_flags * extends splice(2) flags @@ -289,6 +291,14 @@ enum io_uring_op { */ #define IORING_ACCEPT_MULTISHOT (1U << 0) + +/* + * IORING_OP_RSRC_UPDATE flags + */ +enum { + IORING_RSRC_UPDATE_FILES, +}; + /* * IORING_OP_MSG_RING command types, stored in sqe->addr */ -- cgit v1.2.3 From 492dddb4f6e3a5839c27d41ff1fecdbe6c3ab851 Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Tue, 12 Jul 2022 21:52:49 +0100 Subject: io_uring: add zc notification flush requests Overlay notification control onto IORING_OP_RSRC_UPDATE (former IORING_OP_FILES_UPDATE). It allows to flush a range of zc notifications from slots with indexes [sqe->off, sqe->off+sqe->len). If sqe->arg is not zero, it also copies sqe->arg as a new tag for all flushed notifications. Note, it doesn't flush a notification of a slot if there was no requests attached to it (since last flush or registration). 
Signed-off-by: Pavel Begunkov Link: https://lore.kernel.org/r/df13e2363400682a73dd9e71c3b990b8d1ff0333.1657643355.git.asml.silence@gmail.com Signed-off-by: Jens Axboe --- include/uapi/linux/io_uring.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h index 210a00ab6301..1463cfecb56b 100644 --- a/include/uapi/linux/io_uring.h +++ b/include/uapi/linux/io_uring.h @@ -297,6 +297,7 @@ enum io_uring_op { */ enum { IORING_RSRC_UPDATE_FILES, + IORING_RSRC_UPDATE_NOTIF, }; /* -- cgit v1.2.3 From b4023554b1fb49f73a09e5f346a5facbf27d7383 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Mon, 25 Jul 2022 09:58:35 +0200 Subject: USB: cdc: add control-signal defines Add defines for the Control Signal Bitmap Values from section 6.2.14 SetControlLineState of the CDC specification version 1.1. Signed-off-by: Johan Hovold Link: https://lore.kernel.org/r/20220725075841.1187-2-johan@kernel.org Signed-off-by: Greg Kroah-Hartman --- include/uapi/linux/usb/cdc.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/usb/cdc.h b/include/uapi/linux/usb/cdc.h index 6d61550959ef..372c81425cae 100644 --- a/include/uapi/linux/usb/cdc.h +++ b/include/uapi/linux/usb/cdc.h @@ -271,6 +271,10 @@ struct usb_cdc_line_coding { __u8 bDataBits; } __attribute__ ((packed)); +/* Control Signal Bitmap Values from 6.2.14 SetControlLineState */ +#define USB_CDC_CTRL_DTR (1 << 0) +#define USB_CDC_CTRL_RTS (1 << 1) + /* table 62; bits in multicast filter */ #define USB_CDC_PACKET_TYPE_PROMISCUOUS (1 << 0) #define USB_CDC_PACKET_TYPE_ALL_MULTICAST (1 << 1) /* no filter */ -- cgit v1.2.3 From a0a3202b44a9fdf2a1f6330a0d176aee76c8631d Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Mon, 25 Jul 2022 09:58:36 +0200 Subject: USB: cdc: add serial-state defines Add defines for the serial-state bitmap values from section 6.3.5 SerialState of the CDC specification version 
1.1. Note that the bTxCarrier and bRxCarrier bits have been named after their RS-232 signal equivalents DSR and DCD. Signed-off-by: Johan Hovold Link: https://lore.kernel.org/r/20220725075841.1187-3-johan@kernel.org Signed-off-by: Greg Kroah-Hartman --- include/uapi/linux/usb/cdc.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/usb/cdc.h b/include/uapi/linux/usb/cdc.h index 372c81425cae..78caa9bdc4ae 100644 --- a/include/uapi/linux/usb/cdc.h +++ b/include/uapi/linux/usb/cdc.h @@ -306,6 +306,15 @@ struct usb_cdc_notification { __le16 wLength; } __attribute__ ((packed)); +/* UART State Bitmap Values from 6.3.5 SerialState */ +#define USB_CDC_SERIAL_STATE_DCD (1 << 0) +#define USB_CDC_SERIAL_STATE_DSR (1 << 1) +#define USB_CDC_SERIAL_STATE_BREAK (1 << 2) +#define USB_CDC_SERIAL_STATE_RING_SIGNAL (1 << 3) +#define USB_CDC_SERIAL_STATE_FRAMING (1 << 4) +#define USB_CDC_SERIAL_STATE_PARITY (1 << 5) +#define USB_CDC_SERIAL_STATE_OVERRUN (1 << 6) + struct usb_cdc_speed_change { __le32 DLBitRRate; /* contains the downlink bit rate (IN pipe) */ __le32 ULBitRate; /* contains the uplink bit rate (OUT pipe) */ -- cgit v1.2.3 From b7c14f23fb604fc66edae7514ed9b4b93930b5ba Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Thu, 17 Mar 2022 10:25:39 -0700 Subject: btrfs: send: add stream v2 definitions This adds the definitions of the new commands for send stream version 2 and their respective attributes: fallocate, FS_IOC_SETFLAGS (a.k.a. chattr), and encoded writes. It also documents two changes to the send stream format in v2: the receiver shouldn't assume a maximum command size, and the DATA attribute is encoded differently to allow for writes larger than 64k. These will be implemented in subsequent changes, and then the ioctl will accept the new version and flag. 
Reviewed-by: Josef Bacik Signed-off-by: Omar Sandoval Signed-off-by: David Sterba --- include/uapi/linux/btrfs.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/btrfs.h b/include/uapi/linux/btrfs.h index d956b2993970..b6f26a434b10 100644 --- a/include/uapi/linux/btrfs.h +++ b/include/uapi/linux/btrfs.h @@ -777,6 +777,13 @@ struct btrfs_ioctl_received_subvol_args { */ #define BTRFS_SEND_FLAG_VERSION 0x8 +/* + * Send compressed data using the ENCODED_WRITE command instead of decompressing + * the data and sending it with the WRITE command. This requires protocol + * version >= 2. + */ +#define BTRFS_SEND_FLAG_COMPRESSED 0x10 + #define BTRFS_SEND_FLAG_MASK \ (BTRFS_SEND_FLAG_NO_FILE_DATA | \ BTRFS_SEND_FLAG_OMIT_STREAM_HEADER | \ -- cgit v1.2.3 From d6815592806f481244d0e3435ca1f5383d90a14c Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Thu, 17 Mar 2022 10:25:43 -0700 Subject: btrfs: send: enable support for stream v2 and compressed writes Now that the new support is implemented, allow the ioctl to accept v2 and the compressed flag, and update the version in sysfs. 
Signed-off-by: Omar Sandoval Signed-off-by: David Sterba --- include/uapi/linux/btrfs.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/btrfs.h b/include/uapi/linux/btrfs.h index b6f26a434b10..f54dc91e4025 100644 --- a/include/uapi/linux/btrfs.h +++ b/include/uapi/linux/btrfs.h @@ -788,7 +788,8 @@ struct btrfs_ioctl_received_subvol_args { (BTRFS_SEND_FLAG_NO_FILE_DATA | \ BTRFS_SEND_FLAG_OMIT_STREAM_HEADER | \ BTRFS_SEND_FLAG_OMIT_END_CMD | \ - BTRFS_SEND_FLAG_VERSION) + BTRFS_SEND_FLAG_VERSION | \ + BTRFS_SEND_FLAG_COMPRESSED) struct btrfs_ioctl_send_args { __s64 send_fd; /* in */ -- cgit v1.2.3 From 5008750eff5d4af8a3aed4a7567c4cfb2b3cb156 Mon Sep 17 00:00:00 2001 From: Wojciech Drewek Date: Mon, 18 Jul 2022 14:18:11 +0200 Subject: net/sched: flower: Add PPPoE filter Add support for PPPoE specific fields for tc-flower. Those fields can be provided only when protocol was set to ETH_P_PPP_SES. Defines, dump, load and set are being done here. Overwrite basic.n_proto only in case of PPP_IP and PPP_IPV6, otherwise leave it as ETH_P_PPP_SES. 
Signed-off-by: Wojciech Drewek Acked-by: Guillaume Nault Signed-off-by: Tony Nguyen --- include/uapi/linux/pkt_cls.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/pkt_cls.h b/include/uapi/linux/pkt_cls.h index 9a2ee1e39fad..c142c0f8ed8a 100644 --- a/include/uapi/linux/pkt_cls.h +++ b/include/uapi/linux/pkt_cls.h @@ -589,6 +589,9 @@ enum { TCA_FLOWER_KEY_NUM_OF_VLANS, /* u8 */ + TCA_FLOWER_KEY_PPPOE_SID, /* be16 */ + TCA_FLOWER_KEY_PPP_PROTO, /* be16 */ + __TCA_FLOWER_MAX, }; -- cgit v1.2.3 From 7b2d9a1a50ec3bedf067fe234a4a71196c89e826 Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Mon, 25 Jul 2022 10:29:16 +0200 Subject: net: devlink: introduce nested devlink entity for line card For the purpose of exposing device info and allow flash update which is going to be implemented in follow-up patches, introduce a possibility for a line card to expose relation to nested devlink entity. The nested devlink entity represents the line card. 
Example: $ devlink lc show pci/0000:01:00.0 lc 1 pci/0000:01:00.0: lc 1 state active type 16x100G nested_devlink auxiliary/mlxsw_core.lc.0 supported_types: 16x100G $ devlink dev show auxiliary/mlxsw_core.lc.0 auxiliary/mlxsw_core.lc.0 Signed-off-by: Jiri Pirko Acked-by: Jakub Kicinski Reviewed-by: Ido Schimmel Signed-off-by: Jakub Kicinski --- include/uapi/linux/devlink.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/devlink.h b/include/uapi/linux/devlink.h index b3d40a5d72ff..541321695f52 100644 --- a/include/uapi/linux/devlink.h +++ b/include/uapi/linux/devlink.h @@ -576,6 +576,8 @@ enum devlink_attr { DEVLINK_ATTR_LINECARD_TYPE, /* string */ DEVLINK_ATTR_LINECARD_SUPPORTED_TYPES, /* nested */ + DEVLINK_ATTR_NESTED_DEVLINK, /* nested */ + /* add new attributes above here, update the policy in devlink.c */ __DEVLINK_ATTR_MAX, -- cgit v1.2.3 From a0c0c44e9aa2b5da876467083c359b368f3ce95e Mon Sep 17 00:00:00 2001 From: Janosch Frank Date: Wed, 20 Jul 2022 12:43:28 +0000 Subject: s390: add ELF note type for encrypted CPU state of a PV VCPU The type NT_S390_PV_CPU_DATA note contains the encrypted CPU state of a PV VCPU. It's only relevant in dumps of s390 PV VMs and can't be decrypted without a second block of encrypted data which provides key parts. Therefore we only reserve the note type here. The zgetdump tool from the s390-tools package can, together with a Customer Communication Key, be used to convert a PV VM dump into a normal VM dump. zgetdump will decrypt the CPU data and overwrite the other respective notes to make the data accessible for crash and other debugging tools. 
Signed-off-by: Janosch Frank Acked-by: Heiko Carstens [agordeev@linux.ibm.com changed description] Signed-off-by: Alexander Gordeev --- include/uapi/linux/elf.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/elf.h b/include/uapi/linux/elf.h index 2b9f5e9985e5..c7b056af9ef0 100644 --- a/include/uapi/linux/elf.h +++ b/include/uapi/linux/elf.h @@ -420,6 +420,7 @@ typedef struct elf64_shdr { #define NT_S390_GS_CB 0x30b /* s390 guarded storage registers */ #define NT_S390_GS_BC 0x30c /* s390 guarded storage broadcast control block */ #define NT_S390_RI_CB 0x30d /* s390 runtime instrumentation */ +#define NT_S390_PV_CPU_DATA 0x30e /* s390 protvirt cpu dump data */ #define NT_ARM_VFP 0x400 /* ARM VFP/NEON registers */ #define NT_ARM_TLS 0x401 /* ARM TLS register */ #define NT_ARM_HW_BREAK 0x402 /* ARM hardware breakpoint registers */ -- cgit v1.2.3 From 9dd1cd3220eca534f2d47afad7ce85f4c40118d8 Mon Sep 17 00:00:00 2001 From: Mike Snitzer Date: Wed, 20 Jul 2022 13:58:04 -0400 Subject: dm: fix dm-raid crash if md_handle_request() splits bio Commit ca522482e3eaf ("dm: pass NULL bdev to bio_alloc_clone") introduced the optimization to _not_ perform bio_associate_blkg()'s relatively costly work when DM core clones its bio. But in doing so it exposed the possibility for DM's cloned bio to alter DM target behavior (e.g. crash) if a target were to issue IO without first calling bio_set_dev(). The DM raid target can trigger an MD crash due to its need to split the DM bio that is passed to md_handle_request(). The split will recurse to submit_bio_noacct() using a bio with an uninitialized ->bi_blkg. This NULL bio->bi_blkg causes blk_throtl_bio() to dereference a NULL blkg_to_tg(bio->bi_blkg). Fix this in DM core by adding a new 'needs_bio_set_dev' target flag that will make alloc_tio() call bio_set_dev() on behalf of the target. dm-raid is the only target that requires this flag.
bio_set_dev() initializes the DM cloned bio's ->bi_blkg, using bio_associate_blkg, before passing the bio to md_handle_request(). Long-term fix would be to audit and refactor MD code to rely on DM to split its bio, using dm_accept_partial_bio(), but there are MD raid personalities (e.g. raid1 and raid10) whose implementation are tightly coupled to handling the bio splitting inline. Fixes: ca522482e3eaf ("dm: pass NULL bdev to bio_alloc_clone") Cc: stable@vger.kernel.org Signed-off-by: Mike Snitzer --- include/uapi/linux/dm-ioctl.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/dm-ioctl.h b/include/uapi/linux/dm-ioctl.h index 2e9550fef90f..27ad9671f2df 100644 --- a/include/uapi/linux/dm-ioctl.h +++ b/include/uapi/linux/dm-ioctl.h @@ -286,9 +286,9 @@ enum { #define DM_DEV_SET_GEOMETRY _IOWR(DM_IOCTL, DM_DEV_SET_GEOMETRY_CMD, struct dm_ioctl) #define DM_VERSION_MAJOR 4 -#define DM_VERSION_MINOR 46 +#define DM_VERSION_MINOR 47 #define DM_VERSION_PATCHLEVEL 0 -#define DM_VERSION_EXTRA "-ioctl (2022-02-22)" +#define DM_VERSION_EXTRA "-ioctl (2022-07-28)" /* Status bits */ #define DM_READONLY_FLAG (1 << 0) /* In/Out */ -- cgit v1.2.3 From 08f588fa301bef264576fc915da6bf31b585a824 Mon Sep 17 00:00:00 2001 From: Vikas Gupta Date: Wed, 27 Jul 2022 22:27:20 +0530 Subject: devlink: introduce framework for selftests Add a framework for running selftests. Framework exposes devlink commands and test suite(s) to the user to execute and query the supported tests by the driver. Below are new entries in devlink_nl_ops devlink_nl_cmd_selftests_show_doit/dumpit: To query the supported selftests by the drivers. devlink_nl_cmd_selftests_run: To execute selftests. Users can provide a test mask for executing group tests or standalone tests. Documentation/networking/devlink/ path is already part of MAINTAINERS & the new files come under this path. 
Hence no update needed to the MAINTAINERS Signed-off-by: Vikas Gupta Reviewed-by: Andy Gospodarek Reviewed-by: Jiri Pirko Signed-off-by: Jakub Kicinski --- include/uapi/linux/devlink.h | 29 +++++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/devlink.h b/include/uapi/linux/devlink.h index 541321695f52..2f24b53a87a5 100644 --- a/include/uapi/linux/devlink.h +++ b/include/uapi/linux/devlink.h @@ -136,6 +136,9 @@ enum devlink_command { DEVLINK_CMD_LINECARD_NEW, DEVLINK_CMD_LINECARD_DEL, + DEVLINK_CMD_SELFTESTS_GET, /* can dump */ + DEVLINK_CMD_SELFTESTS_RUN, + /* add new commands above here */ __DEVLINK_CMD_MAX, DEVLINK_CMD_MAX = __DEVLINK_CMD_MAX - 1 @@ -276,6 +279,30 @@ enum { #define DEVLINK_SUPPORTED_FLASH_OVERWRITE_SECTIONS \ (_BITUL(__DEVLINK_FLASH_OVERWRITE_MAX_BIT) - 1) +enum devlink_attr_selftest_id { + DEVLINK_ATTR_SELFTEST_ID_UNSPEC, + DEVLINK_ATTR_SELFTEST_ID_FLASH, /* flag */ + + __DEVLINK_ATTR_SELFTEST_ID_MAX, + DEVLINK_ATTR_SELFTEST_ID_MAX = __DEVLINK_ATTR_SELFTEST_ID_MAX - 1 +}; + +enum devlink_selftest_status { + DEVLINK_SELFTEST_STATUS_SKIP, + DEVLINK_SELFTEST_STATUS_PASS, + DEVLINK_SELFTEST_STATUS_FAIL +}; + +enum devlink_attr_selftest_result { + DEVLINK_ATTR_SELFTEST_RESULT_UNSPEC, + DEVLINK_ATTR_SELFTEST_RESULT, /* nested */ + DEVLINK_ATTR_SELFTEST_RESULT_ID, /* u32, enum devlink_attr_selftest_id */ + DEVLINK_ATTR_SELFTEST_RESULT_STATUS, /* u8, enum devlink_selftest_status */ + + __DEVLINK_ATTR_SELFTEST_RESULT_MAX, + DEVLINK_ATTR_SELFTEST_RESULT_MAX = __DEVLINK_ATTR_SELFTEST_RESULT_MAX - 1 +}; + /** * enum devlink_trap_action - Packet trap action. 
* @DEVLINK_TRAP_ACTION_DROP: Packet is dropped by the device and a copy is not @@ -578,6 +605,8 @@ enum devlink_attr { DEVLINK_ATTR_NESTED_DEVLINK, /* nested */ + DEVLINK_ATTR_SELFTESTS, /* nested */ + /* add new attributes above here, update the policy in devlink.c */ __DEVLINK_ATTR_MAX, -- cgit v1.2.3 From b07c8cdbe918aa17da864da9a89b22afaed0393e Mon Sep 17 00:00:00 2001 From: Andrea Mayer Date: Wed, 27 Jul 2022 20:54:05 +0200 Subject: seg6: add support for SRv6 H.Encaps.Red behavior The SRv6 H.Encaps.Red behavior described in [1] is an optimization of the SRv6 H.Encaps behavior [2]. H.Encaps.Red reduces the length of the SRH by excluding the first segment (SID) in the SRH of the pushed IPv6 header. The first SID is only placed in the IPv6 Destination Address field of the pushed IPv6 header. When the SRv6 Policy only contains one SID the SRH is omitted, unless there is an HMAC TLV to be carried. [1] - https://datatracker.ietf.org/doc/html/rfc8986#section-5.2 [2] - https://datatracker.ietf.org/doc/html/rfc8986#section-5.1 Signed-off-by: Andrea Mayer Signed-off-by: Anton Makarov Signed-off-by: David S. Miller --- include/uapi/linux/seg6_iptunnel.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/seg6_iptunnel.h b/include/uapi/linux/seg6_iptunnel.h index eb815e0d0ac3..538152a7b2c3 100644 --- a/include/uapi/linux/seg6_iptunnel.h +++ b/include/uapi/linux/seg6_iptunnel.h @@ -35,6 +35,7 @@ enum { SEG6_IPTUN_MODE_INLINE, SEG6_IPTUN_MODE_ENCAP, SEG6_IPTUN_MODE_L2ENCAP, + SEG6_IPTUN_MODE_ENCAP_RED, }; #endif -- cgit v1.2.3 From 13f0296be8ece1189cbc4383a45ba97cafaecc09 Mon Sep 17 00:00:00 2001 From: Andrea Mayer Date: Wed, 27 Jul 2022 20:54:06 +0200 Subject: seg6: add support for SRv6 H.L2Encaps.Red behavior The SRv6 H.L2Encaps.Red behavior described in [1] is an optimization of the SRv6 H.L2Encaps behavior [2]. 
H.L2Encaps.Red reduces the length of the SRH by excluding the first segment (SID) in the SRH of the pushed IPv6 header. The first SID is only placed in the IPv6 Destination Address field of the pushed IPv6 header. When the SRv6 Policy only contains one SID the SRH is omitted, unless there is an HMAC TLV to be carried. [1] - https://datatracker.ietf.org/doc/html/rfc8986#section-5.4 [2] - https://datatracker.ietf.org/doc/html/rfc8986#section-5.3 Signed-off-by: Andrea Mayer Signed-off-by: Anton Makarov Signed-off-by: David S. Miller --- include/uapi/linux/seg6_iptunnel.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/seg6_iptunnel.h b/include/uapi/linux/seg6_iptunnel.h index 538152a7b2c3..a9fa777f16de 100644 --- a/include/uapi/linux/seg6_iptunnel.h +++ b/include/uapi/linux/seg6_iptunnel.h @@ -36,6 +36,7 @@ enum { SEG6_IPTUN_MODE_ENCAP, SEG6_IPTUN_MODE_L2ENCAP, SEG6_IPTUN_MODE_ENCAP_RED, + SEG6_IPTUN_MODE_L2ENCAP_RED, }; #endif -- cgit v1.2.3 From 8a061562e2f2b32bfb5bff5bf3afc64e37d95a27 Mon Sep 17 00:00:00 2001 From: Anup Patel Date: Fri, 29 Jul 2022 17:14:53 +0530 Subject: RISC-V: KVM: Add extensible CSR emulation framework We add an extensible CSR emulation framework which is based upon the existing system instruction emulation. This will be useful to upcoming AIA, PMU, Nested and other virtualization features. The CSR emulation framework also has provision to emulate CSR in user space but this will be used only in very specific cases such as AIA IMSIC CSR emulation in user space or vendor specific CSR emulation in user space. By default, all CSRs not handled by KVM RISC-V will be redirected back to Guest VCPU as illegal instruction trap. 
Signed-off-by: Anup Patel Signed-off-by: Anup Patel --- include/uapi/linux/kvm.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index 860f867c50c0..0c1f42a40fd3 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -270,6 +270,7 @@ struct kvm_xen_exit { #define KVM_EXIT_X86_BUS_LOCK 33 #define KVM_EXIT_XEN 34 #define KVM_EXIT_RISCV_SBI 35 +#define KVM_EXIT_RISCV_CSR 36 /* For KVM_EXIT_INTERNAL_ERROR */ /* Emulate instruction failed. */ @@ -496,6 +497,13 @@ struct kvm_run { unsigned long args[6]; unsigned long ret[2]; } riscv_sbi; + /* KVM_EXIT_RISCV_CSR */ + struct { + unsigned long csr_num; + unsigned long new_value; + unsigned long write_mask; + unsigned long ret_value; + } riscv_csr; /* Fix the size of the union. */ char padding[256]; }; -- cgit v1.2.3 From 68f2736a858324c3ec852f6c2cddd9d1c777357d Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Tue, 7 Jun 2022 15:38:48 -0400 Subject: mm: Convert all PageMovable users to movable_operations These drivers are rather uncomfortably hammered into the address_space_operations hole. They aren't filesystems and don't behave like filesystems. They just need their own movable_operations structure, which we can point to directly from page->mapping. Signed-off-by: Matthew Wilcox (Oracle) --- include/uapi/linux/magic.h | 4 ---- 1 file changed, 4 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/magic.h b/include/uapi/linux/magic.h index f724129c0425..6325d1d0e90f 100644 --- a/include/uapi/linux/magic.h +++ b/include/uapi/linux/magic.h @@ -98,12 +98,8 @@ /* Since UDF 2.01 is ISO 13346 based... 
*/ #define UDF_SUPER_MAGIC 0x15013346 -#define BALLOON_KVM_MAGIC 0x13661366 -#define ZSMALLOC_MAGIC 0x58295829 #define DMA_BUF_MAGIC 0x444d4142 /* "DMAB" */ #define DEVMEM_MAGIC 0x454d444d /* "DMEM" */ -#define Z3FOLD_MAGIC 0x33 -#define PPC_CMM_MAGIC 0xc7571590 #define SECRETMEM_MAGIC 0x5345434d /* "SECM" */ #endif /* __LINUX_MAGIC_H__ */ -- cgit v1.2.3 From 0aa73170eba5eae638c1b96a05eba533f030b5cb Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Sat, 30 Jul 2022 17:27:49 +0800 Subject: ublk_drv: add SET_PARAMS/GET_PARAMS control command Add two commands to set/get parameters generically. One important goal of ublk is to provide generic framework for making block device by userspace flexibly. As one generic block device, there are still lots of block parameters, such as max_sectors, write_cache/fua, discard related limits, zoned parameters, ...., so this patch starts to add generic mechanism for set/get device parameters. Both generic block parameters(all kinds of queue settings) and ublk feature parameters can be covered with this way, then it becomes quite easy to extend in future. Add two parameter types are used so far: basic(covers basic queue setting and misc settings which can't be grouped easily) and discard, basic type must be set, and discard type becomes optional now This way provides mechanism to simulate any kind of generic block device from userspace easily, from both block queue setting viewpoint or ublk feature viewpoint. The style of putting all parameters together is suggested by Christoph. 
Suggested-by: Christoph Hellwig Signed-off-by: Ming Lei Link: https://lore.kernel.org/r/20220730092750.1118167-4-ming.lei@redhat.com Signed-off-by: Jens Axboe --- include/uapi/linux/ublk_cmd.h | 47 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 47 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/ublk_cmd.h b/include/uapi/linux/ublk_cmd.h index ca33092354ab..54d065426f06 100644 --- a/include/uapi/linux/ublk_cmd.h +++ b/include/uapi/linux/ublk_cmd.h @@ -15,6 +15,8 @@ #define UBLK_CMD_DEL_DEV 0x05 #define UBLK_CMD_START_DEV 0x06 #define UBLK_CMD_STOP_DEV 0x07 +#define UBLK_CMD_SET_PARAMS 0x08 +#define UBLK_CMD_GET_PARAMS 0x09 /* * IO commands, issued by ublk server, and handled by ublk driver. @@ -158,4 +160,49 @@ struct ublksrv_io_cmd { __u64 addr; }; +struct ublk_param_basic { +#define UBLK_ATTR_READ_ONLY (1 << 0) +#define UBLK_ATTR_ROTATIONAL (1 << 1) +#define UBLK_ATTR_VOLATILE_CACHE (1 << 2) +#define UBLK_ATTR_FUA (1 << 3) + __u32 attrs; + __u8 logical_bs_shift; + __u8 physical_bs_shift; + __u8 io_opt_shift; + __u8 io_min_shift; + + __u32 max_sectors; + __u32 chunk_sectors; + + __u64 dev_sectors; + __u64 virt_boundary_mask; +}; + +struct ublk_param_discard { + __u32 discard_alignment; + + __u32 discard_granularity; + __u32 max_discard_sectors; + + __u32 max_write_zeroes_sectors; + __u16 max_discard_segments; + __u16 reserved0; +}; + +struct ublk_params { + /* + * Total length of parameters, userspace has to set 'len' for both + * SET_PARAMS and GET_PARAMS command, and driver may update len + * if two sides use different version of 'ublk_params', same with + * 'types' fields. 
+ */ + __u32 len; +#define UBLK_PARAM_TYPE_BASIC (1 << 0) +#define UBLK_PARAM_TYPE_DISCARD (1 << 1) + __u32 types; /* types of parameter included */ + + struct ublk_param_basic basic; + struct ublk_param_discard discard; +}; + #endif -- cgit v1.2.3 From 4bf9cbf3e93426e9ebe136dabd6ca392ca92cfcb Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Sat, 30 Jul 2022 17:27:50 +0800 Subject: ublk_drv: cleanup ublksrv_ctrl_dev_info Remove all block device related info from ublksrv_ctrl_dev_info, meantime reduce its size into 64 bytes because: 1) ublksrv_ctrl_dev_info becomes cleaner without including any block related info 2) generic set/get parameter command can be used to set block related setting easily and cleanly 3) generic set/get parameter command can be used for extending ublk without needing more info in ublksrv_ctrl_dev_info Signed-off-by: Ming Lei Link: https://lore.kernel.org/r/20220730092750.1118167-5-ming.lei@redhat.com Signed-off-by: Jens Axboe --- include/uapi/linux/ublk_cmd.h | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/ublk_cmd.h b/include/uapi/linux/ublk_cmd.h index 54d065426f06..57d86d0e8c5b 100644 --- a/include/uapi/linux/ublk_cmd.h +++ b/include/uapi/linux/ublk_cmd.h @@ -80,22 +80,23 @@ struct ublksrv_ctrl_cmd { struct ublksrv_ctrl_dev_info { __u16 nr_hw_queues; __u16 queue_depth; - __u16 block_size; __u16 state; + __u16 pad0; - __u32 rq_max_blocks; + __u32 max_io_buf_bytes; __u32 dev_id; - __u64 dev_blocks; - __s32 ublksrv_pid; - __s32 reserved0; + __u32 pad1; + __u64 flags; - __u64 flags_reserved; /* For ublksrv internal use, invisible to ublk driver */ __u64 ublksrv_flags; - __u64 reserved1[9]; + + __u64 reserved0; + __u64 reserved1; + __u64 reserved2; }; #define UBLK_IO_OP_READ 0 -- cgit v1.2.3 From 4e18403d9485a43e1b54397df258b8df7dac9a83 Mon Sep 17 00:00:00 2001 From: ZiyangZhang Date: Thu, 28 Jul 2022 20:39:15 +0800 Subject: ublk_cmd.h: add one new ublk command: 
UBLK_IO_NEED_GET_DATA Add one new ublk command: UBLK_IO_NEED_GET_DATA. It is prepared for a new feature designed for a user application who wants to allocate IO buffer and set IO buffer address only after it receives an IO request from ublksrv. Reviewed-by: Ming Lei Signed-off-by: ZiyangZhang Link: https://lore.kernel.org/r/c8a64b6b51c78340da7daa9e1054608695e79619.1659011443.git.ZiyangZhang@linux.alibaba.com Signed-off-by: Jens Axboe --- include/uapi/linux/ublk_cmd.h | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/ublk_cmd.h b/include/uapi/linux/ublk_cmd.h index 57d86d0e8c5b..677edaab2b66 100644 --- a/include/uapi/linux/ublk_cmd.h +++ b/include/uapi/linux/ublk_cmd.h @@ -30,12 +30,21 @@ * this IO request, request's handling result is committed to ublk * driver, meantime FETCH_REQ is piggyback, and FETCH_REQ has to be * handled before completing io request. + * + * NEED_GET_DATA: only used for write requests to set io addr and copy data + * When NEED_GET_DATA is set, ublksrv has to issue UBLK_IO_NEED_GET_DATA + * command after ublk driver returns UBLK_IO_RES_NEED_GET_DATA. + * + * It is only used if ublksrv set UBLK_F_NEED_GET_DATA flag + * while starting a ublk device. */ #define UBLK_IO_FETCH_REQ 0x20 #define UBLK_IO_COMMIT_AND_FETCH_REQ 0x21 +#define UBLK_IO_NEED_GET_DATA 0x22 /* only ABORT means that no re-fetch */ #define UBLK_IO_RES_OK 0 +#define UBLK_IO_RES_NEED_GET_DATA 1 #define UBLK_IO_RES_ABORT (-ENODEV) #define UBLKSRV_CMD_BUF_OFFSET 0 @@ -56,6 +65,15 @@ */ #define UBLK_F_URING_CMD_COMP_IN_TASK (1ULL << 1) +/* + * User should issue io cmd again for write requests to + * set io buffer address and copy data from bio vectors + * to the userspace io buffer. + * + * In this mode, task_work is not used. 
+ */ +#define UBLK_F_NEED_GET_DATA (1UL << 2) + /* device state */ #define UBLK_S_DEV_DEAD 0 #define UBLK_S_DEV_LIVE 1 -- cgit v1.2.3 From 36d763509be326bb383b1b1852a129ff58d74e3b Mon Sep 17 00:00:00 2001 From: Antony Antony Date: Wed, 27 Jul 2022 17:40:53 +0200 Subject: xfrm: fix XFRMA_LASTUSED comment It is a __u64, internally time64_t. Fixes: bf825f81b454 ("xfrm: introduce basic mark infrastructure") Signed-off-by: Antony Antony Signed-off-by: Steffen Klassert --- include/uapi/linux/xfrm.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/xfrm.h b/include/uapi/linux/xfrm.h index 65e13a099b1a..a9f5d884560a 100644 --- a/include/uapi/linux/xfrm.h +++ b/include/uapi/linux/xfrm.h @@ -296,7 +296,7 @@ enum xfrm_attr_type_t { XFRMA_ETIMER_THRESH, XFRMA_SRCADDR, /* xfrm_address_t */ XFRMA_COADDR, /* xfrm_address_t */ - XFRMA_LASTUSED, /* unsigned long */ + XFRMA_LASTUSED, /* __u64 */ XFRMA_POLICY_TYPE, /* struct xfrm_userpolicy_type */ XFRMA_MIGRATE, XFRMA_ALG_AEAD, /* struct xfrm_algo_aead */ -- cgit v1.2.3 From 23339e5752d01a4b5e122759b002cf896d26f6c1 Mon Sep 17 00:00:00 2001 From: Daeho Jeong Date: Mon, 1 Aug 2022 10:08:08 -0700 Subject: f2fs: revive F2FS_IOC_ABORT_VOLATILE_WRITE F2FS_IOC_ABORT_VOLATILE_WRITE was used to abort a atomic write before. However it was removed accidentally. So revive it by changing the name, since volatile write had gone. 
Signed-off-by: Daeho Jeong Fixes: 7bc155fec5b3 ("f2fs: kill volatile write support") Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- include/uapi/linux/f2fs.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/f2fs.h b/include/uapi/linux/f2fs.h index 352a822d4370..3121d127d5aa 100644 --- a/include/uapi/linux/f2fs.h +++ b/include/uapi/linux/f2fs.h @@ -13,7 +13,7 @@ #define F2FS_IOC_COMMIT_ATOMIC_WRITE _IO(F2FS_IOCTL_MAGIC, 2) #define F2FS_IOC_START_VOLATILE_WRITE _IO(F2FS_IOCTL_MAGIC, 3) #define F2FS_IOC_RELEASE_VOLATILE_WRITE _IO(F2FS_IOCTL_MAGIC, 4) -#define F2FS_IOC_ABORT_VOLATILE_WRITE _IO(F2FS_IOCTL_MAGIC, 5) +#define F2FS_IOC_ABORT_ATOMIC_WRITE _IO(F2FS_IOCTL_MAGIC, 5) #define F2FS_IOC_GARBAGE_COLLECT _IOW(F2FS_IOCTL_MAGIC, 6, __u32) #define F2FS_IOC_WRITE_CHECKPOINT _IO(F2FS_IOCTL_MAGIC, 7) #define F2FS_IOC_DEFRAGMENT _IOWR(F2FS_IOCTL_MAGIC, 8, \ -- cgit v1.2.3 From ca34ce29fc4b0e929cc6aada40829d17ab50fee4 Mon Sep 17 00:00:00 2001 From: Dave Marchevsky Date: Mon, 8 Aug 2022 09:47:23 -0700 Subject: bpf: Improve docstring for BPF_F_USER_BUILD_ID flag Most tools which use bpf_get_stack or bpf_get_stackid symbolicate the stack - meaning the stack of addresses in the target process' address space is transformed into meaningful symbol names. The BPF_F_USER_BUILD_ID flag eases this process by finding the build_id of the file-backed vma which the address falls in and translating the address to an offset within the backing file. To be more specific, the offset is a "file offset" from the beginning of the backing file. The symbols in ET_DYN ELF objects have a st_value which is also described as an "offset" - but an offset in the process address space, relative to the base address of the object. It's necessary to translate between the "file offset" and "virtual address offset" during symbolication before they can be directly compared.
Failure to do so can lead to confusing bugs, so this patch clarifies language in the documentation in an attempt to keep this from happening. Signed-off-by: Dave Marchevsky Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20220808164723.3107500-1-davemarchevsky@fb.com --- include/uapi/linux/bpf.h | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 7bf9ba1329be..534e33fb1029 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -3008,8 +3008,18 @@ union bpf_attr { * **BPF_F_USER_STACK** * Collect a user space stack instead of a kernel stack. * **BPF_F_USER_BUILD_ID** - * Collect buildid+offset instead of ips for user stack, - * only valid if **BPF_F_USER_STACK** is also specified. + * Collect (build_id, file_offset) instead of ips for user + * stack, only valid if **BPF_F_USER_STACK** is also + * specified. + * + * *file_offset* is an offset relative to the beginning + * of the executable or shared object file backing the vma + * which the *ip* falls in. It is *not* an offset relative + * to that object's base address. Accordingly, it must be + * adjusted by adding (sh_addr - sh_offset), where + * sh_{addr,offset} correspond to the executable section + * containing *file_offset* in the object, for comparisons + * to symbols' st_value to be valid. * * **bpf_get_stack**\ () can collect up to * **PERF_MAX_STACK_DEPTH** both kernel and user frames, subject -- cgit v1.2.3 From c8996c98f703b09afe77a1d247dae691c9849dc1 Mon Sep 17 00:00:00 2001 From: Jesper Dangaard Brouer Date: Tue, 9 Aug 2022 08:08:02 +0200 Subject: bpf: Add BPF-helper for accessing CLOCK_TAI Commit 3dc6ffae2da2 ("timekeeping: Introduce fast accessor to clock tai") introduced a fast and NMI-safe accessor for CLOCK_TAI. 
Especially in time sensitive networks (TSN), where all nodes are synchronized by Precision Time Protocol (PTP), it's helpful to have the possibility to generate timestamps based on CLOCK_TAI instead of CLOCK_MONOTONIC. With a BPF helper for TAI in place, it becomes very convenient to correlate activity across different machines in the network. Use cases for such a BPF helper include functionalities such as Tx launch time (e.g. ETF and TAPRIO Qdiscs) and timestamping. Note: CLOCK_TAI is nothing new per se, only the NMI-safe variant of it is. Signed-off-by: Jesper Dangaard Brouer [Kurt: Wrote changelog and renamed helper] Signed-off-by: Kurt Kanzenbach Link: https://lore.kernel.org/r/20220809060803.5773-2-kurt@linutronix.de Signed-off-by: Alexei Starovoitov --- include/uapi/linux/bpf.h | 13 +++++++++++++ 1 file changed, 13 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 534e33fb1029..7d1e2794d83e 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -5341,6 +5341,18 @@ union bpf_attr { * **-EACCES** if the SYN cookie is not valid. * * **-EPROTONOSUPPORT** if CONFIG_IPV6 is not builtin. + * + * u64 bpf_ktime_get_tai_ns(void) + * Description + * A nonsettable system-wide clock derived from wall-clock time but + * ignoring leap seconds. This clock does not experience + * discontinuities and backwards jumps caused by NTP inserting leap + * seconds as CLOCK_REALTIME does. + * + * See: **clock_gettime**\ (**CLOCK_TAI**) + * Return + * Current *ktime*. 
+ * */ #define __BPF_FUNC_MAPPER(FN) \ FN(unspec), \ @@ -5551,6 +5563,7 @@ union bpf_attr { FN(tcp_raw_gen_syncookie_ipv6), \ FN(tcp_raw_check_syncookie_ipv4), \ FN(tcp_raw_check_syncookie_ipv6), \ + FN(ktime_get_tai_ns), \ /* */ /* integer value in 'imm' field of BPF_CALL instruction selects which helper -- cgit v1.2.3 From 134941683b89d05b5e5c28c817c95049ba409d01 Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Sat, 6 Aug 2022 17:39:20 +0200 Subject: netfilter: ip6t_LOG: Fix a typo in a comment s/_IPT_LOG_H/_IP6T_LOG_H/ While at it add some surrounding space to ease reading. Signed-off-by: Christophe JAILLET Signed-off-by: Pablo Neira Ayuso --- include/uapi/linux/netfilter_ipv6/ip6t_LOG.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/netfilter_ipv6/ip6t_LOG.h b/include/uapi/linux/netfilter_ipv6/ip6t_LOG.h index 23e91a9c2583..0b7b16dbdec2 100644 --- a/include/uapi/linux/netfilter_ipv6/ip6t_LOG.h +++ b/include/uapi/linux/netfilter_ipv6/ip6t_LOG.h @@ -17,4 +17,4 @@ struct ip6t_log_info { char prefix[30]; }; -#endif /*_IPT_LOG_H*/ +#endif /* _IP6T_LOG_H */ -- cgit v1.2.3 From f329a0ebeaba4ffe91d431e0ac1ca7f9165872a4 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Tue, 9 Aug 2022 16:27:40 -0700 Subject: genetlink: correct uAPI defines Commit 50a896cf2d6f ("genetlink: properly support per-op policy dumping") seems to have copy'n'pasted things a little incorrectly. The #define CTRL_ATTR_MCAST_GRP_MAX should have stayed right after the previous enum. The new CTRL_ATTR_POLICY_* needs its own define for MAX and that max should not contain the superfluous _DUMP in the name. We probably can't do anything about the CTRL_ATTR_POLICY_DUMP_MAX any more, there's likely code which uses it. For consistency (*cough* codegen *cough*) let's add the correctly name define nonetheless. Signed-off-by: Jakub Kicinski Reviewed-by: Johannes Berg Signed-off-by: David S. 
Miller --- include/uapi/linux/genetlink.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/genetlink.h b/include/uapi/linux/genetlink.h index d83f214b4134..ddba3ca01e39 100644 --- a/include/uapi/linux/genetlink.h +++ b/include/uapi/linux/genetlink.h @@ -87,6 +87,8 @@ enum { __CTRL_ATTR_MCAST_GRP_MAX, }; +#define CTRL_ATTR_MCAST_GRP_MAX (__CTRL_ATTR_MCAST_GRP_MAX - 1) + enum { CTRL_ATTR_POLICY_UNSPEC, CTRL_ATTR_POLICY_DO, @@ -96,7 +98,6 @@ enum { CTRL_ATTR_POLICY_DUMP_MAX = __CTRL_ATTR_POLICY_DUMP_MAX - 1 }; -#define CTRL_ATTR_MCAST_GRP_MAX (__CTRL_ATTR_MCAST_GRP_MAX - 1) - +#define CTRL_ATTR_POLICY_MAX (__CTRL_ATTR_POLICY_DUMP_MAX - 1) #endif /* _UAPI__LINUX_GENERIC_NETLINK_H */ -- cgit v1.2.3 From ea024594b1dc5b6719c1400ae154690f5c203996 Mon Sep 17 00:00:00 2001 From: Xuan Zhuo Date: Mon, 1 Aug 2022 14:38:45 +0800 Subject: virtio_pci: struct virtio_pci_common_cfg add queue_notify_data Add queue_notify_data in struct virtio_pci_common_cfg, which comes from here https://github.com/oasis-tcs/virtio-spec/issues/89 In order not to affect the API, add a dedicated structure struct virtio_pci_modern_common_cfg to virtio_pci_modern.h. Since I want to add queue_reset after queue_notify_data, I submitted this patch first. Signed-off-by: Xuan Zhuo Acked-by: Jason Wang Message-Id: <20220801063902.129329-26-xuanzhuo@linux.alibaba.com> Signed-off-by: Michael S. 
Tsirkin --- include/uapi/linux/virtio_pci.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/virtio_pci.h b/include/uapi/linux/virtio_pci.h index 3a86f36d7e3d..f5981a874481 100644 --- a/include/uapi/linux/virtio_pci.h +++ b/include/uapi/linux/virtio_pci.h @@ -202,6 +202,7 @@ struct virtio_pci_cfg_cap { #define VIRTIO_PCI_COMMON_Q_AVAILHI 44 #define VIRTIO_PCI_COMMON_Q_USEDLO 48 #define VIRTIO_PCI_COMMON_Q_USEDHI 52 +#define VIRTIO_PCI_COMMON_Q_NDATA 56 #endif /* VIRTIO_PCI_NO_MODERN */ -- cgit v1.2.3 From d94587b5bb5c4bba32fbc2bd92c86cc6de25167f Mon Sep 17 00:00:00 2001 From: Xuan Zhuo Date: Mon, 1 Aug 2022 14:38:47 +0800 Subject: virtio: queue_reset: add VIRTIO_F_RING_RESET Added VIRTIO_F_RING_RESET, it came from here https://github.com/oasis-tcs/virtio-spec/issues/124 https://github.com/oasis-tcs/virtio-spec/issues/139 This feature indicates that the driver can reset a queue individually. Signed-off-by: Xuan Zhuo Acked-by: Jason Wang Message-Id: <20220801063902.129329-28-xuanzhuo@linux.alibaba.com> Signed-off-by: Michael S. Tsirkin --- include/uapi/linux/virtio_config.h | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/virtio_config.h b/include/uapi/linux/virtio_config.h index f0fb0ae021c0..3c05162bc988 100644 --- a/include/uapi/linux/virtio_config.h +++ b/include/uapi/linux/virtio_config.h @@ -52,7 +52,7 @@ * rest are per-device feature bits. */ #define VIRTIO_TRANSPORT_F_START 28 -#define VIRTIO_TRANSPORT_F_END 38 +#define VIRTIO_TRANSPORT_F_END 41 #ifndef VIRTIO_CONFIG_NO_LEGACY /* Do we get callbacks when the ring is completely used, even if we've @@ -98,4 +98,9 @@ * Does the device support Single Root I/O Virtualization? */ #define VIRTIO_F_SR_IOV 37 + +/* + * This feature indicates that the driver can reset a queue individually. 
+ */ +#define VIRTIO_F_RING_RESET 40 #endif /* _UAPI_LINUX_VIRTIO_CONFIG_H */ -- cgit v1.2.3 From 0cdd450e70510c9e13af8099e9f6c1467e6a0b91 Mon Sep 17 00:00:00 2001 From: Xuan Zhuo Date: Mon, 1 Aug 2022 14:38:49 +0800 Subject: virtio_pci: struct virtio_pci_common_cfg add queue_reset Add queue_reset in virtio_pci_modern_common_cfg. https://github.com/oasis-tcs/virtio-spec/issues/124 https://github.com/oasis-tcs/virtio-spec/issues/139 Signed-off-by: Xuan Zhuo Acked-by: Jason Wang Message-Id: <20220801063902.129329-30-xuanzhuo@linux.alibaba.com> Signed-off-by: Michael S. Tsirkin --- include/uapi/linux/virtio_pci.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/virtio_pci.h b/include/uapi/linux/virtio_pci.h index f5981a874481..f703afc7ad31 100644 --- a/include/uapi/linux/virtio_pci.h +++ b/include/uapi/linux/virtio_pci.h @@ -203,6 +203,7 @@ struct virtio_pci_cfg_cap { #define VIRTIO_PCI_COMMON_Q_USEDLO 48 #define VIRTIO_PCI_COMMON_Q_USEDHI 52 #define VIRTIO_PCI_COMMON_Q_NDATA 56 +#define VIRTIO_PCI_COMMON_Q_RESET 58 #endif /* VIRTIO_PCI_NO_MODERN */ -- cgit v1.2.3 From 699b045a8e43bd1063db4795be685bfd659649dc Mon Sep 17 00:00:00 2001 From: Alvaro Karsz Date: Mon, 18 Jul 2022 12:11:02 +0300 Subject: net: virtio_net: notifications coalescing support New VirtIO network feature: VIRTIO_NET_F_NOTF_COAL. Control a Virtio network device notifications coalescing parameters using the control virtqueue. A device that supports this feature can receive VIRTIO_NET_CTRL_NOTF_COAL control commands. - VIRTIO_NET_CTRL_NOTF_COAL_TX_SET: Ask the network device to change the following parameters: - tx_usecs: Maximum number of usecs to delay a TX notification. - tx_max_packets: Maximum number of packets to send before a TX notification. - VIRTIO_NET_CTRL_NOTF_COAL_RX_SET: Ask the network device to change the following parameters: - rx_usecs: Maximum number of usecs to delay a RX notification.
- rx_max_packets: Maximum number of packets to receive before a RX notification. VirtIO spec. patch: https://lists.oasis-open.org/archives/virtio-comment/202206/msg00100.html Signed-off-by: Alvaro Karsz Message-Id: <20220718091102.498774-1-alvaro.karsz@solid-run.com> Signed-off-by: Michael S. Tsirkin Reviewed-by: Jakub Kicinski Acked-by: Jason Wang --- include/uapi/linux/virtio_net.h | 34 +++++++++++++++++++++++++++++++++- 1 file changed, 33 insertions(+), 1 deletion(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/virtio_net.h b/include/uapi/linux/virtio_net.h index 3f55a4215f11..29ced55514d4 100644 --- a/include/uapi/linux/virtio_net.h +++ b/include/uapi/linux/virtio_net.h @@ -56,7 +56,7 @@ #define VIRTIO_NET_F_MQ 22 /* Device supports Receive Flow * Steering */ #define VIRTIO_NET_F_CTRL_MAC_ADDR 23 /* Set MAC address */ - +#define VIRTIO_NET_F_NOTF_COAL 53 /* Guest can handle notifications coalescing */ #define VIRTIO_NET_F_HASH_REPORT 57 /* Supports hash report */ #define VIRTIO_NET_F_RSS 60 /* Supports RSS RX steering */ #define VIRTIO_NET_F_RSC_EXT 61 /* extended coalescing info */ @@ -355,4 +355,36 @@ struct virtio_net_hash_config { #define VIRTIO_NET_CTRL_GUEST_OFFLOADS 5 #define VIRTIO_NET_CTRL_GUEST_OFFLOADS_SET 0 +/* + * Control notifications coalescing. + * + * Request the device to change the notifications coalescing parameters. + * + * Available with the VIRTIO_NET_F_NOTF_COAL feature bit. + */ +#define VIRTIO_NET_CTRL_NOTF_COAL 6 +/* + * Set the tx-usecs/tx-max-packets patameters. + * tx-usecs - Maximum number of usecs to delay a TX notification. + * tx-max-packets - Maximum number of packets to send before a TX notification. + */ +struct virtio_net_ctrl_coal_tx { + __le32 tx_max_packets; + __le32 tx_usecs; +}; + +#define VIRTIO_NET_CTRL_NOTF_COAL_TX_SET 0 + +/* + * Set the rx-usecs/rx-max-packets patameters. + * rx-usecs - Maximum number of usecs to delay a RX notification. 
+ * rx-max-frames - Maximum number of packets to receive before a RX notification. + */ +struct virtio_net_ctrl_coal_rx { + __le32 rx_max_packets; + __le32 rx_usecs; +}; + +#define VIRTIO_NET_CTRL_NOTF_COAL_RX_SET 1 + #endif /* _UAPI_LINUX_VIRTIO_NET_H */ -- cgit v1.2.3 From 79a463be9e0051997508d52cf411ed5e91d657f6 Mon Sep 17 00:00:00 2001 From: Xie Yongji Date: Wed, 3 Aug 2022 12:55:22 +0800 Subject: vduse: Support registering userspace memory for IOVA regions Introduce two ioctls: VDUSE_IOTLB_REG_UMEM and VDUSE_IOTLB_DEREG_UMEM to support registering and de-registering userspace memory for IOVA regions. Now it only supports registering userspace memory for bounce buffer region in virtio-vdpa case. Signed-off-by: Xie Yongji Acked-by: Jason Wang Message-Id: <20220803045523.23851-5-xieyongji@bytedance.com> Signed-off-by: Michael S. Tsirkin --- include/uapi/linux/vduse.h | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/vduse.h b/include/uapi/linux/vduse.h index 7cfe1c1280c0..9885e0571f09 100644 --- a/include/uapi/linux/vduse.h +++ b/include/uapi/linux/vduse.h @@ -210,6 +210,29 @@ struct vduse_vq_eventfd { */ #define VDUSE_VQ_INJECT_IRQ _IOW(VDUSE_BASE, 0x17, __u32) +/** + * struct vduse_iova_umem - userspace memory configuration for one IOVA region + * @uaddr: start address of userspace memory, it must be aligned to page size + * @iova: start of the IOVA region + * @size: size of the IOVA region + * @reserved: for future use, needs to be initialized to zero + * + * Structure used by VDUSE_IOTLB_REG_UMEM and VDUSE_IOTLB_DEREG_UMEM + * ioctls to register/de-register userspace memory for IOVA regions + */ +struct vduse_iova_umem { + __u64 uaddr; + __u64 iova; + __u64 size; + __u64 reserved[3]; +}; + +/* Register userspace memory for IOVA regions */ +#define VDUSE_IOTLB_REG_UMEM _IOW(VDUSE_BASE, 0x18, struct vduse_iova_umem) + +/* De-register the userspace memory. 
Caller should set iova and size field. */ +#define VDUSE_IOTLB_DEREG_UMEM _IOW(VDUSE_BASE, 0x19, struct vduse_iova_umem) + /* The control messages definition for read(2)/write(2) on /dev/vduse/$NAME */ /** -- cgit v1.2.3 From ad146355bfad627bd0717ece73997c6c93b1b82e Mon Sep 17 00:00:00 2001 From: Xie Yongji Date: Wed, 3 Aug 2022 12:55:23 +0800 Subject: vduse: Support querying information of IOVA regions This introduces a new ioctl: VDUSE_IOTLB_GET_INFO to support querying some information of IOVA regions. Now it can be used to query whether the IOVA region supports userspace memory registration. Signed-off-by: Xie Yongji Message-Id: <20220803045523.23851-6-xieyongji@bytedance.com> Signed-off-by: Michael S. Tsirkin Acked-by: Jason Wang --- include/uapi/linux/vduse.h | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/vduse.h b/include/uapi/linux/vduse.h index 9885e0571f09..11bd48c72c6c 100644 --- a/include/uapi/linux/vduse.h +++ b/include/uapi/linux/vduse.h @@ -233,6 +233,30 @@ struct vduse_iova_umem { /* De-register the userspace memory. Caller should set iova and size field. */ #define VDUSE_IOTLB_DEREG_UMEM _IOW(VDUSE_BASE, 0x19, struct vduse_iova_umem) +/** + * struct vduse_iova_info - information of one IOVA region + * @start: start of the IOVA region + * @last: last of the IOVA region + * @capability: capability of the IOVA region + * @reserved: for future use, needs to be initialized to zero + * + * Structure used by VDUSE_IOTLB_GET_INFO ioctl to get information of + * one IOVA region. + */ +struct vduse_iova_info { + __u64 start; + __u64 last; +#define VDUSE_IOVA_CAP_UMEM (1 << 0) + __u64 capability; + __u64 reserved[3]; +}; + +/* + * Find the first IOVA region that overlaps with the range [start, last] + * and return some information on it. Caller should set start and last fields. 
+ */ +#define VDUSE_IOTLB_GET_INFO _IOWR(VDUSE_BASE, 0x1a, struct vduse_iova_info) + /* The control messages definition for read(2)/write(2) on /dev/vduse/$NAME */ /** -- cgit v1.2.3 From 0723f1df5c3ec8a1112d150dab98e149361ef488 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Eugenio=20P=C3=A9rez?= Date: Wed, 10 Aug 2022 19:15:10 +0200 Subject: vhost-vdpa: introduce SUSPEND backend feature bit MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Userland knows if it can suspend the device or not by checking this feature bit. It's only offered if the vdpa driver backend implements the suspend() operation callback, and to offer it or userland to ack it if the backend does not offer that callback is an error. Signed-off-by: Eugenio Pérez Message-Id: <20220810171512.2343333-3-eperezma@redhat.com> Signed-off-by: Michael S. Tsirkin --- include/uapi/linux/vhost_types.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/vhost_types.h b/include/uapi/linux/vhost_types.h index 634cee485abb..1bdd6e363f4c 100644 --- a/include/uapi/linux/vhost_types.h +++ b/include/uapi/linux/vhost_types.h @@ -161,5 +161,7 @@ struct vhost_vdpa_iova_range { * message */ #define VHOST_BACKEND_F_IOTLB_ASID 0x3 +/* Device can be suspended */ +#define VHOST_BACKEND_F_SUSPEND 0x4 #endif -- cgit v1.2.3 From f345a0143b4dd1cfc850009c6979a3801b86a06f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Eugenio=20P=C3=A9rez?= Date: Wed, 10 Aug 2022 19:15:11 +0200 Subject: vhost-vdpa: uAPI to suspend the device MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The ioctl adds support for suspending the device from userspace. This is a must before getting virtqueue indexes (base) for live migration, since the device could modify them after userland gets them. There are individual ways to perform that action for some devices (VHOST_NET_SET_BACKEND, VHOST_VSOCK_SET_RUNNING, ...) 
but there was no way to perform it for any vhost device (and, in particular, vhost-vdpa). After a successful return of the ioctl call the device must not process more virtqueue descriptors. The device can answer to reads or writes of config fields as if it were not suspended. In particular, writing to "queue_enable" with a value of 1 will not make the device start processing buffers of the virtqueue. Signed-off-by: Eugenio Pérez Message-Id: <20220810171512.2343333-4-eperezma@redhat.com> Signed-off-by: Michael S. Tsirkin --- include/uapi/linux/vhost.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/vhost.h b/include/uapi/linux/vhost.h index cab645d4a645..f9f115a7c75b 100644 --- a/include/uapi/linux/vhost.h +++ b/include/uapi/linux/vhost.h @@ -171,4 +171,13 @@ #define VHOST_VDPA_SET_GROUP_ASID _IOW(VHOST_VIRTIO, 0x7C, \ struct vhost_vring_state) +/* Suspend a device so it does not process virtqueue requests anymore + * + * After the return of ioctl the device must preserve all the necessary state + * (the virtqueue vring base plus the possible device specific states) that is + * required for restoring in the future. The device must not change its + * configuration after that point. + */ +#define VHOST_VDPA_SUSPEND _IO(VHOST_VIRTIO, 0x7D) + #endif -- cgit v1.2.3 From c2e75634cbe368065f140dd30bf8b1a0355158fd Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Wed, 10 Aug 2022 09:45:47 -0700 Subject: net: atm: bring back zatm uAPI Jiri reports that linux-atm does not build without this header. Bring it back. 
It's completely dead code but we can't break the build for user space :( Reported-by: Jiri Slaby Fixes: 052e1f01bfae ("net: atm: remove support for ZeitNet ZN122x ATM devices") Link: https://lore.kernel.org/all/8576aef3-37e4-8bae-bab5-08f82a78efd3@kernel.org/ Link: https://lore.kernel.org/r/20220810164547.484378-1-kuba@kernel.org Signed-off-by: Jakub Kicinski --- include/uapi/linux/atm_zatm.h | 47 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 47 insertions(+) create mode 100644 include/uapi/linux/atm_zatm.h (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/atm_zatm.h b/include/uapi/linux/atm_zatm.h new file mode 100644 index 000000000000..5135027b93c1 --- /dev/null +++ b/include/uapi/linux/atm_zatm.h @@ -0,0 +1,47 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +/* atm_zatm.h - Driver-specific declarations of the ZATM driver (for use by + driver-specific utilities) */ + +/* Written 1995-1999 by Werner Almesberger, EPFL LRC/ICA */ + + +#ifndef LINUX_ATM_ZATM_H +#define LINUX_ATM_ZATM_H + +/* + * Note: non-kernel programs including this file must also include + * sys/types.h for struct timeval + */ + +#include +#include + +#define ZATM_GETPOOL _IOW('a',ATMIOC_SARPRV+1,struct atmif_sioc) + /* get pool statistics */ +#define ZATM_GETPOOLZ _IOW('a',ATMIOC_SARPRV+2,struct atmif_sioc) + /* get statistics and zero */ +#define ZATM_SETPOOL _IOW('a',ATMIOC_SARPRV+3,struct atmif_sioc) + /* set pool parameters */ + +struct zatm_pool_info { + int ref_count; /* free buffer pool usage counters */ + int low_water,high_water; /* refill parameters */ + int rqa_count,rqu_count; /* queue condition counters */ + int offset,next_off; /* alignment optimizations: offset */ + int next_cnt,next_thres; /* repetition counter and threshold */ +}; + +struct zatm_pool_req { + int pool_num; /* pool number */ + struct zatm_pool_info info; /* actual information */ +}; + +#define ZATM_OAM_POOL 0 /* free buffer pool for OAM cells */ +#define 
ZATM_AAL0_POOL 1 /* free buffer pool for AAL0 cells */ +#define ZATM_AAL5_POOL_BASE 2 /* first AAL5 free buffer pool */ +#define ZATM_LAST_POOL ZATM_AAL5_POOL_BASE+10 /* max. 64 kB */ + +#define ZATM_TIMER_HISTORY_SIZE 16 /* number of timer adjustments to + record; must be 2^n */ + +#endif -- cgit v1.2.3 From 4961d0772578e8737afe61370743f3bc22867111 Mon Sep 17 00:00:00 2001 From: Quentin Monnet Date: Fri, 12 Aug 2022 16:37:27 +0100 Subject: bpf: Clear up confusion in bpf_skb_adjust_room()'s documentation Adding or removing room space _below_ layers 2 or 3, as the description mentions, is ambiguous. This was written with a mental image of the packet with layer 2 at the top, layer 3 under it, and so on. But it has led users to believe that it was on lower layers (before the beginning of the L2 and L3 headers respectively). Let's make it more explicit, and specify between which layers the room space is adjusted. Reported-by: Rumen Telbizov Signed-off-by: Quentin Monnet Signed-off-by: Daniel Borkmann Link: https://lore.kernel.org/bpf/20220812153727.224500-3-quentin@isovalent.com --- include/uapi/linux/bpf.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 7d1e2794d83e..934a2a8beb87 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -2573,10 +2573,12 @@ union bpf_attr { * There are two supported modes at this time: * * * **BPF_ADJ_ROOM_MAC**: Adjust room at the mac layer - * (room space is added or removed below the layer 2 header). + * (room space is added or removed between the layer 2 and + * layer 3 headers). * * * **BPF_ADJ_ROOM_NET**: Adjust room at the network layer - * (room space is added or removed below the layer 3 header). + * (room space is added or removed between the layer 3 and + * layer 4 headers). 
* * The following flags are supported at this time: * -- cgit v1.2.3 From 5c669c4a4c6aa0489848093c93b8029f5c5c75ec Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ricardo=20Ca=C3=B1uelo?= Date: Wed, 10 Aug 2022 11:40:03 +0200 Subject: virtio: kerneldocs fixes and enhancements MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fix variable names in some kerneldocs, naming in others. Add kerneldocs for struct vring_desc and vring_interrupt. Signed-off-by: Ricardo Cañuelo Message-Id: <20220810094004.1250-2-ricardo.canuelo@collabora.com> Signed-off-by: Michael S. Tsirkin Reviewed-by: Cornelia Huck --- include/uapi/linux/virtio_ring.h | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/virtio_ring.h b/include/uapi/linux/virtio_ring.h index 476d3e5c0fe7..f8c20d3de8da 100644 --- a/include/uapi/linux/virtio_ring.h +++ b/include/uapi/linux/virtio_ring.h @@ -93,15 +93,21 @@ #define VRING_USED_ALIGN_SIZE 4 #define VRING_DESC_ALIGN_SIZE 16 -/* Virtio ring descriptors: 16 bytes. These can chain together via "next". */ +/** + * struct vring_desc - Virtio ring descriptors, + * 16 bytes long. These can chain together via @next. + * + * @addr: buffer address (guest-physical) + * @len: buffer length + * @flags: descriptor flags + * @next: index of the next descriptor in the chain, + * if the VRING_DESC_F_NEXT flag is set. We chain unused + * descriptors via this, too. + */ struct vring_desc { - /* Address (guest-physical). */ __virtio64 addr; - /* Length. */ __virtio32 len; - /* The flags as indicated above. 
*/ __virtio16 flags; - /* We chain unused descriptors via this, too */ __virtio16 next; }; -- cgit v1.2.3 From 3024d95a4c521c278a7504ee9e80c57c3a9750e0 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Wed, 17 Aug 2022 23:32:09 +0200 Subject: bpf: Partially revert flexible-array member replacement Partially revert 94dfc73e7cf4 ("treewide: uapi: Replace zero-length arrays with flexible-array members") given it breaks BPF UAPI. For example, BPF CI run reveals build breakage under LLVM: [...] CLNG-BPF [test_maps] map_ptr_kern.o CLNG-BPF [test_maps] btf__core_reloc_arrays___diff_arr_val_sz.o CLNG-BPF [test_maps] test_bpf_cookie.o progs/map_ptr_kern.c:314:26: error: field 'trie_key' with variable sized type 'struct bpf_lpm_trie_key' not at the end of a struct or class is a GNU extension [-Werror,-Wgnu-variable-sized-type-not-at-end] struct bpf_lpm_trie_key trie_key; ^ CLNG-BPF [test_maps] btf__core_reloc_type_based___diff.o 1 error generated. make: *** [Makefile:521: /tmp/runner/work/bpf/bpf/tools/testing/selftests/bpf/map_ptr_kern.o] Error 1 make: *** Waiting for unfinished jobs.... [...] Typical usage of the bpf_lpm_trie_key is that the struct gets embedded into a user defined key for the LPM BPF map, from the selftest example: struct bpf_lpm_trie_key { <-- UAPI exported struct __u32 prefixlen; __u8 data[]; }; struct lpm_key { <-- BPF program defined struct struct bpf_lpm_trie_key trie_key; __u32 data; }; Undo this for BPF until a different solution can be found. It's the only flexible- array member case in the UAPI header. This was discovered in BPF CI after Dave reported that the include/uapi/linux/bpf.h header was out of sync with tools/include/uapi/linux/bpf.h after 94dfc73e7cf4. And the subsequent sync attempt failed CI. Fixes: 94dfc73e7cf4 ("treewide: uapi: Replace zero-length arrays with flexible-array members") Reported-by: Dave Marchevsky Signed-off-by: Daniel Borkmann Cc: Gustavo A. R. 
Silva Link: https://lore.kernel.org/bpf/22aebc88-da67-f086-e620-dd4a16e2bc69@iogearbox.net --- include/uapi/linux/bpf.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 7bf9ba1329be..59a217ca2dfd 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -79,7 +79,7 @@ struct bpf_insn { /* Key of an a BPF_MAP_TYPE_LPM_TRIE entry */ struct bpf_lpm_trie_key { __u32 prefixlen; /* up to 32 for AF_INET, 128 for AF_INET6 */ - __u8 data[]; /* Arbitrary size */ + __u8 data[0]; /* Arbitrary size */ }; struct bpf_cgroup_storage_key { -- cgit v1.2.3 From 5d81757835859760a38a54a87eff856d4e578836 Mon Sep 17 00:00:00 2001 From: Emeel Hakim Date: Thu, 18 Aug 2022 18:32:30 +0300 Subject: net: macsec: Expose MACSEC_SALT_LEN definition to user space Expose MACSEC_SALT_LEN definition to user space to be used in various user space applications such as iproute. Iproute will use this as part of adding macsec extended packet number support. 
Reviewed-by: Raed Salem Reviewed-by: Sabrina Dubroca Signed-off-by: Emeel Hakim Link: https://lore.kernel.org/r/20220818153229.4721-1-ehakim@nvidia.com Signed-off-by: Jakub Kicinski --- include/uapi/linux/if_macsec.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/if_macsec.h b/include/uapi/linux/if_macsec.h index 3af2aa069a36..d5b6d1f37353 100644 --- a/include/uapi/linux/if_macsec.h +++ b/include/uapi/linux/if_macsec.h @@ -22,6 +22,8 @@ #define MACSEC_KEYID_LEN 16 +#define MACSEC_SALT_LEN 12 + /* cipher IDs as per IEEE802.1AE-2018 (Table 14-1) */ #define MACSEC_CIPHER_ID_GCM_AES_128 0x0080C20001000001ULL #define MACSEC_CIPHER_ID_GCM_AES_256 0x0080C20001000002ULL -- cgit v1.2.3 From 43cc0ec38131c10557c771760ffdfdb74a2da155 Mon Sep 17 00:00:00 2001 From: Hans Verkuil Date: Mon, 11 Jul 2022 12:21:10 +0200 Subject: media: v4l2-ctrls: add change flag for when dimensions change Add a new V4L2_EVENT_CTRL_CH_DIMENSIONS change flag that is issued when the dimensions of an array change as a result of a __v4l2_ctrl_modify_dimensions() call. This will inform userspace that there are new dimensions. 
Signed-off-by: Hans Verkuil Signed-off-by: Mauro Carvalho Chehab --- include/uapi/linux/videodev2.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/videodev2.h b/include/uapi/linux/videodev2.h index 01e630f2ec78..c415ce5b6829 100644 --- a/include/uapi/linux/videodev2.h +++ b/include/uapi/linux/videodev2.h @@ -2435,6 +2435,7 @@ struct v4l2_event_vsync { #define V4L2_EVENT_CTRL_CH_VALUE (1 << 0) #define V4L2_EVENT_CTRL_CH_FLAGS (1 << 1) #define V4L2_EVENT_CTRL_CH_RANGE (1 << 2) +#define V4L2_EVENT_CTRL_CH_DIMENSIONS (1 << 3) struct v4l2_event_ctrl { __u32 changes; -- cgit v1.2.3 From 1202cdd665315c525b5237e96e0bedc76d7e754f Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Wed, 17 Aug 2022 17:43:21 -0700 Subject: Remove DECnet support from kernel DECnet is an obsolete network protocol that receives more attention from kernel janitors than users. It belongs in computer protocol history museum not in Linux kernel. It has been "Orphaned" in kernel since 2010. The iproute2 support for DECnet was dropped in 5.0 release. The documentation link on Sourceforge says it is abandoned there as well. Leave the UAPI alone to keep userspace programs compiling. This means that there is still an empty neighbour table for AF_DECNET. The table of /proc/sys/net entries was updated to match current directories and reformatted to be alphabetical. Signed-off-by: Stephen Hemminger Acked-by: David Ahern Acked-by: Nikolay Aleksandrov Signed-off-by: David S. 
Miller --- include/uapi/linux/dn.h | 149 ---------------------------------- include/uapi/linux/netfilter_decnet.h | 72 ---------------- include/uapi/linux/netlink.h | 2 +- 3 files changed, 1 insertion(+), 222 deletions(-) delete mode 100644 include/uapi/linux/dn.h delete mode 100644 include/uapi/linux/netfilter_decnet.h (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/dn.h b/include/uapi/linux/dn.h deleted file mode 100644 index 36ca71bd8bbe..000000000000 --- a/include/uapi/linux/dn.h +++ /dev/null @@ -1,149 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ -#ifndef _LINUX_DN_H -#define _LINUX_DN_H - -#include -#include -#include - -/* - - DECnet Data Structures and Constants - -*/ - -/* - * DNPROTO_NSP can't be the same as SOL_SOCKET, - * so increment each by one (compared to ULTRIX) - */ -#define DNPROTO_NSP 2 /* NSP protocol number */ -#define DNPROTO_ROU 3 /* Routing protocol number */ -#define DNPROTO_NML 4 /* Net mgt protocol number */ -#define DNPROTO_EVL 5 /* Evl protocol number (usr) */ -#define DNPROTO_EVR 6 /* Evl protocol number (evl) */ -#define DNPROTO_NSPT 7 /* NSP trace protocol number */ - - -#define DN_ADDL 2 -#define DN_MAXADDL 2 /* ULTRIX headers have 20 here, but pathworks has 2 */ -#define DN_MAXOPTL 16 -#define DN_MAXOBJL 16 -#define DN_MAXACCL 40 -#define DN_MAXALIASL 128 -#define DN_MAXNODEL 256 -#define DNBUFSIZE 65023 - -/* - * SET/GET Socket options - must match the DSO_ numbers below - */ -#define SO_CONDATA 1 -#define SO_CONACCESS 2 -#define SO_PROXYUSR 3 -#define SO_LINKINFO 7 - -#define DSO_CONDATA 1 /* Set/Get connect data */ -#define DSO_DISDATA 10 /* Set/Get disconnect data */ -#define DSO_CONACCESS 2 /* Set/Get connect access data */ -#define DSO_ACCEPTMODE 4 /* Set/Get accept mode */ -#define DSO_CONACCEPT 5 /* Accept deferred connection */ -#define DSO_CONREJECT 6 /* Reject deferred connection */ -#define DSO_LINKINFO 7 /* Set/Get link information */ -#define DSO_STREAM 8 /* Set socket 
type to stream */ -#define DSO_SEQPACKET 9 /* Set socket type to sequenced packet */ -#define DSO_MAXWINDOW 11 /* Maximum window size allowed */ -#define DSO_NODELAY 12 /* Turn off nagle */ -#define DSO_CORK 13 /* Wait for more data! */ -#define DSO_SERVICES 14 /* NSP Services field */ -#define DSO_INFO 15 /* NSP Info field */ -#define DSO_MAX 15 /* Maximum option number */ - - -/* LINK States */ -#define LL_INACTIVE 0 -#define LL_CONNECTING 1 -#define LL_RUNNING 2 -#define LL_DISCONNECTING 3 - -#define ACC_IMMED 0 -#define ACC_DEFER 1 - -#define SDF_WILD 1 /* Wild card object */ -#define SDF_PROXY 2 /* Addr eligible for proxy */ -#define SDF_UICPROXY 4 /* Use uic-based proxy */ - -/* Structures */ - - -struct dn_naddr { - __le16 a_len; - __u8 a_addr[DN_MAXADDL]; /* Two bytes little endian */ -}; - -struct sockaddr_dn { - __u16 sdn_family; - __u8 sdn_flags; - __u8 sdn_objnum; - __le16 sdn_objnamel; - __u8 sdn_objname[DN_MAXOBJL]; - struct dn_naddr sdn_add; -}; -#define sdn_nodeaddrl sdn_add.a_len /* Node address length */ -#define sdn_nodeaddr sdn_add.a_addr /* Node address */ - - - -/* - * DECnet set/get DSO_CONDATA, DSO_DISDATA (optional data) structure - */ -struct optdata_dn { - __le16 opt_status; /* Extended status return */ -#define opt_sts opt_status - __le16 opt_optl; /* Length of user data */ - __u8 opt_data[16]; /* User data */ -}; - -struct accessdata_dn { - __u8 acc_accl; - __u8 acc_acc[DN_MAXACCL]; - __u8 acc_passl; - __u8 acc_pass[DN_MAXACCL]; - __u8 acc_userl; - __u8 acc_user[DN_MAXACCL]; -}; - -/* - * DECnet logical link information structure - */ -struct linkinfo_dn { - __u16 idn_segsize; /* Segment size for link */ - __u8 idn_linkstate; /* Logical link state */ -}; - -/* - * Ethernet address format (for DECnet) - */ -union etheraddress { - __u8 dne_addr[ETH_ALEN]; /* Full ethernet address */ - struct { - __u8 dne_hiord[4]; /* DECnet HIORD prefix */ - __u8 dne_nodeaddr[2]; /* DECnet node address */ - } dne_remote; -}; - - -/* - * DECnet physical 
socket address format - */ -struct dn_addr { - __le16 dna_family; /* AF_DECnet */ - union etheraddress dna_netaddr; /* DECnet ethernet address */ -}; - -#define DECNET_IOCTL_BASE 0x89 /* PROTOPRIVATE range */ - -#define SIOCSNETADDR _IOW(DECNET_IOCTL_BASE, 0xe0, struct dn_naddr) -#define SIOCGNETADDR _IOR(DECNET_IOCTL_BASE, 0xe1, struct dn_naddr) -#define OSIOCSNETADDR _IOW(DECNET_IOCTL_BASE, 0xe0, int) -#define OSIOCGNETADDR _IOR(DECNET_IOCTL_BASE, 0xe1, int) - -#endif /* _LINUX_DN_H */ diff --git a/include/uapi/linux/netfilter_decnet.h b/include/uapi/linux/netfilter_decnet.h deleted file mode 100644 index 3c77f54560f2..000000000000 --- a/include/uapi/linux/netfilter_decnet.h +++ /dev/null @@ -1,72 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ -#ifndef __LINUX_DECNET_NETFILTER_H -#define __LINUX_DECNET_NETFILTER_H - -/* DECnet-specific defines for netfilter. - * This file (C) Steve Whitehouse 1999 derived from the - * ipv4 netfilter header file which is - * (C)1998 Rusty Russell -- This code is GPL. - */ - -#include - -/* only for userspace compatibility */ -#ifndef __KERNEL__ - -#include /* for INT_MIN, INT_MAX */ - -/* kernel define is in netfilter_defs.h */ -#define NF_DN_NUMHOOKS 7 -#endif /* ! __KERNEL__ */ - -/* DECnet Hooks */ -/* After promisc drops, checksum checks. */ -#define NF_DN_PRE_ROUTING 0 -/* If the packet is destined for this box. */ -#define NF_DN_LOCAL_IN 1 -/* If the packet is destined for another interface. */ -#define NF_DN_FORWARD 2 -/* Packets coming from a local process. */ -#define NF_DN_LOCAL_OUT 3 -/* Packets about to hit the wire. 
*/ -#define NF_DN_POST_ROUTING 4 -/* Input Hello Packets */ -#define NF_DN_HELLO 5 -/* Input Routing Packets */ -#define NF_DN_ROUTE 6 - -enum nf_dn_hook_priorities { - NF_DN_PRI_FIRST = INT_MIN, - NF_DN_PRI_CONNTRACK = -200, - NF_DN_PRI_MANGLE = -150, - NF_DN_PRI_NAT_DST = -100, - NF_DN_PRI_FILTER = 0, - NF_DN_PRI_NAT_SRC = 100, - NF_DN_PRI_DNRTMSG = 200, - NF_DN_PRI_LAST = INT_MAX, -}; - -struct nf_dn_rtmsg { - int nfdn_ifindex; -}; - -#define NFDN_RTMSG(r) ((unsigned char *)(r) + NLMSG_ALIGN(sizeof(struct nf_dn_rtmsg))) - -#ifndef __KERNEL__ -/* backwards compatibility for userspace */ -#define DNRMG_L1_GROUP 0x01 -#define DNRMG_L2_GROUP 0x02 -#endif - -enum { - DNRNG_NLGRP_NONE, -#define DNRNG_NLGRP_NONE DNRNG_NLGRP_NONE - DNRNG_NLGRP_L1, -#define DNRNG_NLGRP_L1 DNRNG_NLGRP_L1 - DNRNG_NLGRP_L2, -#define DNRNG_NLGRP_L2 DNRNG_NLGRP_L2 - __DNRNG_NLGRP_MAX -}; -#define DNRNG_NLGRP_MAX (__DNRNG_NLGRP_MAX - 1) - -#endif /*__LINUX_DECNET_NETFILTER_H*/ diff --git a/include/uapi/linux/netlink.h b/include/uapi/linux/netlink.h index 855dffb4c1c3..1e543cf0568c 100644 --- a/include/uapi/linux/netlink.h +++ b/include/uapi/linux/netlink.h @@ -20,7 +20,7 @@ #define NETLINK_CONNECTOR 11 #define NETLINK_NETFILTER 12 /* netfilter subsystem */ #define NETLINK_IP6_FW 13 -#define NETLINK_DNRTMSG 14 /* DECnet routing messages */ +#define NETLINK_DNRTMSG 14 /* DECnet routing messages (obsolete) */ #define NETLINK_KOBJECT_UEVENT 15 /* Kernel messages to userspace */ #define NETLINK_GENERIC 16 /* leave room for NETLINK_DM (DM Events) */ -- cgit v1.2.3 From c6ea70604249bc357ce09e9f8e16c29df0fb2fa2 Mon Sep 17 00:00:00 2001 From: "dougmill@linux.vnet.ibm.com" Date: Tue, 16 Aug 2022 15:07:13 +0100 Subject: block: sed-opal: Add ioctl to return device status Provide a mechanism to retrieve basic status information about the device, including the "supported" flag indicating whether SED-OPAL is supported. 
The information returned is from the various feature descriptors received during the discovery0 step, and so this ioctl does nothing more than perform the discovery0 step and then save the information received. See "struct opal_status" and OPAL_FL_* bits for the status information currently returned. This is necessary to be able to check whether a device is OPAL enabled, set up, locked or unlocked from userspace programs like systemd-cryptsetup and libcryptsetup. Right now we just have to assume the user 'knows' or blindly attempt setup/lock/unlock operations. Signed-off-by: Douglas Miller Tested-by: Luca Boccassi Reviewed-by: Scott Bauer Acked-by: Christian Brauner (Microsoft) Link: https://lore.kernel.org/r/20220816140713.84893-1-luca.boccassi@gmail.com Signed-off-by: Jens Axboe --- include/uapi/linux/sed-opal.h | 13 +++++++++++++ 1 file changed, 13 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/sed-opal.h b/include/uapi/linux/sed-opal.h index 6f5af1a84213..2573772e2fb3 100644 --- a/include/uapi/linux/sed-opal.h +++ b/include/uapi/linux/sed-opal.h @@ -132,6 +132,18 @@ struct opal_read_write_table { __u64 priv; }; +#define OPAL_FL_SUPPORTED 0x00000001 +#define OPAL_FL_LOCKING_SUPPORTED 0x00000002 +#define OPAL_FL_LOCKING_ENABLED 0x00000004 +#define OPAL_FL_LOCKED 0x00000008 +#define OPAL_FL_MBR_ENABLED 0x00000010 +#define OPAL_FL_MBR_DONE 0x00000020 + +struct opal_status { + __u32 flags; + __u32 reserved; +}; + #define IOC_OPAL_SAVE _IOW('p', 220, struct opal_lock_unlock) #define IOC_OPAL_LOCK_UNLOCK _IOW('p', 221, struct opal_lock_unlock) #define IOC_OPAL_TAKE_OWNERSHIP _IOW('p', 222, struct opal_key) @@ -148,5 +160,6 @@ struct opal_read_write_table { #define IOC_OPAL_MBR_DONE _IOW('p', 233, struct opal_mbr_done) #define IOC_OPAL_WRITE_SHADOW_MBR _IOW('p', 234, struct opal_shadow_mbr) #define IOC_OPAL_GENERIC_TABLE_RW _IOW('p', 235, struct opal_read_write_table) +#define IOC_OPAL_GET_STATUS _IOR('p', 236, struct opal_status) 
#endif /* _UAPI_SED_OPAL_H */ -- cgit v1.2.3 From e1d0c6d05afdcff01ace698edb3b8808db1dc066 Mon Sep 17 00:00:00 2001 From: Ammar Faizi Date: Tue, 23 Aug 2022 18:45:49 +0700 Subject: io_uring: uapi: Add `extern "C"` in io_uring.h for liburing Make it easy for liburing to integrate uapi header with the kernel. Previously, when this header changes, the liburing side can't directly copy this header file due to some small differences. Sync them. Link: https://lore.kernel.org/io-uring/f1feef16-6ea2-0653-238f-4aaee35060b6@kernel.dk Cc: Bart Van Assche Cc: Dylan Yudaken Cc: Facebook Kernel Team Signed-off-by: Ammar Faizi Signed-off-by: Jens Axboe --- include/uapi/linux/io_uring.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h index 1463cfecb56b..9e0b5c8d92ce 100644 --- a/include/uapi/linux/io_uring.h +++ b/include/uapi/linux/io_uring.h @@ -12,6 +12,10 @@ #include #include +#ifdef __cplusplus +extern "C" { +#endif + /* * IO submission data structure (Submission Queue Entry) */ @@ -661,4 +665,8 @@ struct io_uring_recvmsg_out { __u32 flags; }; +#ifdef __cplusplus +} +#endif + #endif -- cgit v1.2.3 From 12cda13cfd5310bbfefdfe32a82489228e2e0381 Mon Sep 17 00:00:00 2001 From: Alexander Aring Date: Mon, 15 Aug 2022 15:43:25 -0400 Subject: fs: dlm: remove DLM_LSFL_FS from uapi The DLM_LSFL_FS flag is set in lockspaces created directly for a kernel user, as opposed to those lockspaces created for user space applications. The user space libdlm allowed this flag to be set for lockspaces created from user space, but then used by a kernel user. No kernel user has ever used this method, so remove the ability to do it. 
Signed-off-by: Alexander Aring Signed-off-by: David Teigland --- include/uapi/linux/dlm.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/dlm.h b/include/uapi/linux/dlm.h index 0d2eca287567..1923f4f3b05e 100644 --- a/include/uapi/linux/dlm.h +++ b/include/uapi/linux/dlm.h @@ -69,7 +69,6 @@ struct dlm_lksb { /* dlm_new_lockspace() flags */ #define DLM_LSFL_TIMEWARN 0x00000002 -#define DLM_LSFL_FS 0x00000004 #define DLM_LSFL_NEWEXCL 0x00000008 -- cgit v1.2.3 From 91350fe152930c0d61a362af68272526490efea5 Mon Sep 17 00:00:00 2001 From: Shmulik Ladkani Date: Sun, 21 Aug 2022 14:35:17 +0300 Subject: bpf, flow_dissector: Introduce BPF_FLOW_DISSECTOR_CONTINUE retcode for bpf progs Currently, attaching BPF_PROG_TYPE_FLOW_DISSECTOR programs completely replaces the flow-dissector logic with custom dissection logic. This forces implementors to write programs that handle dissection for any flows expected in the namespace. It makes sense for flow-dissector BPF programs to just augment the dissector with custom logic (e.g. dissecting certain flows or custom protocols), while enjoying the broad capabilities of the standard dissector for any other traffic. Introduce BPF_FLOW_DISSECTOR_CONTINUE retcode. Flow-dissector BPF programs may return this to indicate no dissection was made, and fallback to the standard dissector is requested. Signed-off-by: Shmulik Ladkani Signed-off-by: Daniel Borkmann Reviewed-by: Stanislav Fomichev Acked-by: John Fastabend Link: https://lore.kernel.org/bpf/20220821113519.116765-3-shmulik.ladkani@gmail.com --- include/uapi/linux/bpf.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 934a2a8beb87..7f87012b012e 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -5861,6 +5861,11 @@ enum bpf_ret_code { * represented by BPF_REDIRECT above). 
*/ BPF_LWT_REROUTE = 128, + /* BPF_FLOW_DISSECTOR_CONTINUE: used by BPF_PROG_TYPE_FLOW_DISSECTOR + * to indicate that no custom dissection was performed, and + * fallback to standard dissector is requested. + */ + BPF_FLOW_DISSECTOR_CONTINUE = 129, }; struct bpf_sock { -- cgit v1.2.3 From 2172fb8007eaafbef18563afb6c1ae5a976bf787 Mon Sep 17 00:00:00 2001 From: Stanislav Fomichev Date: Tue, 23 Aug 2022 15:25:54 -0700 Subject: bpf: update bpf_{g,s}et_retval documentation * replace 'syscall' with 'upper layers', still mention that it's being exported via syscall errno * describe what happens in set_retval(-EPERM) + return 1 * describe what happens with bind's 'return 3' Acked-by: Martin KaFai Lau Signed-off-by: Stanislav Fomichev Link: https://lore.kernel.org/r/20220823222555.523590-5-sdf@google.com Signed-off-by: Alexei Starovoitov --- include/uapi/linux/bpf.h | 22 +++++++++++++++++----- 1 file changed, 17 insertions(+), 5 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 7f87012b012e..644600dbb114 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -5085,17 +5085,29 @@ union bpf_attr { * * int bpf_get_retval(void) * Description - * Get the syscall's return value that will be returned to userspace. + * Get the BPF program's return value that will be returned to the upper layers. * - * This helper is currently supported by cgroup programs only. + * This helper is currently supported by cgroup programs and only by the hooks + * where BPF program's return value is returned to the userspace via errno. * Return - * The syscall's return value. + * The BPF program's return value. * * int bpf_set_retval(int retval) * Description - * Set the syscall's return value that will be returned to userspace. + * Set the BPF program's return value that will be returned to the upper layers. 
+ * + * This helper is currently supported by cgroup programs and only by the hooks + * where BPF program's return value is returned to the userspace via errno. + * + * Note that there is the following corner case where the program exports an error + * via bpf_set_retval but signals success via 'return 1': + * + * bpf_set_retval(-EPERM); + * return 1; + * + * In this case, the BPF program's return value will use helper's -EPERM. This + * still holds true for cgroup/bind{4,6} which supports extra 'return 3' success case. * - * This helper is currently supported by cgroup programs only. * Return * 0 on success, or a negative error in case of failure. * -- cgit v1.2.3 From 30b6055428a90cc52d4add164df12b94ab07c3fd Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Fri, 19 Aug 2022 13:02:20 -0700 Subject: net: improve and fix netlink kdoc Subsequent patch will render the kdoc from include/uapi/linux/netlink.h into Documentation. We need to fix the warnings. While at it move the comments on struct nlmsghdr to a proper kdoc comment. 
Link: https://lore.kernel.org/r/20220819200221.422801-1-kuba@kernel.org Signed-off-by: Jakub Kicinski --- include/uapi/linux/netlink.h | 21 ++++++++++++++++----- 1 file changed, 16 insertions(+), 5 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/netlink.h b/include/uapi/linux/netlink.h index 1e543cf0568c..e0ab261ceca2 100644 --- a/include/uapi/linux/netlink.h +++ b/include/uapi/linux/netlink.h @@ -41,12 +41,20 @@ struct sockaddr_nl { __u32 nl_groups; /* multicast groups mask */ }; +/** + * struct nlmsghdr - fixed format metadata header of Netlink messages + * @nlmsg_len: Length of message including header + * @nlmsg_type: Message content type + * @nlmsg_flags: Additional flags + * @nlmsg_seq: Sequence number + * @nlmsg_pid: Sending process port ID + */ struct nlmsghdr { - __u32 nlmsg_len; /* Length of message including header */ - __u16 nlmsg_type; /* Message content */ - __u16 nlmsg_flags; /* Additional flags */ - __u32 nlmsg_seq; /* Sequence number */ - __u32 nlmsg_pid; /* Sending process port ID */ + __u32 nlmsg_len; + __u16 nlmsg_type; + __u16 nlmsg_flags; + __u32 nlmsg_seq; + __u32 nlmsg_pid; }; /* Flags values */ @@ -337,6 +345,9 @@ enum netlink_attribute_type { * bitfield32 type (U32) * @NL_POLICY_TYPE_ATTR_MASK: mask of valid bits for unsigned integers (U64) * @NL_POLICY_TYPE_ATTR_PAD: pad attribute for 64-bit alignment + * + * @__NL_POLICY_TYPE_ATTR_MAX: number of attributes + * @NL_POLICY_TYPE_ATTR_MAX: highest attribute number */ enum netlink_policy_type_attr { NL_POLICY_TYPE_ATTR_UNSPEC, -- cgit v1.2.3 From e7a7b84e33178db4a839c5e1773247be17597c1f Mon Sep 17 00:00:00 2001 From: Veerendranath Jakkam Date: Sat, 30 Jul 2022 10:56:43 +0530 Subject: wifi: cfg80211: Add link_id parameter to various key operations for MLO Add support for various key operations on MLD by adding new parameter link_id. 
Pass the link_id received from userspace to driver for add_key, get_key, del_key, set_default_key, set_default_mgmt_key and set_default_beacon_key to support configuring keys specific to each MLO link. Userspace must not specify link ID for MLO pairwise key since it is common for all the MLO links. Signed-off-by: Veerendranath Jakkam Link: https://lore.kernel.org/r/20220730052643.1959111-4-quic_vjakkam@quicinc.com Signed-off-by: Johannes Berg --- include/uapi/linux/nl80211.h | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index ffb7c573e299..573db20403dc 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -377,14 +377,22 @@ * the non-transmitting interfaces are deleted as well. * * @NL80211_CMD_GET_KEY: Get sequence counter information for a key specified - * by %NL80211_ATTR_KEY_IDX and/or %NL80211_ATTR_MAC. + * by %NL80211_ATTR_KEY_IDX and/or %NL80211_ATTR_MAC. %NL80211_ATTR_MAC + * represents peer's MLD address for MLO pairwise key. For MLO group key, + * the link is identified by %NL80211_ATTR_MLO_LINK_ID. * @NL80211_CMD_SET_KEY: Set key attributes %NL80211_ATTR_KEY_DEFAULT, * %NL80211_ATTR_KEY_DEFAULT_MGMT, or %NL80211_ATTR_KEY_THRESHOLD. + * For MLO connection, the link to set default key is identified by + * %NL80211_ATTR_MLO_LINK_ID. * @NL80211_CMD_NEW_KEY: add a key with given %NL80211_ATTR_KEY_DATA, * %NL80211_ATTR_KEY_IDX, %NL80211_ATTR_MAC, %NL80211_ATTR_KEY_CIPHER, - * and %NL80211_ATTR_KEY_SEQ attributes. + * and %NL80211_ATTR_KEY_SEQ attributes. %NL80211_ATTR_MAC represents + * peer's MLD address for MLO pairwise key. The link to add MLO + * group key is identified by %NL80211_ATTR_MLO_LINK_ID. * @NL80211_CMD_DEL_KEY: delete a key identified by %NL80211_ATTR_KEY_IDX - * or %NL80211_ATTR_MAC. + * or %NL80211_ATTR_MAC. %NL80211_ATTR_MAC represents peer's MLD address + * for MLO pairwise key. 
The link to delete group key is identified by + * %NL80211_ATTR_MLO_LINK_ID. * * @NL80211_CMD_GET_BEACON: (not used) * @NL80211_CMD_SET_BEACON: change the beacon on an access point interface -- cgit v1.2.3 From d4ccaf58a8472123ac97e6db03932c375b5c45ba Mon Sep 17 00:00:00 2001 From: Hao Luo Date: Wed, 24 Aug 2022 16:31:13 -0700 Subject: bpf: Introduce cgroup iter Cgroup_iter is a type of bpf_iter. It walks over cgroups in four modes: - walking a cgroup's descendants in pre-order. - walking a cgroup's descendants in post-order. - walking a cgroup's ancestors. - process only the given cgroup. When attaching cgroup_iter, one can set a cgroup to the iter_link created from attaching. This cgroup is passed as a file descriptor or cgroup id and serves as the starting point of the walk. If no cgroup is specified, the starting point will be the root cgroup v2. For walking descendants, one can specify the order: either pre-order or post-order. For walking ancestors, the walk starts at the specified cgroup and ends at the root. One can also terminate the walk early by returning 1 from the iter program. Note that because walking cgroup hierarchy holds cgroup_mutex, the iter program is called with cgroup_mutex held. Currently only one session is supported, which means, depending on the volume of data bpf program intends to send to user space, the number of cgroups that can be walked is limited. For example, given the current buffer size is 8 * PAGE_SIZE, if the program sends 64B data for each cgroup, assuming PAGE_SIZE is 4kb, the total number of cgroups that can be walked is 512. This is a limitation of cgroup_iter. If the output data is larger than the kernel buffer size, after all data in the kernel buffer is consumed by user space, the subsequent read() syscall will signal EOPNOTSUPP. In order to work around, the user may have to update their program to reduce the volume of data sent to output. For example, skip some uninteresting cgroups. 
In future, we may extend bpf_iter flags to allow customizing buffer size. Acked-by: Yonghong Song Acked-by: Tejun Heo Signed-off-by: Hao Luo Link: https://lore.kernel.org/r/20220824233117.1312810-2-haoluo@google.com Signed-off-by: Alexei Starovoitov --- include/uapi/linux/bpf.h | 30 ++++++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 644600dbb114..0f61f09f467a 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -87,10 +87,29 @@ struct bpf_cgroup_storage_key { __u32 attach_type; /* program attach type (enum bpf_attach_type) */ }; +enum bpf_cgroup_iter_order { + BPF_ITER_ORDER_UNSPEC = 0, + BPF_ITER_SELF_ONLY, /* process only a single object. */ + BPF_ITER_DESCENDANTS_PRE, /* walk descendants in pre-order. */ + BPF_ITER_DESCENDANTS_POST, /* walk descendants in post-order. */ + BPF_ITER_ANCESTORS_UP, /* walk ancestors upward. */ +}; + union bpf_iter_link_info { struct { __u32 map_fd; } map; + struct { + enum bpf_cgroup_iter_order order; + + /* At most one of cgroup_fd and cgroup_id can be non-zero. If + * both are zero, the walk starts from the default cgroup v2 + * root. For walking v1 hierarchy, one should always explicitly + * specify cgroup_fd. + */ + __u32 cgroup_fd; + __u64 cgroup_id; + } cgroup; }; /* BPF syscall commands, see bpf(2) man-page for more details. */ @@ -6176,11 +6195,22 @@ struct bpf_link_info { struct { __aligned_u64 target_name; /* in/out: target_name buffer ptr */ __u32 target_name_len; /* in/out: target_name buffer len */ + + /* If the iter specific field is 32 bits, it can be put + * in the first or second union. Otherwise it should be + * put in the second union. 
+ */ union { struct { __u32 map_id; } map; }; + union { + struct { + __u64 cgroup_id; + __u32 order; + } cgroup; + }; } iter; struct { __u32 netns_ino; -- cgit v1.2.3 From d4ffb6f39f1a1b260966b43a4ffdb64779c650dd Mon Sep 17 00:00:00 2001 From: Hao Luo Date: Thu, 25 Aug 2022 15:39:36 -0700 Subject: bpf: Add CGROUP prefix to cgroup_iter_order bpf_cgroup_iter_order is globally visible but the entries do not have CGROUP prefix. As requested by Andrii, put a CGROUP in the names in bpf_cgroup_iter_order. This patch fixes two previous commits: one introduced the API and the other uses the API in bpf selftest (that is, the selftest cgroup_hierarchical_stats). I tested this patch via the following command: test_progs -t cgroup,iter,btf_dump Fixes: d4ccaf58a847 ("bpf: Introduce cgroup iter") Fixes: 88886309d2e8 ("selftests/bpf: add a selftest for cgroup hierarchical stats collection") Suggested-by: Andrii Nakryiko Acked-by: Andrii Nakryiko Signed-off-by: Hao Luo Link: https://lore.kernel.org/r/20220825223936.1865810-1-haoluo@google.com Signed-off-by: Martin KaFai Lau --- include/uapi/linux/bpf.h | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 0f61f09f467a..bdf4bc6d8d6b 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -88,11 +88,11 @@ struct bpf_cgroup_storage_key { }; enum bpf_cgroup_iter_order { - BPF_ITER_ORDER_UNSPEC = 0, - BPF_ITER_SELF_ONLY, /* process only a single object. */ - BPF_ITER_DESCENDANTS_PRE, /* walk descendants in pre-order. */ - BPF_ITER_DESCENDANTS_POST, /* walk descendants in post-order. */ - BPF_ITER_ANCESTORS_UP, /* walk ancestors upward. */ + BPF_CGROUP_ITER_ORDER_UNSPEC = 0, + BPF_CGROUP_ITER_SELF_ONLY, /* process only a single object. */ + BPF_CGROUP_ITER_DESCENDANTS_PRE, /* walk descendants in pre-order. */ + BPF_CGROUP_ITER_DESCENDANTS_POST, /* walk descendants in post-order. 
*/ + BPF_CGROUP_ITER_ANCESTORS_UP, /* walk ancestors upward. */ }; union bpf_iter_link_info { -- cgit v1.2.3 From 93315e46b000fc80fff5d53c3f444417fb3df6de Mon Sep 17 00:00:00 2001 From: Sandipan Das Date: Thu, 11 Aug 2022 18:00:00 +0530 Subject: perf/core: Add speculation info to branch entries Add a new "spec" bitfield to branch entries for providing speculation information. This will be populated using hints provided by branch sampling features on supported hardware. The following cases are covered: * No branch speculation information is available * Branch is speculative but taken on the wrong path * Branch is non-speculative but taken on the correct path * Branch is speculative and taken on the correct path Suggested-by: Stephane Eranian Signed-off-by: Sandipan Das Signed-off-by: Peter Zijlstra (Intel) Link: https://lore.kernel.org/r/834088c302faf21c7b665031dd111f424e509a64.1660211399.git.sandipan.das@amd.com --- include/uapi/linux/perf_event.h | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/perf_event.h b/include/uapi/linux/perf_event.h index 03b370062741..30a4723aefd4 100644 --- a/include/uapi/linux/perf_event.h +++ b/include/uapi/linux/perf_event.h @@ -256,6 +256,17 @@ enum { PERF_BR_MAX, }; +/* + * Common branch speculation outcome classification + */ +enum { + PERF_BR_SPEC_NA = 0, /* Not available */ + PERF_BR_SPEC_WRONG_PATH = 1, /* Speculative but on wrong path */ + PERF_BR_NON_SPEC_CORRECT_PATH = 2, /* Non-speculative but on correct path */ + PERF_BR_SPEC_CORRECT_PATH = 3, /* Speculative and on correct path */ + PERF_BR_SPEC_MAX, +}; + #define PERF_SAMPLE_BRANCH_PLM_ALL \ (PERF_SAMPLE_BRANCH_USER|\ PERF_SAMPLE_BRANCH_KERNEL|\ @@ -1363,6 +1374,7 @@ union perf_mem_data_src { * abort: aborting a hardware transaction * cycles: cycles from last branch (or 0 if not supported) * type: branch type + * spec: branch speculation info (or 0 if not supported) */ struct 
perf_branch_entry { __u64 from; @@ -1373,7 +1385,8 @@ struct perf_branch_entry { abort:1, /* transaction abort */ cycles:16, /* cycle count to last branch */ type:4, /* branch type */ - reserved:40; + spec:2, /* branch speculation info */ + reserved:38; }; union perf_sample_weight { -- cgit v1.2.3 From 54c4ef34c4b6f9720fded620e2893894f9f2c554 Mon Sep 17 00:00:00 2001 From: Andrey Zhadchenko Date: Thu, 25 Aug 2022 05:04:49 +0300 Subject: openvswitch: allow specifying ifindex of new interfaces CRIU is preserving ifindexes of net devices after restoration. However, current Open vSwitch API does not allow to target ifindex, so we cannot correctly restore OVS configuration. Add new OVS_DP_ATTR_IFINDEX for OVS_DP_CMD_NEW and use it as desired ifindex. Use OVS_VPORT_ATTR_IFINDEX during OVS_VPORT_CMD_NEW to specify new netdev ifindex. Signed-off-by: Andrey Zhadchenko Acked-by: Christian Brauner (Microsoft) Signed-off-by: Jakub Kicinski --- include/uapi/linux/openvswitch.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/openvswitch.h b/include/uapi/linux/openvswitch.h index ce3e1738d427..94066f87e9ee 100644 --- a/include/uapi/linux/openvswitch.h +++ b/include/uapi/linux/openvswitch.h @@ -76,6 +76,8 @@ enum ovs_datapath_cmd { * datapath. Always present in notifications. * @OVS_DP_ATTR_MEGAFLOW_STATS: Statistics about mega flow masks usage for the * datapath. Always present in notifications. + * @OVS_DP_ATTR_IFINDEX: Interface index for a new datapath netdev. Only + * valid for %OVS_DP_CMD_NEW requests. * * These attributes follow the &struct ovs_header within the Generic Netlink * payload for %OVS_DP_* commands. 
@@ -92,6 +94,7 @@ enum ovs_datapath_attr { OVS_DP_ATTR_PER_CPU_PIDS, /* Netlink PIDS to receive upcalls in * per-cpu dispatch mode */ + OVS_DP_ATTR_IFINDEX, __OVS_DP_ATTR_MAX }; -- cgit v1.2.3 From aa75622c3be4d5819ce69c714acbcbd67bba5d65 Mon Sep 17 00:00:00 2001 From: Quentin Monnet Date: Thu, 25 Aug 2022 23:08:06 +0100 Subject: bpf: Fix a few typos in BPF helpers documentation Address a few typos in the documentation for the BPF helper functions. They were reported by Jakub [0], who ran spell checkers on the generated man page [1]. [0] https://lore.kernel.org/linux-man/d22dcd47-023c-8f52-d369-7b5308e6c842@gmail.com/T/#mb02e7d4b7fb61d98fa914c77b581184e9a9537af [1] https://lore.kernel.org/linux-man/eb6a1e41-c48e-ac45-5154-ac57a2c76108@gmail.com/T/#m4a8d1b003616928013ffcd1450437309ab652f9f v3: Do not copy unrelated (and breaking) elements to tools/ header v2: Turn a ',' into a ';' Reported-by: Jakub Wilk Signed-off-by: Quentin Monnet Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20220825220806.107143-1-quentin@isovalent.com --- include/uapi/linux/bpf.h | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index bdf4bc6d8d6b..962960a98835 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -4456,7 +4456,7 @@ union bpf_attr { * * **-EEXIST** if the option already exists. * - * **-EFAULT** on failrue to parse the existing header options. + * **-EFAULT** on failure to parse the existing header options. * * **-EPERM** if the helper cannot be used under the current * *skops*\ **->op**. @@ -4665,7 +4665,7 @@ union bpf_attr { * a *map* with *task* as the **key**. 
From this * perspective, the usage is not much different from * **bpf_map_lookup_elem**\ (*map*, **&**\ *task*) except this - * helper enforces the key must be an task_struct and the map must also + * helper enforces the key must be a task_struct and the map must also * be a **BPF_MAP_TYPE_TASK_STORAGE**. * * Underneath, the value is stored locally at *task* instead of @@ -4723,7 +4723,7 @@ union bpf_attr { * * long bpf_ima_inode_hash(struct inode *inode, void *dst, u32 size) * Description - * Returns the stored IMA hash of the *inode* (if it's avaialable). + * Returns the stored IMA hash of the *inode* (if it's available). * If the hash is larger than *size*, then only *size* * bytes will be copied to *dst* * Return @@ -4747,12 +4747,12 @@ union bpf_attr { * * The argument *len_diff* can be used for querying with a planned * size change. This allows to check MTU prior to changing packet - * ctx. Providing an *len_diff* adjustment that is larger than the + * ctx. Providing a *len_diff* adjustment that is larger than the * actual packet size (resulting in negative packet size) will in - * principle not exceed the MTU, why it is not considered a - * failure. Other BPF-helpers are needed for performing the - * planned size change, why the responsability for catch a negative - * packet size belong in those helpers. + * principle not exceed the MTU, which is why it is not considered + * a failure. Other BPF helpers are needed for performing the + * planned size change; therefore the responsibility for catching + * a negative packet size belongs in those helpers. * * Specifying *ifindex* zero means the MTU check is performed * against the current net device. 
This is practical if this isn't -- cgit v1.2.3 From a724ec82966d57e4b5d36341d3e3dc1a3c011564 Mon Sep 17 00:00:00 2001 From: Anshuman Khandual Date: Wed, 24 Aug 2022 10:18:15 +0530 Subject: perf: Add system error and not in transaction branch types This expands generic branch type classification by adding two more entries, i.e. system error and not in transaction. This also updates the x86 implementation to process X86_BR_NO_TX records as appropriate. This changes branch types reported to user space on x86 platform but it should not be a problem. The possible scenarios and impacts are enumerated here. -------------------------------------------------------------------------- | kernel | perf tool | Impact | -------------------------------------------------------------------------- | old | old | Works as before | -------------------------------------------------------------------------- | old | new | PERF_BR_UNKNOWN is processed | -------------------------------------------------------------------------- | new | old | PERF_BR_NO_TX is blocked via old PERF_BR_MAX | -------------------------------------------------------------------------- | new | new | PERF_BR_NO_TX is recognized | -------------------------------------------------------------------------- When PERF_BR_NO_TX is blocked via old PERF_BR_MAX (new kernel with old perf tool) the user space might throw up a warning complaining about unrecognized branch types being reported, but it's expected. PERF_BR_SERROR & PERF_BR_NO_TX branch types will be used for BRBE implementation on arm64 platform. PERF_BR_NO_TX complements 'abort' and 'in_tx' elements in perf_branch_entry which represent other transaction states for a given branch record, thereby completing the transaction state classification.
Signed-off-by: Anshuman Khandual Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: James Clark Link: https://lkml.kernel.org/r/20220824044822.70230-2-anshuman.khandual@arm.com --- include/uapi/linux/perf_event.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/perf_event.h b/include/uapi/linux/perf_event.h index 30a4723aefd4..a79cc0eb4de7 100644 --- a/include/uapi/linux/perf_event.h +++ b/include/uapi/linux/perf_event.h @@ -253,6 +253,8 @@ enum { PERF_BR_COND_RET = 10, /* conditional function return */ PERF_BR_ERET = 11, /* exception return */ PERF_BR_IRQ = 12, /* irq */ + PERF_BR_SERROR = 13, /* system error */ + PERF_BR_NO_TX = 14, /* not in transaction */ PERF_BR_MAX, }; -- cgit v1.2.3 From b190bc4ac9e6d9763b61654c5a0c085ff77d7a09 Mon Sep 17 00:00:00 2001 From: Anshuman Khandual Date: Wed, 24 Aug 2022 10:18:16 +0530 Subject: perf: Extend branch type classification branch_entry.type has now run out of space to accommodate more branch type classifications. This will prevent perf branch stack implementation on arm64 (via BRBE) from capturing all available branch types. Extending this bit field, i.e. branch_entry.type [4 bits], is not an option as it will break user space ABI both for little and big endian perf tools. Extend branch classification with a new field branch_entry.new_type via a new branch type PERF_BR_EXTEND_ABI in branch_entry.type. Perf tools which could decode PERF_BR_EXTEND_ABI, will then parse branch_entry.new_type as well. branch_entry.new_type is a 4 bit field which can hold up to 16 branch types. The first three branch types will hold various generic page faults followed by five architecture specific branch types, which can be overridden by the platform for specific use cases. These architecture specific branch types get overridden on arm64 platform for BRBE implementation.
New generic branch types - PERF_BR_NEW_FAULT_ALGN - PERF_BR_NEW_FAULT_DATA - PERF_BR_NEW_FAULT_INST New arch specific branch types - PERF_BR_NEW_ARCH_1 - PERF_BR_NEW_ARCH_2 - PERF_BR_NEW_ARCH_3 - PERF_BR_NEW_ARCH_4 - PERF_BR_NEW_ARCH_5 Signed-off-by: Anshuman Khandual Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: James Clark Link: https://lkml.kernel.org/r/20220824044822.70230-3-anshuman.khandual@arm.com --- include/uapi/linux/perf_event.h | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/perf_event.h b/include/uapi/linux/perf_event.h index a79cc0eb4de7..fed60e6b10e5 100644 --- a/include/uapi/linux/perf_event.h +++ b/include/uapi/linux/perf_event.h @@ -255,6 +255,7 @@ enum { PERF_BR_IRQ = 12, /* irq */ PERF_BR_SERROR = 13, /* system error */ PERF_BR_NO_TX = 14, /* not in transaction */ + PERF_BR_EXTEND_ABI = 15, /* extend ABI */ PERF_BR_MAX, }; @@ -269,6 +270,18 @@ enum { PERF_BR_SPEC_MAX, }; +enum { + PERF_BR_NEW_FAULT_ALGN = 0, /* Alignment fault */ + PERF_BR_NEW_FAULT_DATA = 1, /* Data fault */ + PERF_BR_NEW_FAULT_INST = 2, /* Inst fault */ + PERF_BR_NEW_ARCH_1 = 3, /* Architecture specific */ + PERF_BR_NEW_ARCH_2 = 4, /* Architecture specific */ + PERF_BR_NEW_ARCH_3 = 5, /* Architecture specific */ + PERF_BR_NEW_ARCH_4 = 6, /* Architecture specific */ + PERF_BR_NEW_ARCH_5 = 7, /* Architecture specific */ + PERF_BR_NEW_MAX, +}; + #define PERF_SAMPLE_BRANCH_PLM_ALL \ (PERF_SAMPLE_BRANCH_USER|\ PERF_SAMPLE_BRANCH_KERNEL|\ @@ -1388,7 +1401,8 @@ struct perf_branch_entry { cycles:16, /* cycle count to last branch */ type:4, /* branch type */ spec:2, /* branch speculation info */ - reserved:38; + new_type:4, /* additional branch type */ + reserved:34; }; union perf_sample_weight { -- cgit v1.2.3 From 5402d25aa5710d240040f73fb13d7d5c303ef071 Mon Sep 17 00:00:00 2001 From: Anshuman Khandual Date: Wed, 24 Aug 2022 10:18:17 +0530 Subject: perf: Capture branch privilege information 
Platforms like arm64 could capture privilege level information for all the branch records. Hence this adds a new element in the struct branch_entry to record the privilege level information, which could be requested through a new event.attr.branch_sample_type based flag PERF_SAMPLE_BRANCH_PRIV_SAVE. This flag helps user choose whether privilege information is captured. Signed-off-by: Anshuman Khandual Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: James Clark Link: https://lkml.kernel.org/r/20220824044822.70230-4-anshuman.khandual@arm.com --- include/uapi/linux/perf_event.h | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/perf_event.h b/include/uapi/linux/perf_event.h index fed60e6b10e5..1a258d45a3fa 100644 --- a/include/uapi/linux/perf_event.h +++ b/include/uapi/linux/perf_event.h @@ -204,6 +204,8 @@ enum perf_branch_sample_type_shift { PERF_SAMPLE_BRANCH_HW_INDEX_SHIFT = 17, /* save low level index of raw branch records */ + PERF_SAMPLE_BRANCH_PRIV_SAVE_SHIFT = 18, /* save privilege mode */ + PERF_SAMPLE_BRANCH_MAX_SHIFT /* non-ABI */ }; @@ -233,6 +235,8 @@ enum perf_branch_sample_type { PERF_SAMPLE_BRANCH_HW_INDEX = 1U << PERF_SAMPLE_BRANCH_HW_INDEX_SHIFT, + PERF_SAMPLE_BRANCH_PRIV_SAVE = 1U << PERF_SAMPLE_BRANCH_PRIV_SAVE_SHIFT, + PERF_SAMPLE_BRANCH_MAX = 1U << PERF_SAMPLE_BRANCH_MAX_SHIFT, }; @@ -282,6 +286,13 @@ enum { PERF_BR_NEW_MAX, }; +enum { + PERF_BR_PRIV_UNKNOWN = 0, + PERF_BR_PRIV_USER = 1, + PERF_BR_PRIV_KERNEL = 2, + PERF_BR_PRIV_HV = 3, +}; + #define PERF_SAMPLE_BRANCH_PLM_ALL \ (PERF_SAMPLE_BRANCH_USER|\ PERF_SAMPLE_BRANCH_KERNEL|\ @@ -1402,7 +1413,8 @@ struct perf_branch_entry { type:4, /* branch type */ spec:2, /* branch speculation info */ new_type:4, /* additional branch type */ - reserved:34; + priv:3, /* privilege level */ + reserved:31; }; union perf_sample_weight { -- cgit v1.2.3 From f4054e522531038354bea5c924f286fdd8ae77b5 Mon Sep 17 00:00:00 2001 
From: Anshuman Khandual Date: Wed, 24 Aug 2022 10:18:18 +0530 Subject: perf: Add PERF_BR_NEW_ARCH_[N] map for BRBE on arm64 platform BRBE captured branch types will overflow perf_branch_entry.type and generic branch types in perf_branch_entry.new_type. So override each available arch specific branch type in the following manner to comprehensively process all reported branch types in BRBE. PERF_BR_ARM64_FIQ PERF_BR_NEW_ARCH_1 PERF_BR_ARM64_DEBUG_HALT PERF_BR_NEW_ARCH_2 PERF_BR_ARM64_DEBUG_EXIT PERF_BR_NEW_ARCH_3 PERF_BR_ARM64_DEBUG_INST PERF_BR_NEW_ARCH_4 PERF_BR_ARM64_DEBUG_DATA PERF_BR_NEW_ARCH_5 Signed-off-by: Anshuman Khandual Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: James Clark Link: https://lkml.kernel.org/r/20220824044822.70230-5-anshuman.khandual@arm.com --- include/uapi/linux/perf_event.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/perf_event.h b/include/uapi/linux/perf_event.h index 1a258d45a3fa..dca16582885f 100644 --- a/include/uapi/linux/perf_event.h +++ b/include/uapi/linux/perf_event.h @@ -293,6 +293,12 @@ enum { PERF_BR_PRIV_HV = 3, }; +#define PERF_BR_ARM64_FIQ PERF_BR_NEW_ARCH_1 +#define PERF_BR_ARM64_DEBUG_HALT PERF_BR_NEW_ARCH_2 +#define PERF_BR_ARM64_DEBUG_EXIT PERF_BR_NEW_ARCH_3 +#define PERF_BR_ARM64_DEBUG_INST PERF_BR_NEW_ARCH_4 +#define PERF_BR_ARM64_DEBUG_DATA PERF_BR_NEW_ARCH_5 + #define PERF_SAMPLE_BRANCH_PLM_ALL \ (PERF_SAMPLE_BRANCH_USER|\ PERF_SAMPLE_BRANCH_KERNEL|\ -- cgit v1.2.3 From abc340b38ba25cd6c7aa2c0bd9150d30738c82d0 Mon Sep 17 00:00:00 2001 From: Eyal Birger Date: Fri, 26 Aug 2022 14:46:59 +0300 Subject: xfrm: interface: support collect metadata mode This commit adds support for 'collect_md' mode on xfrm interfaces. Each net can have one collect_md device, created by providing the IFLA_XFRM_COLLECT_METADATA flag at creation. This device cannot be altered and has no if_id or link device attributes. 
On transmit to this device, the if_id is fetched from the attached dst metadata on the skb. If it exists, the link property is also fetched from the metadata. The dst metadata type used is METADATA_XFRM which holds these properties. On the receive side, xfrmi_rcv_cb() populates a dst metadata for each packet received and attaches it to the skb. The if_id used in this case is fetched from the xfrm state, and the link is fetched from the incoming device. This information can later be used by upper layers such as tc, ebpf, and ip rules. Because the skb is scrubbed in xfrmi_rcv_cb(), the attachment of the dst metadata is postponed until after scrubbing. Similarly, xfrm_input() is adapted to avoid dropping metadata dsts by only dropping 'valid' (skb_valid_dst(skb) == true) dsts. Policy matching on packets arriving from collect_md xfrmi devices is done by using the xfrm state existing in the skb's sec_path. The xfrm_if_cb.decode_cb() interface implemented by xfrmi_decode_session() is changed to keep the details of the if_id extraction tucked away in xfrm_interface.c. Reviewed-by: Nicolas Dichtel Reviewed-by: Nikolay Aleksandrov Signed-off-by: Eyal Birger Signed-off-by: Steffen Klassert --- include/uapi/linux/if_link.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h index e36d9d2c65a7..d96f13a42589 100644 --- a/include/uapi/linux/if_link.h +++ b/include/uapi/linux/if_link.h @@ -694,6 +694,7 @@ enum { IFLA_XFRM_UNSPEC, IFLA_XFRM_LINK, IFLA_XFRM_IF_ID, + IFLA_XFRM_COLLECT_METADATA, __IFLA_XFRM_MAX }; -- cgit v1.2.3 From 2c2493b9da9166478fe072e3054f8a5741dadb02 Mon Sep 17 00:00:00 2001 From: Eyal Birger Date: Fri, 26 Aug 2022 14:47:00 +0300 Subject: xfrm: lwtunnel: add lwtunnel support for xfrm interfaces in collect_md mode Allow specifying the xfrm interface if_id and link as part of a route metadata using the lwtunnel infrastructure.
This allows for example using a single xfrm interface in collect_md mode as the target of multiple routes each specifying a different if_id. With the appropriate changes to iproute2, considering an xfrm device ipsec1 in collect_md mode one can for example add a route specifying an if_id like so: ip route add dev ipsec1 encap xfrm if_id 1 In which case traffic routed to the device via this route would use if_id in the xfrm interface policy lookup. Or in the context of vrf, one can also specify the "link" property: ip route add dev ipsec1 encap xfrm if_id 1 link_dev eth15 Note: LWT_XFRM_LINK uses NLA_U32 similar to IFLA_XFRM_LINK even though internally "link" is signed. This is consistent with other _LINK attributes in other devices as well as in bpf and should not have an effect as device indexes can't be negative. Reviewed-by: Nicolas Dichtel Reviewed-by: Nikolay Aleksandrov Signed-off-by: Eyal Birger Signed-off-by: Steffen Klassert --- include/uapi/linux/lwtunnel.h | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/lwtunnel.h b/include/uapi/linux/lwtunnel.h index 2e206919125c..229655ef792f 100644 --- a/include/uapi/linux/lwtunnel.h +++ b/include/uapi/linux/lwtunnel.h @@ -15,6 +15,7 @@ enum lwtunnel_encap_types { LWTUNNEL_ENCAP_SEG6_LOCAL, LWTUNNEL_ENCAP_RPL, LWTUNNEL_ENCAP_IOAM6, + LWTUNNEL_ENCAP_XFRM, __LWTUNNEL_ENCAP_MAX, }; @@ -111,4 +112,13 @@ enum { #define LWT_BPF_MAX_HEADROOM 256 +enum { + LWT_XFRM_UNSPEC, + LWT_XFRM_IF_ID, + LWT_XFRM_LINK, + __LWT_XFRM_MAX, +}; + +#define LWT_XFRM_MAX (__LWT_XFRM_MAX - 1) + #endif /* _UAPI_LWTUNNEL_H_ */ -- cgit v1.2.3 From a41c4088cf431a9f2458f4e54fbbf3113a1bd063 Mon Sep 17 00:00:00 2001 From: Xavier Roumegue Date: Sat, 30 Jul 2022 17:48:39 +0200 Subject: media: v4l: uapi: Add user control base for DW100 controls Add a control base for DW100 driver controls, and reserve 16 controls. 
Signed-off-by: Xavier Roumegue Reviewed-by: Laurent Pinchart Signed-off-by: Hans Verkuil Signed-off-by: Mauro Carvalho Chehab --- include/uapi/linux/v4l2-controls.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/v4l2-controls.h b/include/uapi/linux/v4l2-controls.h index 5f46bf4a570c..87fa476428ee 100644 --- a/include/uapi/linux/v4l2-controls.h +++ b/include/uapi/linux/v4l2-controls.h @@ -225,6 +225,12 @@ enum v4l2_colorfx { */ #define V4L2_CID_USER_ISL7998X_BASE (V4L2_CID_USER_BASE + 0x1180) +/* + * The base for DW100 driver controls. + * We reserve 16 controls for this driver. + */ +#define V4L2_CID_USER_DW100_BASE (V4L2_CID_USER_BASE + 0x1190) + /* MPEG-class control IDs */ /* The MPEG controls are applicable to all codec controls * and the 'MPEG' part of the define is historical */ -- cgit v1.2.3 From 9d5c3c06980510b27e8f7ff033a21120e42c9715 Mon Sep 17 00:00:00 2001 From: Xavier Roumegue Date: Sat, 30 Jul 2022 17:48:40 +0200 Subject: media: uapi: Add a control for DW100 driver The DW100 driver gets the dewarping mapping as a binary blob from the userspace application through a custom control. The blob format is hardware specific so create a dedicated control for this purpose. Signed-off-by: Xavier Roumegue Reviewed-by: Laurent Pinchart Signed-off-by: Hans Verkuil Signed-off-by: Mauro Carvalho Chehab --- include/uapi/linux/dw100.h | 14 ++++++++++++++ 1 file changed, 14 insertions(+) create mode 100644 include/uapi/linux/dw100.h (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/dw100.h b/include/uapi/linux/dw100.h new file mode 100644 index 000000000000..3356496edd6b --- /dev/null +++ b/include/uapi/linux/dw100.h @@ -0,0 +1,14 @@ +/* SPDX-License-Identifier: GPL-2.0-only WITH Linux-syscall-note */ +/* Copyright 2022 NXP */ + +#ifndef __UAPI_DW100_H__ +#define __UAPI_DW100_H__ + +#include + +/* + * Check Documentation/userspace-api/media/drivers/dw100.rst for control details. 
+ */ +#define V4L2_CID_DW100_DEWARPING_16x16_VERTEX_MAP (V4L2_CID_USER_DW100_BASE + 1) + +#endif -- cgit v1.2.3 From 1c24bb3f8bec5805df4763a4327ce616fadfed65 Mon Sep 17 00:00:00 2001 From: Deborah Brouwer Date: Thu, 11 Aug 2022 22:37:56 +0200 Subject: media: v4l2-ctrls: Fix typo in VP8 comment The comment for the VP8 loop filter flags uses the partially wrong name for the flags. Unlike the other VP8 flag names, the loop filter flag names don't have "_FLAG" in them. Change the comment so that it matches the actual flag definitions in the header. Signed-off-by: Deborah Brouwer Reviewed-by: Nicolas Dufresne Signed-off-by: Hans Verkuil Signed-off-by: Mauro Carvalho Chehab --- include/uapi/linux/v4l2-controls.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/v4l2-controls.h b/include/uapi/linux/v4l2-controls.h index 87fa476428ee..b5e7d082b8ad 100644 --- a/include/uapi/linux/v4l2-controls.h +++ b/include/uapi/linux/v4l2-controls.h @@ -1736,7 +1736,7 @@ struct v4l2_vp8_segment { * @sharpness_level: matches sharpness_level syntax element. * @level: matches loop_filter_level syntax element. * @padding: padding field. Should be zeroed by applications. - * @flags: see V4L2_VP8_LF_FLAG_{}. + * @flags: see V4L2_VP8_LF_{}. * * This structure contains loop filter related parameters. * See the 'mb_lf_adjustments()' part of the frame header syntax, -- cgit v1.2.3 From 17611d3fb4a11ec500c49cb952faf09e114a5a10 Mon Sep 17 00:00:00 2001 From: Hans Verkuil Date: Thu, 18 Aug 2022 09:51:40 +0200 Subject: media: videodev2.h: drop V4L2_CAP_ASYNCIO The V4L2_CAP_ASYNCIO capability was never implemented (and in fact it isn't clear what it was supposed to do in the first place). Drop it from the capabilities list. Keep it in videodev2.h with the other defines under ifndef __KERNEL__ for backwards compatibility. This will free up a capability bit for other future uses. 
And having an unused and undefined I/O method is just plain confusing. Signed-off-by: Hans Verkuil Reviewed-by: Laurent Pinchart Signed-off-by: Mauro Carvalho Chehab --- include/uapi/linux/videodev2.h | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/videodev2.h b/include/uapi/linux/videodev2.h index c415ce5b6829..86cae23cc446 100644 --- a/include/uapi/linux/videodev2.h +++ b/include/uapi/linux/videodev2.h @@ -502,7 +502,6 @@ struct v4l2_capability { #define V4L2_CAP_META_CAPTURE 0x00800000 /* Is a metadata capture device */ #define V4L2_CAP_READWRITE 0x01000000 /* read/write systemcalls */ -#define V4L2_CAP_ASYNCIO 0x02000000 /* async I/O */ #define V4L2_CAP_STREAMING 0x04000000 /* streaming I/O ioctls */ #define V4L2_CAP_META_OUTPUT 0x08000000 /* Is a metadata output device */ @@ -2683,6 +2682,11 @@ struct v4l2_create_buffers { #ifndef __KERNEL__ #define V4L2_PIX_FMT_HM12 V4L2_PIX_FMT_NV12_16L16 #define V4L2_PIX_FMT_SUNXI_TILED_NV12 V4L2_PIX_FMT_NV12_32L32 +/* + * This capability was never implemented, anyone using this cap should drop it + * from their code. + */ +#define V4L2_CAP_ASYNCIO 0x02000000 #endif #endif /* _UAPI__LINUX_VIDEODEV2_H */ -- cgit v1.2.3 From 690252f19f0e486abb8590b3a7a03d4e065d93d4 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Thu, 25 Aug 2022 20:09:31 -0700 Subject: netlink: add support for ext_ack missing attributes There is currently no way to report via extack in a structured way that an attribute is missing. This leads to families resorting to string messages. Add a pair of attributes - @offset and @type for machine-readable way of reporting missing attributes. The @offset points to the nest which should have contained the attribute, @type is the expected nla_type. The offset will be skipped if the attribute is missing at the message level rather than inside a nest. 
User space should be able to figure out which attribute enum (AKA attribute space AKA attribute set) the nest pointed to by @offset is using. Reviewed-by: Johannes Berg Signed-off-by: Jakub Kicinski Signed-off-by: Paolo Abeni --- include/uapi/linux/netlink.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/netlink.h b/include/uapi/linux/netlink.h index e0ab261ceca2..e0689dbd2cde 100644 --- a/include/uapi/linux/netlink.h +++ b/include/uapi/linux/netlink.h @@ -140,6 +140,10 @@ struct nlmsgerr { * be used - in the success case - to identify a created * object or operation or similar (binary) * @NLMSGERR_ATTR_POLICY: policy for a rejected attribute + * @NLMSGERR_ATTR_MISS_TYPE: type of a missing required attribute, + * %NLMSGERR_ATTR_MISS_NEST will not be present if the attribute was + * missing at the message level + * @NLMSGERR_ATTR_MISS_NEST: offset of the nest where attribute was missing * @__NLMSGERR_ATTR_MAX: number of attributes * @NLMSGERR_ATTR_MAX: highest attribute number */ @@ -149,6 +153,8 @@ enum nlmsgerr_attrs { NLMSGERR_ATTR_OFFS, NLMSGERR_ATTR_COOKIE, NLMSGERR_ATTR_POLICY, + NLMSGERR_ATTR_MISS_TYPE, + NLMSGERR_ATTR_MISS_NEST, __NLMSGERR_ATTR_MAX, NLMSGERR_ATTR_MAX = __NLMSGERR_ATTR_MAX - 1 -- cgit v1.2.3 From fce1c23f629173e0db78b79a74f2052044a00e65 Mon Sep 17 00:00:00 2001 From: Alvaro Karsz Date: Tue, 23 Aug 2022 10:39:47 +0300 Subject: net: virtio_net: fix notification coalescing comments Fix wording in comments for the notifications coalescing feature. Signed-off-by: Alvaro Karsz Acked-by: Michael S. 
Tsirkin Link: https://lore.kernel.org/r/20220823073947.14774-1-alvaro.karsz@solid-run.com Signed-off-by: Jakub Kicinski --- include/uapi/linux/virtio_net.h | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/virtio_net.h b/include/uapi/linux/virtio_net.h index 29ced55514d4..6cb842ea8979 100644 --- a/include/uapi/linux/virtio_net.h +++ b/include/uapi/linux/virtio_net.h @@ -56,7 +56,7 @@ #define VIRTIO_NET_F_MQ 22 /* Device supports Receive Flow * Steering */ #define VIRTIO_NET_F_CTRL_MAC_ADDR 23 /* Set MAC address */ -#define VIRTIO_NET_F_NOTF_COAL 53 /* Guest can handle notifications coalescing */ +#define VIRTIO_NET_F_NOTF_COAL 53 /* Device supports notifications coalescing */ #define VIRTIO_NET_F_HASH_REPORT 57 /* Supports hash report */ #define VIRTIO_NET_F_RSS 60 /* Supports RSS RX steering */ #define VIRTIO_NET_F_RSC_EXT 61 /* extended coalescing info */ @@ -364,24 +364,24 @@ struct virtio_net_hash_config { */ #define VIRTIO_NET_CTRL_NOTF_COAL 6 /* - * Set the tx-usecs/tx-max-packets patameters. - * tx-usecs - Maximum number of usecs to delay a TX notification. - * tx-max-packets - Maximum number of packets to send before a TX notification. + * Set the tx-usecs/tx-max-packets parameters. */ struct virtio_net_ctrl_coal_tx { + /* Maximum number of packets to send before a TX notification */ __le32 tx_max_packets; + /* Maximum number of usecs to delay a TX notification */ __le32 tx_usecs; }; #define VIRTIO_NET_CTRL_NOTF_COAL_TX_SET 0 /* - * Set the rx-usecs/rx-max-packets patameters. - * rx-usecs - Maximum number of usecs to delay a RX notification. - * rx-max-frames - Maximum number of packets to receive before a RX notification. + * Set the rx-usecs/rx-max-packets parameters. 
*/ struct virtio_net_ctrl_coal_rx { + /* Maximum number of packets to receive before a RX notification */ __le32 rx_max_packets; + /* Maximum number of usecs to delay a RX notification */ __le32 rx_usecs; }; -- cgit v1.2.3 From 8f36b3b4e1b58dca7d05e1579019230437e55d43 Mon Sep 17 00:00:00 2001 From: Shuah Khan Date: Tue, 23 Aug 2022 18:24:56 -0600 Subject: usbip: add USBIP_URB_* URB transfer flags USBIP driver packs URB transfer flags in network packets that are exchanged between Server (usbip_host) and Client (vhci_hcd). URB_* flags are internal to the kernel and could change, whereas the USBIP URB flags exchanged in network packets are part of the USBIP user API and must not change. Add USBIP_URB* flags to make this an explicit API and change the client and server to map them. Details as follows: Client tx path (USBIP_CMD_SUBMIT): - Maps URB_* to USBIP_URB_* when it sends USBIP_CMD_SUBMIT packet. Server rx path (USBIP_CMD_SUBMIT): - Maps USBIP_URB_* to URB_* when it receives USBIP_CMD_SUBMIT packet. Flags aren't included in USBIP_CMD_UNLINK and USBIP_RET_SUBMIT packets and no special handling is needed for them in the following cases: - Server rx path (USBIP_CMD_UNLINK) - Client rx path & Server tx path (USBIP_RET_SUBMIT) Update protocol documentation to reflect the change. Suggested-by: Hongren Zenithal Zheng Suggested-by: Alan Stern Signed-off-by: Shuah Khan Link: https://lore.kernel.org/r/20220824002456.94605-1-skhan@linuxfoundation.org Signed-off-by: Greg Kroah-Hartman --- include/uapi/linux/usbip.h | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/usbip.h b/include/uapi/linux/usbip.h index fd393d908d8a..e4421ad55b2e 100644 --- a/include/uapi/linux/usbip.h +++ b/include/uapi/linux/usbip.h @@ -24,4 +24,30 @@ enum usbip_device_status { VDEV_ST_USED, VDEV_ST_ERROR }; + +/* USB URB Transfer flags: + * + * USBIP server and client (vchi) pack URBs in TCP packets.
The following + * are the transfer type defines used in USBIP protocol. + */ + +#define USBIP_URB_SHORT_NOT_OK 0x0001 +#define USBIP_URB_ISO_ASAP 0x0002 +#define USBIP_URB_NO_TRANSFER_DMA_MAP 0x0004 +#define USBIP_URB_ZERO_PACKET 0x0040 +#define USBIP_URB_NO_INTERRUPT 0x0080 +#define USBIP_URB_FREE_BUFFER 0x0100 +#define USBIP_URB_DIR_IN 0x0200 +#define USBIP_URB_DIR_OUT 0 +#define USBIP_URB_DIR_MASK USBIP_URB_DIR_IN + +#define USBIP_URB_DMA_MAP_SINGLE 0x00010000 +#define USBIP_URB_DMA_MAP_PAGE 0x00020000 +#define USBIP_URB_DMA_MAP_SG 0x00040000 +#define USBIP_URB_MAP_LOCAL 0x00080000 +#define USBIP_URB_SETUP_MAP_SINGLE 0x00100000 +#define USBIP_URB_SETUP_MAP_LOCAL 0x00200000 +#define USBIP_URB_DMA_SG_COMBINED 0x00400000 +#define USBIP_URB_ALIGNED_TEMP_BUFFER 0x00800000 + #endif /* _UAPI_LINUX_USBIP_H */ -- cgit v1.2.3 From 23c12d5fc02fb0712c64f3e87a27fcfa78e8af9c Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Thu, 1 Sep 2022 11:54:01 +0100 Subject: Revert "io_uring: add zc notification flush requests" This reverts commit 492dddb4f6e3a5839c27d41ff1fecdbe6c3ab851. Soon we won't have the very notion of notification flushing, so remove notification flushing requests. 
Signed-off-by: Pavel Begunkov Link: https://lore.kernel.org/r/8850334ca56e65b413cb34fd158db81d7b2865a3.1662027856.git.asml.silence@gmail.com Signed-off-by: Jens Axboe --- include/uapi/linux/io_uring.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h index 9e0b5c8d92ce..18ae5caf1773 100644 --- a/include/uapi/linux/io_uring.h +++ b/include/uapi/linux/io_uring.h @@ -301,7 +301,6 @@ enum io_uring_op { */ enum { IORING_RSRC_UPDATE_FILES, - IORING_RSRC_UPDATE_NOTIF, }; /* -- cgit v1.2.3 From d9808ceb3129b811becebdee3ec96d189c83e56c Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Thu, 1 Sep 2022 11:54:02 +0100 Subject: Revert "io_uring: rename IORING_OP_FILES_UPDATE" This reverts commit 4379d5f15b3fd4224c37841029178aa8082a242e. We removed notification flushing, also cleanup uapi preparation changes to not pollute it. Signed-off-by: Pavel Begunkov Link: https://lore.kernel.org/r/89edc3905350f91e1b6e26d9dbf42ee44fd451a2.1662027856.git.asml.silence@gmail.com Signed-off-by: Jens Axboe --- include/uapi/linux/io_uring.h | 12 +----------- 1 file changed, 1 insertion(+), 11 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h index 18ae5caf1773..111b651366bd 100644 --- a/include/uapi/linux/io_uring.h +++ b/include/uapi/linux/io_uring.h @@ -178,8 +178,7 @@ enum io_uring_op { IORING_OP_FALLOCATE, IORING_OP_OPENAT, IORING_OP_CLOSE, - IORING_OP_RSRC_UPDATE, - IORING_OP_FILES_UPDATE = IORING_OP_RSRC_UPDATE, + IORING_OP_FILES_UPDATE, IORING_OP_STATX, IORING_OP_READ, IORING_OP_WRITE, @@ -228,7 +227,6 @@ enum io_uring_op { #define IORING_TIMEOUT_ETIME_SUCCESS (1U << 5) #define IORING_TIMEOUT_CLOCK_MASK (IORING_TIMEOUT_BOOTTIME | IORING_TIMEOUT_REALTIME) #define IORING_TIMEOUT_UPDATE_MASK (IORING_TIMEOUT_UPDATE | IORING_LINK_TIMEOUT_UPDATE) - /* * sqe->splice_flags * extends splice(2) flags @@ -295,14 +293,6 @@ enum 
io_uring_op { */ #define IORING_ACCEPT_MULTISHOT (1U << 0) - -/* - * IORING_OP_RSRC_UPDATE flags - */ -enum { - IORING_RSRC_UPDATE_FILES, -}; - /* * IORING_OP_MSG_RING command types, stored in sqe->addr */ -- cgit v1.2.3 From 57f332246afa5929bdf2e7a5facddedb43549be4 Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Thu, 1 Sep 2022 11:54:03 +0100 Subject: io_uring/notif: remove notif registration We're going to remove the userspace exposed zerocopy notification API, remove notification registration. Signed-off-by: Pavel Begunkov Link: https://lore.kernel.org/r/6ff00b97be99869c386958a990593c9c31cf105b.1662027856.git.asml.silence@gmail.com Signed-off-by: Jens Axboe --- include/uapi/linux/io_uring.h | 8 -------- 1 file changed, 8 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h index 111b651366bd..b11c57b0ebb5 100644 --- a/include/uapi/linux/io_uring.h +++ b/include/uapi/linux/io_uring.h @@ -279,14 +279,10 @@ enum io_uring_op { * * IORING_RECVSEND_FIXED_BUF Use registered buffers, the index is stored in * the buf_index field. - * - * IORING_RECVSEND_NOTIF_FLUSH Flush a notification after a successful - * successful. Only for zerocopy sends. 
*/ #define IORING_RECVSEND_POLL_FIRST (1U << 0) #define IORING_RECV_MULTISHOT (1U << 1) #define IORING_RECVSEND_FIXED_BUF (1U << 2) -#define IORING_RECVSEND_NOTIF_FLUSH (1U << 3) /* * accept flags stored in sqe->ioprio @@ -474,10 +470,6 @@ enum { /* register a range of fixed file slots for automatic slot allocation */ IORING_REGISTER_FILE_ALLOC_RANGE = 25, - /* zerocopy notification API */ - IORING_REGISTER_NOTIFIERS = 26, - IORING_UNREGISTER_NOTIFIERS = 27, - /* this goes last */ IORING_REGISTER_LAST }; -- cgit v1.2.3 From b48c312be05e83b55a4d58bf61f80b4a3288fb7e Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Thu, 1 Sep 2022 11:54:04 +0100 Subject: io_uring/net: simplify zerocopy send user API Following user feedback, this patch simplifies zerocopy send API. One of the main complaints is that the current API is difficult with the userspace managing notification slots, and then send retries with error handling make it even worse. Instead of keeping notification slots change it to the per-request notifications model, which posts both completion and notification CQEs for each request when any data has been sent, and only one CQE if it fails. All notification CQEs will have IORING_CQE_F_NOTIF set and IORING_CQE_F_MORE in completion CQEs indicates whether to wait a notification or not. IOSQE_CQE_SKIP_SUCCESS is disallowed with zerocopy sends for now. This is less flexible, but greatly simplifies the user API and also the kernel implementation. We reuse notif helpers in this patch, but in the future there won't be need for keeping two requests. 
Signed-off-by: Pavel Begunkov Link: https://lore.kernel.org/r/95287640ab98fc9417370afb16e310677c63e6ce.1662027856.git.asml.silence@gmail.com Signed-off-by: Jens Axboe --- include/uapi/linux/io_uring.h | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h index b11c57b0ebb5..6b83177fd41d 100644 --- a/include/uapi/linux/io_uring.h +++ b/include/uapi/linux/io_uring.h @@ -71,8 +71,8 @@ struct io_uring_sqe { __s32 splice_fd_in; __u32 file_index; struct { - __u16 notification_idx; __u16 addr_len; + __u16 __pad3[1]; }; }; union { @@ -205,7 +205,7 @@ enum io_uring_op { IORING_OP_GETXATTR, IORING_OP_SOCKET, IORING_OP_URING_CMD, - IORING_OP_SENDZC_NOTIF, + IORING_OP_SEND_ZC, /* this goes last, obviously */ IORING_OP_LAST, @@ -326,10 +326,13 @@ struct io_uring_cqe { * IORING_CQE_F_BUFFER If set, the upper 16 bits are the buffer ID * IORING_CQE_F_MORE If set, parent SQE will generate more CQE entries * IORING_CQE_F_SOCK_NONEMPTY If set, more data to read after socket recv + * IORING_CQE_F_NOTIF Set for notification CQEs. Can be used to distinct + * them from sends. */ #define IORING_CQE_F_BUFFER (1U << 0) #define IORING_CQE_F_MORE (1U << 1) #define IORING_CQE_F_SOCK_NONEMPTY (1U << 2) +#define IORING_CQE_F_NOTIF (1U << 3) enum { IORING_CQE_BUFFER_SHIFT = 16, -- cgit v1.2.3 From 385ecfdfb5d5ad0ff37e20381c70e18af8cf1bdb Mon Sep 17 00:00:00 2001 From: Abhishek Sahu Date: Mon, 29 Aug 2022 17:18:46 +0530 Subject: vfio: Add the device features for the low power entry and exit This patch adds the following new device features for the low power entry and exit in the header file. The implementation for the same will be added in the subsequent patches. - VFIO_DEVICE_FEATURE_LOW_POWER_ENTRY - VFIO_DEVICE_FEATURE_LOW_POWER_ENTRY_WITH_WAKEUP - VFIO_DEVICE_FEATURE_LOW_POWER_EXIT For vfio-pci based devices, with the standard PCI PM registers, all power states cannot be achieved. 
The platform-based power management needs to be involved to go into the lowest power state. For doing low power entry and exit with platform-based power management, these device features can be used. The entry device feature has two variants. These two variants are mainly to support the different behaviour for the low power entry. If there is any access for the VFIO device on the host side, then the device will be moved out of the low power state without the user's guest driver involvement. Some devices (for example NVIDIA VGA or 3D controller) require the user's guest driver involvement for each low-power entry. In the first variant, the host can return the device to low power automatically. The device will continue to attempt to reach low power until the low power exit feature is called. In the second variant, if the device exits low power due to an access, the host kernel will signal the user via the provided eventfd and will not return the device to low power without a subsequent call to one of the low power entry features. A call to the low power exit feature is optional if the user provided eventfd is signaled. These device features only support VFIO_DEVICE_FEATURE_SET and VFIO_DEVICE_FEATURE_PROBE operations. Signed-off-by: Abhishek Sahu Link: https://lore.kernel.org/r/20220829114850.4341-2-abhsahu@nvidia.com Signed-off-by: Alex Williamson --- include/uapi/linux/vfio.h | 56 +++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 56 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/vfio.h b/include/uapi/linux/vfio.h index 733a1cddde30..76a173f973de 100644 --- a/include/uapi/linux/vfio.h +++ b/include/uapi/linux/vfio.h @@ -986,6 +986,62 @@ enum vfio_device_mig_state { VFIO_DEVICE_STATE_RUNNING_P2P = 5, }; +/* + * Upon VFIO_DEVICE_FEATURE_SET, allow the device to be moved into a low power + * state with the platform-based power management. 
Device use of lower power + * states depends on factors managed by the runtime power management core, + * including system level support and coordinating support among dependent + * devices. Enabling device low power entry does not guarantee lower power + * usage by the device, nor is a mechanism provided through this feature to + * know the current power state of the device. If any device access happens + * (either from the host or through the vfio uAPI) when the device is in the + * low power state, then the host will move the device out of the low power + * state as necessary prior to the access. Once the access is completed, the + * device may re-enter the low power state. For single shot low power support + * with wake-up notification, see + * VFIO_DEVICE_FEATURE_LOW_POWER_ENTRY_WITH_WAKEUP below. Access to mmap'd + * device regions is disabled on LOW_POWER_ENTRY and may only be resumed after + * calling LOW_POWER_EXIT. + */ +#define VFIO_DEVICE_FEATURE_LOW_POWER_ENTRY 3 + +/* + * This device feature has the same behavior as + * VFIO_DEVICE_FEATURE_LOW_POWER_ENTRY with the exception that the user + * provides an eventfd for wake-up notification. When the device moves out of + * the low power state for the wake-up, the host will not allow the device to + * re-enter a low power state without a subsequent user call to one of the low + * power entry device feature IOCTLs. Access to mmap'd device regions is + * disabled on LOW_POWER_ENTRY_WITH_WAKEUP and may only be resumed after the + * low power exit. The low power exit can happen either through LOW_POWER_EXIT + * or through any other access (where the wake-up notification has been + * generated). The access to mmap'd device regions will not trigger low power + * exit. + * + * The notification through the provided eventfd will be generated only when + * the device has entered and is resumed from a low power state after + * calling this device feature IOCTL. 
A device that has not entered low power + * state, as managed through the runtime power management core, will not + * generate a notification through the provided eventfd on access. Calling the + * LOW_POWER_EXIT feature is optional in the case where notification has been + * signaled on the provided eventfd that a resume from low power has occurred. + */ +struct vfio_device_low_power_entry_with_wakeup { + __s32 wakeup_eventfd; + __u32 reserved; +}; + +#define VFIO_DEVICE_FEATURE_LOW_POWER_ENTRY_WITH_WAKEUP 4 + +/* + * Upon VFIO_DEVICE_FEATURE_SET, disallow use of device low power states as + * previously enabled via VFIO_DEVICE_FEATURE_LOW_POWER_ENTRY or + * VFIO_DEVICE_FEATURE_LOW_POWER_ENTRY_WITH_WAKEUP device features. + * This device feature IOCTL may itself generate a wakeup eventfd notification + * in the latter case if the device had previously entered a low power state. + */ +#define VFIO_DEVICE_FEATURE_LOW_POWER_EXIT 5 + /* -------- API for Type1 VFIO IOMMU -------- */ /** -- cgit v1.2.3 From 44c51472bef83bb70b43e2f4b7a592096f32a855 Mon Sep 17 00:00:00 2001 From: Shmulik Ladkani Date: Wed, 31 Aug 2022 17:40:09 +0300 Subject: bpf: Support getting tunnel flags Existing 'bpf_skb_get_tunnel_key' extracts various tunnel parameters (id, ttl, tos, local and remote) but does not expose ip_tunnel_info's tun_flags to the BPF program. It makes sense to expose tun_flags to the BPF program. Assume for example multiple GRE tunnels maintained on a single GRE interface in collect_md mode. The program expects origins to initiate over GRE, however different origins use different GRE characteristics (e.g. some prefer to use GRE checksum, some do not; some pass a GRE key, some do not, etc..). A BPF program getting tun_flags can therefore remember the relevant flags (e.g. TUNNEL_CSUM, TUNNEL_SEQ...) for each initiating remote. 
In the reply path, the program can use 'bpf_skb_set_tunnel_key' in order to correctly reply to the remote, using similar characteristics, based on the stored tunnel flags. Introduce BPF_F_TUNINFO_FLAGS flag for bpf_skb_get_tunnel_key. If specified, 'bpf_tunnel_key->tunnel_flags' is set with the tun_flags. Decided to use the existing unused 'tunnel_ext' as the storage for the 'tunnel_flags' in order to avoid changing bpf_tunnel_key's layout. Also, the following has been considered during the design: 1. Convert the "interesting" internal TUNNEL_xxx flags back to BPF_F_yyy and place into the new 'tunnel_flags' field. This has 2 drawbacks: - The BPF_F_yyy flags are from *set_tunnel_key* enumeration space, e.g. BPF_F_ZERO_CSUM_TX. It is awkward that it is "returned" into tunnel_flags from a *get_tunnel_key* call. - Not all "interesting" TUNNEL_xxx flags can be mapped to existing BPF_F_yyy flags, and it doesn't make sense to create new BPF_F_yyy flags just for purposes of the returned tunnel_flags. 2. Place key.tun_flags into 'tunnel_flags' but mask them, keeping only "interesting" flags. That's ok, but the drawback is that what's "interesting" for my usecase might be limiting for other usecases. Therefore I decided to expose what's in key.tun_flags *as is*, which seems most flexible. The BPF user can just choose to ignore bits he's not interested in. The TUNNEL_xxx are also UAPI, so no harm exposing them back in the get_tunnel_key call. Signed-off-by: Shmulik Ladkani Signed-off-by: Daniel Borkmann Link: https://lore.kernel.org/bpf/20220831144010.174110-1-shmulik.ladkani@gmail.com --- include/uapi/linux/bpf.h | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 962960a98835..837c0f9b7fdd 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -5659,6 +5659,11 @@ enum { BPF_F_SEQ_NUMBER = (1ULL << 3), }; +/* BPF_FUNC_skb_get_tunnel_key flags. 
*/ +enum { + BPF_F_TUNINFO_FLAGS = (1ULL << 4), +}; + /* BPF_FUNC_perf_event_output, BPF_FUNC_perf_event_read and * BPF_FUNC_perf_event_read_value flags. */ @@ -5848,7 +5853,10 @@ struct bpf_tunnel_key { }; __u8 tunnel_tos; __u8 tunnel_ttl; - __u16 tunnel_ext; /* Padding, future use. */ + union { + __u16 tunnel_ext; /* compat */ + __be16 tunnel_flags; + }; __u32 tunnel_label; union { __u32 local_ipv4; -- cgit v1.2.3 From 5854a09b49574da5a77a0f36ad7b021a2661321d Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Wed, 31 Aug 2022 14:12:42 -0500 Subject: net/ipv4: Use __DECLARE_FLEX_ARRAY() helper We now have a cleaner way to keep compatibility with user-space (a.k.a. not breaking it) when we need to keep in place a one-element array (for its use in user-space) together with a flexible-array member (for its use in kernel-space) without making it hard to read at the source level. This is through the use of the new __DECLARE_FLEX_ARRAY() helper macro. The size and memory layout of the structure is preserved after the changes. See below. 
Before changes: $ pahole -C ip_msfilter net/ipv4/igmp.o struct ip_msfilter { union { struct { __be32 imsf_multiaddr_aux; /* 0 4 */ __be32 imsf_interface_aux; /* 4 4 */ __u32 imsf_fmode_aux; /* 8 4 */ __u32 imsf_numsrc_aux; /* 12 4 */ __be32 imsf_slist[1]; /* 16 4 */ }; /* 0 20 */ struct { __be32 imsf_multiaddr; /* 0 4 */ __be32 imsf_interface; /* 4 4 */ __u32 imsf_fmode; /* 8 4 */ __u32 imsf_numsrc; /* 12 4 */ __be32 imsf_slist_flex[0]; /* 16 0 */ }; /* 0 16 */ }; /* 0 20 */ /* size: 20, cachelines: 1, members: 1 */ /* last cacheline: 20 bytes */ }; After changes: $ pahole -C ip_msfilter net/ipv4/igmp.o struct ip_msfilter { __be32 imsf_multiaddr; /* 0 4 */ __be32 imsf_interface; /* 4 4 */ __u32 imsf_fmode; /* 8 4 */ __u32 imsf_numsrc; /* 12 4 */ union { __be32 imsf_slist[1]; /* 16 4 */ struct { struct { } __empty_imsf_slist_flex; /* 16 0 */ __be32 imsf_slist_flex[0]; /* 16 0 */ }; /* 16 0 */ }; /* 16 4 */ /* size: 20, cachelines: 1, members: 5 */ /* last cacheline: 20 bytes */ }; In the past, we had to duplicate the whole original structure within a union, and update the names of all the members. Now, we just need to declare the flexible-array member to be used in kernel-space through the __DECLARE_FLEX_ARRAY() helper together with the one-element array, within a union. This makes the source code more clean and easier to read. Link: https://github.com/KSPP/linux/issues/193 Signed-off-by: Gustavo A. R. Silva Reviewed-by: Kees Cook Signed-off-by: David S. 
Miller --- include/uapi/linux/in.h | 20 ++++++-------------- 1 file changed, 6 insertions(+), 14 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/in.h b/include/uapi/linux/in.h index 14168225cecd..578daa6f816b 100644 --- a/include/uapi/linux/in.h +++ b/include/uapi/linux/in.h @@ -188,21 +188,13 @@ struct ip_mreq_source { }; struct ip_msfilter { + __be32 imsf_multiaddr; + __be32 imsf_interface; + __u32 imsf_fmode; + __u32 imsf_numsrc; union { - struct { - __be32 imsf_multiaddr_aux; - __be32 imsf_interface_aux; - __u32 imsf_fmode_aux; - __u32 imsf_numsrc_aux; - __be32 imsf_slist[1]; - }; - struct { - __be32 imsf_multiaddr; - __be32 imsf_interface; - __u32 imsf_fmode; - __u32 imsf_numsrc; - __be32 imsf_slist_flex[]; - }; + __be32 imsf_slist[1]; + __DECLARE_FLEX_ARRAY(__be32, imsf_slist_flex); }; }; -- cgit v1.2.3 From a36c421690b3e5dee38fc12abfcabda742f00064 Mon Sep 17 00:00:00 2001 From: James Prestwood Date: Fri, 26 Aug 2022 10:00:31 -0700 Subject: wifi: nl80211: Add POWERED_ADDR_CHANGE feature Add a new extended feature bit signifying that the wireless hardware supports changing the MAC address while the underlying net_device is powered. Note that this has a different meaning from IFF_LIVE_ADDR_CHANGE as additional restrictions might be imposed by the hardware, such as: - No connection is active on this interface, carrier is off - No scan is in progress - No offchannel operations are in progress Signed-off-by: James Prestwood Signed-off-by: Johannes Berg --- include/uapi/linux/nl80211.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index 573db20403dc..a00a23840c57 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -6281,6 +6281,14 @@ enum nl80211_feature_flags { * @NL80211_EXT_FEATURE_RADAR_BACKGROUND: Device supports background radar/CAC * detection. 
* + * @NL80211_EXT_FEATURE_POWERED_ADDR_CHANGE: Device can perform a MAC address + * change without having to bring the underlying network device down + * first. For example, in station mode this can be used to vary the + * origin MAC address prior to a connection to a new AP for privacy + * or other reasons. Note that certain driver specific restrictions + * might apply, e.g. no scans in progress, no offchannel operations + * in progress, and no active connections. + * * @NUM_NL80211_EXT_FEATURES: number of extended features. * @MAX_NL80211_EXT_FEATURES: highest extended feature index. */ @@ -6348,6 +6356,7 @@ enum nl80211_ext_feature_index { NL80211_EXT_FEATURE_BSS_COLOR, NL80211_EXT_FEATURE_FILS_CRYPTO_OFFLOAD, NL80211_EXT_FEATURE_RADAR_BACKGROUND, + NL80211_EXT_FEATURE_POWERED_ADDR_CHANGE, /* add new features before the definition below */ NUM_NL80211_EXT_FEATURES, -- cgit v1.2.3 From 6522047c65764c9aaec8009e73daa8c0b138c701 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Fri, 2 Sep 2022 16:12:50 +0200 Subject: wifi: nl80211: add MLD address to assoc BSS entries Add an MLD address attribute to BSS entries that the interface is currently associated with to help userspace figure out what's going on. Signed-off-by: Johannes Berg --- include/uapi/linux/nl80211.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index a00a23840c57..c32e7616a366 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -4959,6 +4959,7 @@ enum nl80211_bss_scan_width { * using the nesting index as the antenna number. * @NL80211_BSS_FREQUENCY_OFFSET: frequency offset in KHz * @NL80211_BSS_MLO_LINK_ID: MLO link ID of the BSS (u8). + * @NL80211_BSS_MLD_ADDR: MLD address of this BSS if connected to it. 
* @__NL80211_BSS_AFTER_LAST: internal * @NL80211_BSS_MAX: highest BSS attribute */ @@ -4985,6 +4986,7 @@ enum nl80211_bss { NL80211_BSS_CHAIN_SIGNAL, NL80211_BSS_FREQUENCY_OFFSET, NL80211_BSS_MLO_LINK_ID, + NL80211_BSS_MLD_ADDR, /* keep last */ __NL80211_BSS_AFTER_LAST, -- cgit v1.2.3 From 835e699ef82adfc85ac4cc3f1f237c1adfdefd20 Mon Sep 17 00:00:00 2001 From: Jagath Jog J Date: Wed, 31 Aug 2022 12:01:16 +0530 Subject: iio: Add new event type gesture and use direction for single and double tap Add new event type for tap called gesture and the direction can be used to differentiate single and double tap. This may be used by accelerometer sensors to express single and double tap events. For directional tap, modifiers like IIO_MOD_(X/Y/Z) can be used along with singletap and doubletap direction. Signed-off-by: Jagath Jog J Link: https://lore.kernel.org/r/20220831063117.4141-2-jagathjog1996@gmail.com Signed-off-by: Jonathan Cameron --- include/uapi/linux/iio/types.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/iio/types.h b/include/uapi/linux/iio/types.h index 472cead10d8d..913864221ac4 100644 --- a/include/uapi/linux/iio/types.h +++ b/include/uapi/linux/iio/types.h @@ -105,6 +105,7 @@ enum iio_event_type { IIO_EV_TYPE_MAG_ADAPTIVE, IIO_EV_TYPE_CHANGE, IIO_EV_TYPE_MAG_REFERENCED, + IIO_EV_TYPE_GESTURE, }; enum iio_event_direction { @@ -112,6 +113,8 @@ enum iio_event_direction { IIO_EV_DIR_RISING, IIO_EV_DIR_FALLING, IIO_EV_DIR_NONE, + IIO_EV_DIR_SINGLETAP, + IIO_EV_DIR_DOUBLETAP, }; #endif /* _UAPI_IIO_TYPES_H_ */ -- cgit v1.2.3 From 27ed9353aec9de4277b3389c9f2b04beb6ab7622 Mon Sep 17 00:00:00 2001 From: Yonghong Song Date: Wed, 31 Aug 2022 08:26:57 -0700 Subject: bpf: Update descriptions for helpers bpf_get_func_arg[_cnt]() Now instead of the number of arguments, the number of registers holding argument values are stored in trampoline. Update the description of bpf_get_func_arg[_cnt]() helpers. 
Previous programs without struct arguments should continue to work as usual. Signed-off-by: Yonghong Song Link: https://lore.kernel.org/r/20220831152657.2078805-1-yhs@fb.com Signed-off-by: Alexei Starovoitov --- include/uapi/linux/bpf.h | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 793103b10eab..3df78c56c1bf 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -5079,12 +5079,12 @@ union bpf_attr { * * long bpf_get_func_arg(void *ctx, u32 n, u64 *value) * Description - * Get **n**-th argument (zero based) of the traced function (for tracing programs) + * Get **n**-th argument register (zero based) of the traced function (for tracing programs) * returned in **value**. * * Return * 0 on success. - * **-EINVAL** if n >= arguments count of traced function. + * **-EINVAL** if n >= argument register count of traced function. * * long bpf_get_func_ret(void *ctx, u64 *value) * Description @@ -5097,10 +5097,11 @@ union bpf_attr { * * long bpf_get_func_arg_cnt(void *ctx) * Description - * Get number of arguments of the traced function (for tracing programs). + * Get number of registers of the traced function (for tracing programs) where + * function arguments are stored in these registers. * * Return - * The number of arguments of the traced function. + * The number of argument registers of the traced function. * * int bpf_get_retval(void) * Description -- cgit v1.2.3 From a0a4de4d897f5ce672e086cb6b9f91a306af6953 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Mon, 22 Aug 2022 16:41:21 +0200 Subject: netfilter: remove NFPROTO_DECNET Decnet has been removed. so no need to reserve space in arrays for it. 
Signed-off-by: Florian Westphal --- include/uapi/linux/netfilter.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/netfilter.h b/include/uapi/linux/netfilter.h index 53411ccc69db..5a79ccb76701 100644 --- a/include/uapi/linux/netfilter.h +++ b/include/uapi/linux/netfilter.h @@ -63,7 +63,9 @@ enum { NFPROTO_NETDEV = 5, NFPROTO_BRIDGE = 7, NFPROTO_IPV6 = 10, +#ifndef __KERNEL__ /* no longer supported by kernel */ NFPROTO_DECNET = 12, +#endif NFPROTO_NUMPROTO, }; -- cgit v1.2.3 From 42ee53f9bfd3e4cf58ae7656e0d11075f5fe8489 Mon Sep 17 00:00:00 2001 From: Yishai Hadas Date: Thu, 8 Sep 2022 21:34:41 +0300 Subject: vfio: Introduce DMA logging uAPIs DMA logging allows a device to internally record what DMAs the device is initiating and report them back to userspace. It is part of the VFIO migration infrastructure that allows implementing dirty page tracking during the pre copy phase of live migration. Only DMA WRITEs are logged, and this API is not connected to VFIO_DEVICE_FEATURE_MIG_DEVICE_STATE. This patch introduces the DMA logging involved uAPIs. It uses the FEATURE ioctl with its GET/SET/PROBE options as of below. It exposes a PROBE option to detect if the device supports DMA logging. It exposes a SET option to start device DMA logging in given IOVAs ranges. It exposes a SET option to stop device DMA logging that was previously started. It exposes a GET option to read back and clear the device DMA log. Extra details exist as part of vfio.h per a specific option. 
Signed-off-by: Yishai Hadas Signed-off-by: Jason Gunthorpe Link: https://lore.kernel.org/r/20220908183448.195262-4-yishaih@nvidia.com Signed-off-by: Alex Williamson --- include/uapi/linux/vfio.h | 86 +++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 86 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/vfio.h b/include/uapi/linux/vfio.h index 76a173f973de..d7d8e0922376 100644 --- a/include/uapi/linux/vfio.h +++ b/include/uapi/linux/vfio.h @@ -1042,6 +1042,92 @@ struct vfio_device_low_power_entry_with_wakeup { */ #define VFIO_DEVICE_FEATURE_LOW_POWER_EXIT 5 +/* + * Upon VFIO_DEVICE_FEATURE_SET start/stop device DMA logging. + * VFIO_DEVICE_FEATURE_PROBE can be used to detect if the device supports + * DMA logging. + * + * DMA logging allows a device to internally record what DMAs the device is + * initiating and report them back to userspace. It is part of the VFIO + * migration infrastructure that allows implementing dirty page tracking + * during the pre copy phase of live migration. Only DMA WRITEs are logged, + * and this API is not connected to VFIO_DEVICE_FEATURE_MIG_DEVICE_STATE. + * + * When DMA logging is started a range of IOVAs to monitor is provided and the + * device can optimize its logging to cover only the IOVA range given. Each + * DMA that the device initiates inside the range will be logged by the device + * for later retrieval. + * + * page_size is an input that hints what tracking granularity the device + * should try to achieve. If the device cannot do the hinted page size then + * it's the driver choice which page size to pick based on its support. + * On output the device will return the page size it selected. + * + * ranges is a pointer to an array of + * struct vfio_device_feature_dma_logging_range. + * + * The core kernel code guarantees to support by minimum num_ranges that fit + * into a single kernel page. 
User space can try higher values but should give + * up if the above can't be achieved as of some driver limitations. + * + * A single call to start device DMA logging can be issued and a matching stop + * should follow at the end. Another start is not allowed in the meantime. + */ +struct vfio_device_feature_dma_logging_control { + __aligned_u64 page_size; + __u32 num_ranges; + __u32 __reserved; + __aligned_u64 ranges; +}; + +struct vfio_device_feature_dma_logging_range { + __aligned_u64 iova; + __aligned_u64 length; +}; + +#define VFIO_DEVICE_FEATURE_DMA_LOGGING_START 6 + +/* + * Upon VFIO_DEVICE_FEATURE_SET stop device DMA logging that was started + * by VFIO_DEVICE_FEATURE_DMA_LOGGING_START + */ +#define VFIO_DEVICE_FEATURE_DMA_LOGGING_STOP 7 + +/* + * Upon VFIO_DEVICE_FEATURE_GET read back and clear the device DMA log + * + * Query the device's DMA log for written pages within the given IOVA range. + * During querying the log is cleared for the IOVA range. + * + * bitmap is a pointer to an array of u64s that will hold the output bitmap + * with 1 bit reporting a page_size unit of IOVA. The mapping of IOVA to bits + * is given by: + * bitmap[(addr - iova)/page_size] & (1ULL << (addr % 64)) + * + * The input page_size can be any power of two value and does not have to + * match the value given to VFIO_DEVICE_FEATURE_DMA_LOGGING_START. The driver + * will format its internal logging to match the reporting page size, possibly + * by replicating bits if the internal page size is lower than requested. + * + * The LOGGING_REPORT will only set bits in the bitmap and never clear or + * perform any initialization of the user provided bitmap. + * + * If any error is returned userspace should assume that the dirty log is + * corrupted. Error recovery is to consider all memory dirty and try to + * restart the dirty tracking, or to abort/restart the whole migration. + * + * If DMA logging is not enabled, an error will be returned. 
+ * + */ +struct vfio_device_feature_dma_logging_report { + __aligned_u64 iova; + __aligned_u64 length; + __aligned_u64 page_size; + __aligned_u64 bitmap; +}; + +#define VFIO_DEVICE_FEATURE_DMA_LOGGING_REPORT 8 + /* -------- API for Type1 VFIO IOMMU -------- */ /** -- cgit v1.2.3 From 825cf206ed510c4a1758bef8957e2b039253e2e3 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Fri, 26 Aug 2022 23:58:44 -0700 Subject: statx: add direct I/O alignment information Traditionally, the conditions for when DIO (direct I/O) is supported were fairly simple. For both block devices and regular files, DIO had to be aligned to the logical block size of the block device. However, due to filesystem features that have been added over time (e.g. multi-device support, data journalling, inline data, encryption, verity, compression, checkpoint disabling, log-structured mode), the conditions for when DIO is allowed on a regular file have gotten increasingly complex. Whether a particular regular file supports DIO, and with what alignment, can depend on various file attributes and filesystem mount options, as well as which block device(s) the file's data is located on. Moreover, the general rule of DIO needing to be aligned to the block device's logical block size was recently relaxed to allow user buffers (but not file offsets) aligned to the DMA alignment instead. See commit bf8d08532bc1 ("iomap: add support for dma aligned direct-io"). XFS has an ioctl XFS_IOC_DIOINFO that exposes DIO alignment information. Uplifting this to the VFS is one possibility. However, as discussed (https://lore.kernel.org/linux-fsdevel/20220120071215.123274-1-ebiggers@kernel.org/T/#u), this ioctl is rarely used and not known to be used outside of XFS-specific code. It was also never intended to indicate when a file doesn't support DIO at all, nor was it intended for block devices. Therefore, let's expose this information via statx(). 
Add the STATX_DIOALIGN flag and two new statx fields associated with it: * stx_dio_mem_align: the alignment (in bytes) required for user memory buffers for DIO, or 0 if DIO is not supported on the file. * stx_dio_offset_align: the alignment (in bytes) required for file offsets and I/O segment lengths for DIO, or 0 if DIO is not supported on the file. This will only be nonzero if stx_dio_mem_align is nonzero, and vice versa. Note that as with other statx() extensions, if STATX_DIOALIGN isn't set in the returned statx struct, then these new fields won't be filled in. This will happen if the file is neither a regular file nor a block device, or if the file is a regular file and the filesystem doesn't support STATX_DIOALIGN. It might also happen if the caller didn't include STATX_DIOALIGN in the request mask, since statx() isn't required to return unrequested information. This commit only adds the VFS-level plumbing for STATX_DIOALIGN. For regular files, individual filesystems will still need to add code to support it. For block devices, a separate commit will wire it up too. Reviewed-by: Christoph Hellwig Reviewed-by: Darrick J. Wong Reviewed-by: Martin K. 
Petersen Reviewed-by: Christian Brauner (Microsoft) Signed-off-by: Eric Biggers Link: https://lore.kernel.org/r/20220827065851.135710-2-ebiggers@kernel.org --- include/uapi/linux/stat.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/stat.h b/include/uapi/linux/stat.h index 1500a0f58041..7cab2c65d3d7 100644 --- a/include/uapi/linux/stat.h +++ b/include/uapi/linux/stat.h @@ -124,7 +124,8 @@ struct statx { __u32 stx_dev_minor; /* 0x90 */ __u64 stx_mnt_id; - __u64 __spare2; + __u32 stx_dio_mem_align; /* Memory buffer alignment for direct I/O */ + __u32 stx_dio_offset_align; /* File offset alignment for direct I/O */ /* 0xa0 */ __u64 __spare3[12]; /* Spare space for future expansion */ /* 0x100 */ @@ -152,6 +153,7 @@ struct statx { #define STATX_BASIC_STATS 0x000007ffU /* The stuff in the normal stat struct */ #define STATX_BTIME 0x00000800U /* Want/got stx_btime */ #define STATX_MNT_ID 0x00001000U /* Got stx_mnt_id */ +#define STATX_DIOALIGN 0x00002000U /* Want/got direct I/O alignment info */ #define STATX__RESERVED 0x80000000U /* Reserved for future struct statx expansion */ -- cgit v1.2.3 From 2d5de004e009add27db76c5cdc9f1f7f7dc087e7 Mon Sep 17 00:00:00 2001 From: Axel Rasmussen Date: Mon, 8 Aug 2022 10:56:11 -0700 Subject: userfaultfd: add /dev/userfaultfd for fine grained access control Historically, it has been shown that intercepting kernel faults with userfaultfd (thereby forcing the kernel to wait for an arbitrary amount of time) can be exploited, or at least can make some kinds of exploits easier. So, in 37cd0575b8 "userfaultfd: add UFFD_USER_MODE_ONLY" we changed things so, in order for kernel faults to be handled by userfaultfd, either the process needs CAP_SYS_PTRACE, or this sysctl must be configured so that any unprivileged user can do it. 
In a typical implementation of a hypervisor with live migration (take QEMU/KVM as one such example), we do indeed need to be able to handle kernel faults. But, both options above are less than ideal: - Toggling the sysctl increases attack surface by allowing any unprivileged user to do it. - Granting the live migration process CAP_SYS_PTRACE gives it this ability, but *also* the ability to "observe and control the execution of another process [...], and examine and change [its] memory and registers" (from ptrace(2)). This isn't something we need or want to be able to do, so granting this permission violates the "principle of least privilege". This is all a long winded way to say: we want a more fine-grained way to grant access to userfaultfd, without granting other additional permissions at the same time. To achieve this, add a /dev/userfaultfd misc device. This device provides an alternative to the userfaultfd(2) syscall for the creation of new userfaultfds. The idea is, any userfaultfds created this way will be able to handle kernel faults, without the caller having any special capabilities. Access to this mechanism is instead restricted using e.g. standard filesystem permissions. [axelrasmussen@google.com: Handle misc_register() failure properly] Link: https://lkml.kernel.org/r/20220819205201.658693-3-axelrasmussen@google.com Link: https://lkml.kernel.org/r/20220808175614.3885028-3-axelrasmussen@google.com Signed-off-by: Axel Rasmussen Acked-by: Nadav Amit Acked-by: Peter Xu Acked-by: Mike Rapoport Cc: Al Viro Cc: Dave Hansen Cc: Dmitry V. 
Levin Cc: Gleb Fotengauer-Malinovskiy Cc: Hugh Dickins Cc: Jan Kara Cc: Jonathan Corbet Cc: Mel Gorman Cc: Mike Kravetz Cc: Shuah Khan Cc: Shuah Khan Cc: Suren Baghdasaryan Cc: Vlastimil Babka Cc: Zhang Yi Cc: Mike Rapoport Signed-off-by: Andrew Morton --- include/uapi/linux/userfaultfd.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/userfaultfd.h b/include/uapi/linux/userfaultfd.h index 7d32b1e797fb..005e5e306266 100644 --- a/include/uapi/linux/userfaultfd.h +++ b/include/uapi/linux/userfaultfd.h @@ -12,6 +12,10 @@ #include +/* ioctls for /dev/userfaultfd */ +#define USERFAULTFD_IOC 0xAA +#define USERFAULTFD_IOC_NEW _IO(USERFAULTFD_IOC, 0x00) + /* * If the UFFDIO_API is upgraded someday, the UFFDIO_UNREGISTER and * UFFDIO_WAKE ioctls should be defined as _IOW and not as _IOR. In -- cgit v1.2.3 From b4e12b2d70fd9eccdb3cef8015dc1788ca38e3fd Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Thu, 8 Sep 2022 14:41:04 -0700 Subject: perf: Kill __PERF_SAMPLE_CALLCHAIN_EARLY There's no in-tree user anymore. Let's get rid of it. 
Signed-off-by: Namhyung Kim Signed-off-by: Peter Zijlstra (Intel) Link: https://lore.kernel.org/r/20220908214104.3851807-3-namhyung@kernel.org --- include/uapi/linux/perf_event.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/perf_event.h b/include/uapi/linux/perf_event.h index dca16582885f..e639c74cf5fb 100644 --- a/include/uapi/linux/perf_event.h +++ b/include/uapi/linux/perf_event.h @@ -164,8 +164,6 @@ enum perf_event_sample_format { PERF_SAMPLE_WEIGHT_STRUCT = 1U << 24, PERF_SAMPLE_MAX = 1U << 25, /* non-ABI */ - - __PERF_SAMPLE_CALLCHAIN_EARLY = 1ULL << 63, /* non-ABI; internal use */ }; #define PERF_SAMPLE_WEIGHT_TYPE (PERF_SAMPLE_WEIGHT | PERF_SAMPLE_WEIGHT_STRUCT) -- cgit v1.2.3 From 061834624c87282c6d9d8c5395aaff4380e5e1fc Mon Sep 17 00:00:00 2001 From: Oliver Hartkopp Date: Mon, 12 Sep 2022 19:07:21 +0200 Subject: can: set CANFD_FDF flag in all CAN FD frame structures To simplify the testing in user space all struct canfd_frame's provided by the CAN subsystem of the Linux kernel now have the CANFD_FDF flag set in canfd_frame::flags. NB: Handcrafted ETH_P_CANFD frames introduced via PF_PACKET socket might not set this bit correctly. During the check for sufficient headroom in PF_PACKET sk_buffs the uninitialized CAN sk_buff data structures are filled. In the case of a CAN FD frame the CANFD_FDF flag is set accordingly. As the CAN frame content is already zero initialized in alloc_canfd_skb() the obsolete initialization of cf->flags in the CTU CAN FD driver has been removed as it would overwrite the already set CANFD_FDF flag. 
Acked-by: Vincent Mailhol Signed-off-by: Oliver Hartkopp Link: https://lore.kernel.org/all/20220912170725.120748-4-socketcan@hartkopp.net Signed-off-by: Marc Kleine-Budde --- include/uapi/linux/can.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/can.h b/include/uapi/linux/can.h index 90801ada2bbe..7b23eeeb3273 100644 --- a/include/uapi/linux/can.h +++ b/include/uapi/linux/can.h @@ -141,8 +141,8 @@ struct can_frame { * When this is done the former differentiation via CAN_MTU / CANFD_MTU gets * lost. CANFD_FDF allows programmers to mark CAN FD frames in the case of * using struct canfd_frame for mixed CAN / CAN FD content (dual use). - * N.B. the Kernel APIs do NOT provide mixed CAN / CAN FD content inside of - * struct canfd_frame therefore the CANFD_FDF flag is disregarded by Linux. + * Since the introduction of CAN XL the CANFD_FDF flag is set in all CAN FD + * frame structures provided by the CAN subsystem of the Linux kernel. */ #define CANFD_BRS 0x01 /* bit rate switch (second bitrate for payload data) */ #define CANFD_ESI 0x02 /* error state indicator of the transmitting node */ -- cgit v1.2.3 From 1a3e3034c049503ec6992a4a7d573e7fff31fac4 Mon Sep 17 00:00:00 2001 From: Oliver Hartkopp Date: Mon, 12 Sep 2022 19:07:22 +0200 Subject: can: canxl: introduce CAN XL data structure This patch adds defines for data structures and length information for CAN XL (CAN with eXtended data Length) which can transfer up to 2048 byte inside a single frame. 
Notable changes from CAN FD: - the 11 bit arbitration field is now named 'priority' instead of 'can_id' (there are no 29 bit identifiers nor RTR frames anymore) - the data length needs a uint16 value to cover up to 2048 byte (the length element position is different to struct can[fd]_frame) - new fields (SDT, AF) and a SEC bit have been introduced - the virtual CAN interface identifier is not part of the CAN XL frame struct as this VCID value is stored in struct skbuff (analogous to the VLAN id) Acked-by: Vincent Mailhol Signed-off-by: Oliver Hartkopp Link: https://lore.kernel.org/all/20220912170725.120748-5-socketcan@hartkopp.net Signed-off-by: Marc Kleine-Budde --- include/uapi/linux/can.h | 51 ++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 51 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/can.h b/include/uapi/linux/can.h index 7b23eeeb3273..dd645ea72306 100644 --- a/include/uapi/linux/can.h +++ b/include/uapi/linux/can.h @@ -48,6 +48,7 @@ #include #include +#include /* for offsetof */ /* controller area network (CAN) kernel definitions */ @@ -60,6 +61,7 @@ #define CAN_SFF_MASK 0x000007FFU /* standard frame format (SFF) */ #define CAN_EFF_MASK 0x1FFFFFFFU /* extended frame format (EFF) */ #define CAN_ERR_MASK 0x1FFFFFFFU /* omit EFF, RTR, ERR flags */ +#define CANXL_PRIO_MASK CAN_SFF_MASK /* 11 bit priority mask */ /* * Controller Area Network Identifier structure @@ -73,6 +75,7 @@ typedef __u32 canid_t; #define CAN_SFF_ID_BITS 11 #define CAN_EFF_ID_BITS 29 +#define CANXL_PRIO_BITS CAN_SFF_ID_BITS /* * Controller Area Network Error Message Frame Mask structure @@ -91,6 +94,16 @@ typedef __u32 can_err_mask_t; #define CANFD_MAX_DLC 15 #define CANFD_MAX_DLEN 64 +/* + * CAN XL payload length and DLC definitions according to ISO 11898-1 + * CAN XL DLC ranges from 0 .. 2047 => data length from 1 ..
2048 byte + */ +#define CANXL_MIN_DLC 0 +#define CANXL_MAX_DLC 2047 +#define CANXL_MAX_DLC_MASK 0x07FF +#define CANXL_MIN_DLEN 1 +#define CANXL_MAX_DLEN 2048 + /** * struct can_frame - Classical CAN frame structure (aka CAN 2.0B) * @can_id: CAN ID of the frame and CAN_*_FLAG flags, see canid_t definition @@ -166,8 +179,46 @@ struct canfd_frame { __u8 data[CANFD_MAX_DLEN] __attribute__((aligned(8))); }; +/* + * defined bits for canxl_frame.flags + * + * The canxl_frame.flags element contains two bits CANXL_XLF and CANXL_SEC + * and shares the relative position of the struct can[fd]_frame.len element. + * The CANXL_XLF bit ALWAYS needs to be set to indicate a valid CAN XL frame. + * As a side effect setting this bit intentionally breaks the length checks + * for Classical CAN and CAN FD frames. + * + * Undefined bits in canxl_frame.flags are reserved and shall be set to zero. + */ +#define CANXL_XLF 0x80 /* mandatory CAN XL frame flag (must always be set!) */ +#define CANXL_SEC 0x01 /* Simple Extended Content (security/segmentation) */ + +/** + * struct canxl_frame - CAN with e'X'tended frame 'L'ength frame structure + * @prio: 11 bit arbitration priority with zero'ed CAN_*_FLAG flags + * @flags: additional flags for CAN XL + * @sdt: SDU (service data unit) type + * @len: frame payload length in byte (CANXL_MIN_DLEN .. CANXL_MAX_DLEN) + * @af: acceptance field + * @data: CAN XL frame payload (CANXL_MIN_DLEN .. CANXL_MAX_DLEN byte) + * + * @prio shares the same position as @can_id from struct can[fd]_frame. 
+ */ +struct canxl_frame { + canid_t prio; /* 11 bit priority for arbitration (canid_t) */ + __u8 flags; /* additional flags for CAN XL */ + __u8 sdt; /* SDU (service data unit) type */ + __u16 len; /* frame payload length in byte */ + __u32 af; /* acceptance field */ + __u8 data[CANXL_MAX_DLEN]; +}; + #define CAN_MTU (sizeof(struct can_frame)) #define CANFD_MTU (sizeof(struct canfd_frame)) +#define CANXL_MTU (sizeof(struct canxl_frame)) +#define CANXL_HDR_SIZE (offsetof(struct canxl_frame, data)) +#define CANXL_MIN_MTU (CANXL_HDR_SIZE + 64) +#define CANXL_MAX_MTU CANXL_MTU /* particular protocols of the protocol family PF_CAN */ #define CAN_RAW 1 /* RAW sockets */ -- cgit v1.2.3 From fb08cba12b52cba4366e858932307649dc5304e2 Mon Sep 17 00:00:00 2001 From: Oliver Hartkopp Date: Mon, 12 Sep 2022 19:07:23 +0200 Subject: can: canxl: update CAN infrastructure for CAN XL frames - add new ETH_P_CANXL ethernet protocol type - update skb checks for CAN XL - add alloc_canxl_skb() which now needs a data length parameter - introduce init_can_skb_reserve() to reduce code duplication Acked-by: Vincent Mailhol Signed-off-by: Oliver Hartkopp Link: https://lore.kernel.org/all/20220912170725.120748-6-socketcan@hartkopp.net Signed-off-by: Marc Kleine-Budde --- include/uapi/linux/if_ether.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/if_ether.h b/include/uapi/linux/if_ether.h index d370165bc621..69e0457eb200 100644 --- a/include/uapi/linux/if_ether.h +++ b/include/uapi/linux/if_ether.h @@ -138,6 +138,7 @@ #define ETH_P_LOCALTALK 0x0009 /* Localtalk pseudo type */ #define ETH_P_CAN 0x000C /* CAN: Controller Area Network */ #define ETH_P_CANFD 0x000D /* CANFD: CAN flexible data rate*/ +#define ETH_P_CANXL 0x000E /* CANXL: eXtended frame Length */ #define ETH_P_PPPTALK 0x0010 /* Dummy type for Atalk over PPP*/ #define ETH_P_TR_802_2 0x0011 /* 802.2 frames */ #define ETH_P_MOBITEX 0x0015 /* Mobitex (kaz@cafe.net) */ -- cgit v1.2.3 
From 626332696d7506e8f844a564277bdba2dc78fcb5 Mon Sep 17 00:00:00 2001 From: Oliver Hartkopp Date: Mon, 12 Sep 2022 19:07:25 +0200 Subject: can: raw: add CAN XL support Enable CAN_RAW sockets to read and write CAN XL frames analogous to the CAN FD extension (new CAN_RAW_XL_FRAMES sockopt). A CAN XL network interface is capable of handling Classical CAN, CAN FD and CAN XL frames. When CAN_RAW_XL_FRAMES is enabled, the CAN_RAW socket checks whether the addressed CAN network interface is capable of handling the provided CAN frame. In contrast to the fixed number of bytes for - CAN frames (CAN_MTU = sizeof(struct can_frame)) - CAN FD frames (CANFD_MTU = sizeof(struct canfd_frame)) the number of bytes when reading/writing CAN XL frames depends on the number of data bytes. For efficiency reasons the length of the struct canxl_frame is truncated to the needed size for read/write operations. This leads to a calculated size of CANXL_HDR_SIZE + canxl_frame::len which is enforced on write() operations and guaranteed on read() operations. NB: Valid length values are 1 .. 2048 (CANXL_MIN_DLEN .. CANXL_MAX_DLEN).
Acked-by: Vincent Mailhol Signed-off-by: Oliver Hartkopp Link: https://lore.kernel.org/all/20220912170725.120748-8-socketcan@hartkopp.net Signed-off-by: Marc Kleine-Budde --- include/uapi/linux/can/raw.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/can/raw.h b/include/uapi/linux/can/raw.h index 3386aa81fdf2..ff12f525c37c 100644 --- a/include/uapi/linux/can/raw.h +++ b/include/uapi/linux/can/raw.h @@ -62,6 +62,7 @@ enum { CAN_RAW_RECV_OWN_MSGS, /* receive my own msgs (default:off) */ CAN_RAW_FD_FRAMES, /* allow CAN FD frames (default:off) */ CAN_RAW_JOIN_FILTERS, /* all filters must match to trigger */ + CAN_RAW_XL_FRAMES, /* allow CAN XL frames (default:off) */ }; #endif /* !_UAPI_CAN_RAW_H */ -- cgit v1.2.3 From 7e6e1b57162ed6a2d32d2f0929c27d79482ff706 Mon Sep 17 00:00:00 2001 From: Nicolas Dichtel Date: Tue, 6 Sep 2022 11:55:58 +0200 Subject: rtnetlink: advertise allmulti counter Like what was done with IFLA_PROMISCUITY, add IFLA_ALLMULTI to advertise the allmulti counter. The flag IFF_ALLMULTI is advertised only if it was directly set by a userland app. Signed-off-by: Nicolas Dichtel Signed-off-by: David S. Miller --- include/uapi/linux/if_link.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h index e36d9d2c65a7..0bfa9a99ebb6 100644 --- a/include/uapi/linux/if_link.h +++ b/include/uapi/linux/if_link.h @@ -370,6 +370,7 @@ enum { IFLA_GRO_MAX_SIZE, IFLA_TSO_MAX_SIZE, IFLA_TSO_MAX_SEGS, + IFLA_ALLMULTI, /* Allmulti count: > 0 means acts ALLMULTI */ __IFLA_MAX }; -- cgit v1.2.3 From 65b32f801bfbc54dc98144a6ec26082b59d131ee Mon Sep 17 00:00:00 2001 From: Wojciech Drewek Date: Thu, 8 Sep 2022 10:16:40 -0700 Subject: uapi: move IPPROTO_L2TP to in.h IPPROTO_L2TP is currently defined in l2tp.h, but most of ip protocols are defined in in.h file. Move it there in order to keep code clean. 
Acked-by: Guillaume Nault Signed-off-by: Wojciech Drewek Signed-off-by: Tony Nguyen Signed-off-by: Paolo Abeni --- include/uapi/linux/in.h | 2 ++ include/uapi/linux/l2tp.h | 2 -- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/in.h b/include/uapi/linux/in.h index 578daa6f816b..f243ce665f74 100644 --- a/include/uapi/linux/in.h +++ b/include/uapi/linux/in.h @@ -68,6 +68,8 @@ enum { #define IPPROTO_PIM IPPROTO_PIM IPPROTO_COMP = 108, /* Compression Header Protocol */ #define IPPROTO_COMP IPPROTO_COMP + IPPROTO_L2TP = 115, /* Layer 2 Tunnelling Protocol */ +#define IPPROTO_L2TP IPPROTO_L2TP IPPROTO_SCTP = 132, /* Stream Control Transport Protocol */ #define IPPROTO_SCTP IPPROTO_SCTP IPPROTO_UDPLITE = 136, /* UDP-Lite (RFC 3828) */ diff --git a/include/uapi/linux/l2tp.h b/include/uapi/linux/l2tp.h index bab8c9708611..7d81c3e1ec29 100644 --- a/include/uapi/linux/l2tp.h +++ b/include/uapi/linux/l2tp.h @@ -13,8 +13,6 @@ #include #include -#define IPPROTO_L2TP 115 - /** * struct sockaddr_l2tpip - the sockaddr structure for L2TP-over-IP sockets * @l2tp_family: address family number AF_L2TPIP. -- cgit v1.2.3 From 8b189ea08c334f25dbb3d076f8adb8b80491d01d Mon Sep 17 00:00:00 2001 From: Wojciech Drewek Date: Thu, 8 Sep 2022 10:16:42 -0700 Subject: net/sched: flower: Add L2TPv3 filter Add support for matching on L2TPv3 session ID. Session ID can be specified only when ip proto was set to IPPROTO_L2TP. 
Example filter: # tc filter add dev $PF1 ingress prio 1 protocol ip \ flower \ ip_proto l2tp \ l2tpv3_sid 1234 \ skip_sw \ action mirred egress redirect dev $VF1_PR Acked-by: Guillaume Nault Signed-off-by: Wojciech Drewek Signed-off-by: Tony Nguyen Signed-off-by: Paolo Abeni --- include/uapi/linux/pkt_cls.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/pkt_cls.h b/include/uapi/linux/pkt_cls.h index 877309d6ca3c..648a82f32666 100644 --- a/include/uapi/linux/pkt_cls.h +++ b/include/uapi/linux/pkt_cls.h @@ -592,6 +592,8 @@ enum { TCA_FLOWER_KEY_PPPOE_SID, /* be16 */ TCA_FLOWER_KEY_PPP_PROTO, /* be16 */ + TCA_FLOWER_KEY_L2TPV3_SID, /* be32 */ + __TCA_FLOWER_MAX, }; -- cgit v1.2.3 From 95f510d0b792f308d3d748242fe960c35bdc2c62 Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Sun, 11 Sep 2022 04:06:59 +0300 Subject: net: dsa: allow the DSA master to be seen and changed through rtnetlink Some DSA switches have multiple CPU ports, which can be used to improve CPU termination throughput, but DSA, through dsa_tree_setup_cpu_ports(), sets up only the first one, leading to suboptimal use of hardware. The desire is to not change the default configuration but to permit the user to create a dynamic mapping between individual user ports and the CPU port that they are served by, configurable through rtnetlink. It is also intended to permit load balancing between CPU ports, and in that case, the foreseen model is for the DSA master to be a bonding interface whose lowers are the physical DSA masters. To that end, we create a struct rtnl_link_ops for DSA user ports with the "dsa" kind. We expose the IFLA_DSA_MASTER link attribute that contains the ifindex of the newly desired DSA master. 
Signed-off-by: Vladimir Oltean Signed-off-by: Paolo Abeni --- include/uapi/linux/if_link.h | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h index 0bfa9a99ebb6..3d39fb398d65 100644 --- a/include/uapi/linux/if_link.h +++ b/include/uapi/linux/if_link.h @@ -1375,4 +1375,14 @@ enum { #define IFLA_MCTP_MAX (__IFLA_MCTP_MAX - 1) +/* DSA section */ + +enum { + IFLA_DSA_UNSPEC, + IFLA_DSA_MASTER, + __IFLA_DSA_MAX, +}; + +#define IFLA_DSA_MAX (__IFLA_DSA_MAX - 1) + #endif /* _UAPI_LINUX_IF_LINK_H */ -- cgit v1.2.3 From 848f3c0d47694924536e2894cb349613201321c6 Mon Sep 17 00:00:00 2001 From: Andrea Mayer Date: Mon, 12 Sep 2022 19:16:18 +0200 Subject: seg6: add NEXT-C-SID support for SRv6 End behavior The NEXT-C-SID mechanism described in [1] offers the possibility of encoding several SRv6 segments within a single 128 bit SID address. Such a SID address is called a Compressed SID (C-SID) container. In this way, the length of the SID List can be drastically reduced. A SID instantiated with the NEXT-C-SID flavor considers an IPv6 address logically structured in three main blocks: i) Locator-Block; ii) Locator-Node Function; iii) Argument. C-SID container +------------------------------------------------------------------+ | Locator-Block |Loc-Node| Argument | | |Function| | +------------------------------------------------------------------+ <--------- B -----------> <- NF -> <------------- A ---------------> (i) The Locator-Block can be any IPv6 prefix available to the provider; (ii) The Locator-Node Function represents the node and the function to be triggered when a packet is received on the node; (iii) The Argument carries the remaining C-SIDs in the current C-SID container. The NEXT-C-SID mechanism relies on the "flavors" framework defined in [2]. The flavors represent additional operations that can modify or extend a subset of the existing behaviors. 
This patch introduces the support for flavors in SRv6 End behavior implementing the NEXT-C-SID one. An SRv6 End behavior with NEXT-C-SID flavor works as an End behavior but it is capable of processing the compressed SID List encoded in C-SID containers. An SRv6 End behavior with NEXT-C-SID flavor can be configured to support user-provided Locator-Block and Locator-Node Function lengths. In this implementation, such lengths must be evenly divisible by 8 (i.e. must be byte-aligned), otherwise the kernel informs the user about invalid values with a meaningful error code and message through netlink_ext_ack. If Locator-Block and/or Locator-Node Function lengths are not provided by the user during configuration of an SRv6 End behavior instance with NEXT-C-SID flavor, the kernel will choose their default values i.e., 32-bit Locator-Block and 16-bit Locator-Node Function. [1] - https://datatracker.ietf.org/doc/html/draft-ietf-spring-srv6-srh-compression [2] - https://datatracker.ietf.org/doc/html/rfc8986 Signed-off-by: Andrea Mayer Reviewed-by: David Ahern Signed-off-by: Paolo Abeni --- include/uapi/linux/seg6_local.h | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/seg6_local.h b/include/uapi/linux/seg6_local.h index 332b18f318f8..4fdc424c9cb3 100644 --- a/include/uapi/linux/seg6_local.h +++ b/include/uapi/linux/seg6_local.h @@ -28,6 +28,7 @@ enum { SEG6_LOCAL_BPF, SEG6_LOCAL_VRFTABLE, SEG6_LOCAL_COUNTERS, + SEG6_LOCAL_FLAVORS, __SEG6_LOCAL_MAX, }; #define SEG6_LOCAL_MAX (__SEG6_LOCAL_MAX - 1) @@ -110,4 +111,27 @@ enum { #define SEG6_LOCAL_CNT_MAX (__SEG6_LOCAL_CNT_MAX - 1) +/* SRv6 End* Flavor attributes */ +enum { + SEG6_LOCAL_FLV_UNSPEC, + SEG6_LOCAL_FLV_OPERATION, + SEG6_LOCAL_FLV_LCBLOCK_BITS, + SEG6_LOCAL_FLV_LCNODE_FN_BITS, + __SEG6_LOCAL_FLV_MAX, +}; + +#define SEG6_LOCAL_FLV_MAX (__SEG6_LOCAL_FLV_MAX - 1) + +/* Designed flavor operations for SRv6 End* Behavior */ +enum { + 
SEG6_LOCAL_FLV_OP_UNSPEC, + SEG6_LOCAL_FLV_OP_PSP, + SEG6_LOCAL_FLV_OP_USP, + SEG6_LOCAL_FLV_OP_USD, + SEG6_LOCAL_FLV_OP_NEXT_CSID, + __SEG6_LOCAL_FLV_OP_MAX +}; + +#define SEG6_LOCAL_FLV_OP_MAX (__SEG6_LOCAL_FLV_OP_MAX - 1) + #endif -- cgit v1.2.3 From ead77b65aef430d3bfe63524c243a60a29eb8d90 Mon Sep 17 00:00:00 2001 From: Benjamin Tissoires Date: Fri, 2 Sep 2022 15:29:24 +0200 Subject: HID: export hid_report_type to uapi When we are dealing with eBPF, we need to have access to the report type. Currently our implementation differs from the USB standard, making it impossible for users to know the exact value besides hardcoding it themselves. And instead of a blank define, convert it as an enum. Note that we need to also do change in the ll_driver API, but given that this will have a wider impact outside of this tree, we leave this as a TODO for the future. Reviewed-by: Greg Kroah-Hartman Signed-off-by: Benjamin Tissoires Link: https://lore.kernel.org/r/20220902132938.2409206-10-benjamin.tissoires@redhat.com --- include/uapi/linux/hid.h | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/hid.h b/include/uapi/linux/hid.h index b34492a87a8a..b25b0bacaff2 100644 --- a/include/uapi/linux/hid.h +++ b/include/uapi/linux/hid.h @@ -42,6 +42,18 @@ #define USB_INTERFACE_PROTOCOL_KEYBOARD 1 #define USB_INTERFACE_PROTOCOL_MOUSE 2 +/* + * HID report types --- Ouch! HID spec says 1 2 3! + */ + +enum hid_report_type { + HID_INPUT_REPORT = 0, + HID_OUTPUT_REPORT = 1, + HID_FEATURE_REPORT = 2, + + HID_REPORT_TYPES, +}; + /* * HID class requests */ -- cgit v1.2.3 From 735e1bb1b8067e209941a6bdfde23214696ff47e Mon Sep 17 00:00:00 2001 From: Benjamin Tissoires Date: Fri, 2 Sep 2022 15:29:25 +0200 Subject: HID: convert defines of HID class requests into a proper enum This allows to export the type in BTF and so in the automatically generated vmlinux.h. 
It will also add some static checks on the users when we change the ll driver API (see note below). Note that we need to also do change in the ll_driver API, but given that this will have a wider impact outside of this tree, we leave this as a TODO for the future. Reviewed-by: Greg Kroah-Hartman Signed-off-by: Benjamin Tissoires Link: https://lore.kernel.org/r/20220902132938.2409206-11-benjamin.tissoires@redhat.com --- include/uapi/linux/hid.h | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/hid.h b/include/uapi/linux/hid.h index b25b0bacaff2..a4dcb34386e3 100644 --- a/include/uapi/linux/hid.h +++ b/include/uapi/linux/hid.h @@ -58,12 +58,14 @@ enum hid_report_type { * HID class requests */ -#define HID_REQ_GET_REPORT 0x01 -#define HID_REQ_GET_IDLE 0x02 -#define HID_REQ_GET_PROTOCOL 0x03 -#define HID_REQ_SET_REPORT 0x09 -#define HID_REQ_SET_IDLE 0x0A -#define HID_REQ_SET_PROTOCOL 0x0B +enum hid_class_request { + HID_REQ_GET_REPORT = 0x01, + HID_REQ_GET_IDLE = 0x02, + HID_REQ_GET_PROTOCOL = 0x03, + HID_REQ_SET_REPORT = 0x09, + HID_REQ_SET_IDLE = 0x0A, + HID_REQ_SET_PROTOCOL = 0x0B, +}; /* * HID class descriptor types -- cgit v1.2.3 From 7b5541a932c21f7e07f068a785afcc25986e4893 Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Sun, 11 Sep 2022 14:03:09 +0200 Subject: headers: Remove some left-over license text in include/uapi/linux/netfilter/ When the SPDX-License-Identifier tag has been added, the corresponding license text has not been removed. Remove it now. Also, in xt_connmark.h, move the copyright text at the top of the file which is a much more common pattern.
Signed-off-by: Christophe JAILLET Signed-off-by: Florian Westphal --- include/uapi/linux/netfilter/ipset/ip_set.h | 4 ---- include/uapi/linux/netfilter/xt_AUDIT.h | 4 ---- include/uapi/linux/netfilter/xt_connmark.h | 13 ++++--------- include/uapi/linux/netfilter/xt_osf.h | 14 -------------- 4 files changed, 4 insertions(+), 31 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/netfilter/ipset/ip_set.h b/include/uapi/linux/netfilter/ipset/ip_set.h index 6397d75899bc..79e5d68b87af 100644 --- a/include/uapi/linux/netfilter/ipset/ip_set.h +++ b/include/uapi/linux/netfilter/ipset/ip_set.h @@ -3,10 +3,6 @@ * Patrick Schaaf * Martin Josefsson * Copyright (C) 2003-2011 Jozsef Kadlecsik - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. */ #ifndef _UAPI_IP_SET_H #define _UAPI_IP_SET_H diff --git a/include/uapi/linux/netfilter/xt_AUDIT.h b/include/uapi/linux/netfilter/xt_AUDIT.h index 1b314e2f84ac..56a3f6092e0c 100644 --- a/include/uapi/linux/netfilter/xt_AUDIT.h +++ b/include/uapi/linux/netfilter/xt_AUDIT.h @@ -4,10 +4,6 @@ * * (C) 2010-2011 Thomas Graf * (C) 2010-2011 Red Hat, Inc. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. 
*/ #ifndef _XT_AUDIT_TARGET_H diff --git a/include/uapi/linux/netfilter/xt_connmark.h b/include/uapi/linux/netfilter/xt_connmark.h index f01c19b83a2b..41b578ccd03b 100644 --- a/include/uapi/linux/netfilter/xt_connmark.h +++ b/include/uapi/linux/netfilter/xt_connmark.h @@ -1,18 +1,13 @@ /* SPDX-License-Identifier: GPL-2.0+ WITH Linux-syscall-note */ +/* Copyright (C) 2002,2004 MARA Systems AB + * by Henrik Nordstrom + */ + #ifndef _XT_CONNMARK_H #define _XT_CONNMARK_H #include -/* Copyright (C) 2002,2004 MARA Systems AB - * by Henrik Nordstrom - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - */ - enum { XT_CONNMARK_SET = 0, XT_CONNMARK_SAVE, diff --git a/include/uapi/linux/netfilter/xt_osf.h b/include/uapi/linux/netfilter/xt_osf.h index 6e466236ca4b..f1f097896bdf 100644 --- a/include/uapi/linux/netfilter/xt_osf.h +++ b/include/uapi/linux/netfilter/xt_osf.h @@ -1,20 +1,6 @@ /* SPDX-License-Identifier: GPL-2.0+ WITH Linux-syscall-note */ /* * Copyright (c) 2003+ Evgeniy Polyakov - * - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, see . 
*/ #ifndef _XT_OSF_H -- cgit v1.2.3 From c0e0d6ba25f180ab76d3c18f8b360a119dffa634 Mon Sep 17 00:00:00 2001 From: Dylan Yudaken Date: Tue, 30 Aug 2022 05:50:10 -0700 Subject: io_uring: add IORING_SETUP_DEFER_TASKRUN Allow deferring async tasks until the user calls io_uring_enter(2) with the IORING_ENTER_GETEVENTS flag. Enable this mode with a flag at io_uring_setup time. This functionality requires that the later io_uring_enter will be called from the same submission task, and therefore restrict this flag to work only when IORING_SETUP_SINGLE_ISSUER is also set. Being able to hand pick when tasks are run prevents the problem where there is current work to be done, however task work runs anyway. For example, a common workload would obtain a batch of CQEs, and process each one. Interrupting this to additional taskwork would add latency but not gain anything. If instead task work is deferred to just before more CQEs are obtained then no additional latency is added. The way this is implemented is by trying to keep task work local to a io_ring_ctx, rather than to the submission task. This is required, as the application will want to wake up only a single io_ring_ctx at a time to process work, and so the lists of work have to be kept separate. This has some other benefits like not having to check the task continually in handle_tw_list (and potentially unlocking/locking those), and reducing locks in the submit & process completions path. There are networking cases where using this option can reduce request latency by 50%. For example a contrived example using [1] where the client sends 2k data and receives the same data back while doing some system calls (to trigger task work) shows this reduction. The reason ends up being that if sending responses is delayed by processing task work, then the client side sits idle. Whereas reordering the sends first means that the client runs its workload in parallel with the local task work.
[1]: Using https://github.com/DylanZA/netbench/tree/defer_run Client: ./netbench --client_only 1 --control_port 10000 --host --tx "epoll --threads 16 --per_thread 1 --size 2048 --resp 2048 --workload 1000" Server: ./netbench --server_only 1 --control_port 10000 --rx "io_uring --defer_taskrun 0 --workload 100" --rx "io_uring --defer_taskrun 1 --workload 100" Signed-off-by: Dylan Yudaken Link: https://lore.kernel.org/r/20220830125013.570060-5-dylany@fb.com Signed-off-by: Jens Axboe --- include/uapi/linux/io_uring.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h index 6b83177fd41d..972b179bc07a 100644 --- a/include/uapi/linux/io_uring.h +++ b/include/uapi/linux/io_uring.h @@ -157,6 +157,13 @@ enum { */ #define IORING_SETUP_SINGLE_ISSUER (1U << 12) +/* + * Defer running task work to get events. + * Rather than running bits of task work whenever the task transitions + * try to do it just before it is needed. + */ +#define IORING_SETUP_DEFER_TASKRUN (1U << 13) + enum io_uring_op { IORING_OP_NOP, IORING_OP_READV, -- cgit v1.2.3 From bcc57a48eaee63a71983996c4c9d89ce7cbf55d9 Mon Sep 17 00:00:00 2001 From: Andrea Merello Date: Wed, 7 Sep 2022 15:21:52 +0200 Subject: iio: add modifiers for linear acceleration Add IIO_MOD_LINEAR_X, IIO_MOD_LINEAR_Y and IIO_MOD_LINEAR_Z modifiers to the IIO core, which is preparatory for adding the Bosch BNO055 IMU driver. Bosch BNO055 IMU can report raw accelerations (among x, y and z axis) as well as the so called "linear accelerations" (again, among x, y and z axis) which is basically the acceleration after subtracting gravity and for which those new modifiers are for.
Signed-off-by: Andrea Merello Reviewed-by: Andy Shevchenko Link: https://lore.kernel.org/r/20220907132205.28021-2-andrea.merello@iit.it Signed-off-by: Jonathan Cameron --- include/uapi/linux/iio/types.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/iio/types.h b/include/uapi/linux/iio/types.h index 913864221ac4..b7ba9861a24d 100644 --- a/include/uapi/linux/iio/types.h +++ b/include/uapi/linux/iio/types.h @@ -95,6 +95,9 @@ enum iio_modifier { IIO_MOD_ETHANOL, IIO_MOD_H2, IIO_MOD_O2, + IIO_MOD_LINEAR_X, + IIO_MOD_LINEAR_Y, + IIO_MOD_LINEAR_Z, }; enum iio_event_type { @@ -118,4 +121,3 @@ enum iio_event_direction { }; #endif /* _UAPI_IIO_TYPES_H_ */ - -- cgit v1.2.3 From dcedf14553810cd6bbf7227c995beb4548e0859d Mon Sep 17 00:00:00 2001 From: Andrea Merello Date: Wed, 7 Sep 2022 15:21:55 +0200 Subject: iio: add modifers for pitch, yaw, roll Add modifiers for reporting rotations as euler angles (i.e. yaw, pitch and roll). Signed-off-by: Andrea Merello Reviewed-by: Andy Shevchenko Link: https://lore.kernel.org/r/20220907132205.28021-5-andrea.merello@iit.it Signed-off-by: Jonathan Cameron --- include/uapi/linux/iio/types.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/iio/types.h b/include/uapi/linux/iio/types.h index b7ba9861a24d..c79f2f046a0b 100644 --- a/include/uapi/linux/iio/types.h +++ b/include/uapi/linux/iio/types.h @@ -98,6 +98,9 @@ enum iio_modifier { IIO_MOD_LINEAR_X, IIO_MOD_LINEAR_Y, IIO_MOD_LINEAR_Z, + IIO_MOD_PITCH, + IIO_MOD_YAW, + IIO_MOD_ROLL, }; enum iio_event_type { -- cgit v1.2.3 From 0e426a3ae030a9e891899370229e117158b35de6 Mon Sep 17 00:00:00 2001 From: Pu Lehui Date: Wed, 21 Sep 2022 10:46:02 +0000 Subject: bpf, cgroup: Reject prog_attach_flags array when effective query Attach flags is only valid for attached progs of this layer cgroup, but not for effective progs. 
For querying with EFFECTIVE flags, exporting attach flags does not make sense. So when effective query, we reject prog_attach_flags array and don't need to populate it. Also we limit attach_flags to output 0 during effective query. Fixes: b79c9fc9551b ("bpf: implement BPF_PROG_QUERY for BPF_LSM_CGROUP") Signed-off-by: Pu Lehui Link: https://lore.kernel.org/r/20220921104604.2340580-2-pulehui@huaweicloud.com Signed-off-by: Martin KaFai Lau --- include/uapi/linux/bpf.h | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 59a217ca2dfd..4eff7fc7ae58 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -1233,7 +1233,7 @@ enum { /* Query effective (directly attached + inherited from ancestor cgroups) * programs that will be executed for events within a cgroup. - * attach_flags with this flag are returned only for directly attached programs. + * attach_flags with this flag are always returned 0. */ #define BPF_F_QUERY_EFFECTIVE (1U << 0) @@ -1432,7 +1432,10 @@ union bpf_attr { __u32 attach_flags; __aligned_u64 prog_ids; __u32 prog_cnt; - __aligned_u64 prog_attach_flags; /* output: per-program attach_flags */ + /* output: per-program attach_flags. + * not allowed to be set during effective query. + */ + __aligned_u64 prog_attach_flags; } query; struct { /* anonymous struct used by BPF_RAW_TRACEPOINT_OPEN command */ -- cgit v1.2.3 From 493108d95f1464ccd101d4e5cfa7e93f1fc64d47 Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Wed, 21 Sep 2022 12:17:54 +0100 Subject: io_uring/net: zerocopy sendmsg Add a zerocopy version of sendmsg. 
Signed-off-by: Pavel Begunkov Link: https://lore.kernel.org/r/6aabc4bdfc0ec78df6ec9328137e394af9d4e7ef.1663668091.git.asml.silence@gmail.com Signed-off-by: Jens Axboe --- include/uapi/linux/io_uring.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h index 972b179bc07a..92f29d9505a6 100644 --- a/include/uapi/linux/io_uring.h +++ b/include/uapi/linux/io_uring.h @@ -213,6 +213,7 @@ enum io_uring_op { IORING_OP_SOCKET, IORING_OP_URING_CMD, IORING_OP_SEND_ZC, + IORING_OP_SENDMSG_ZC, /* this goes last, obviously */ IORING_OP_LAST, -- cgit v1.2.3 From 583c1f420173f7d84413a1a1fbf5109d798b4faa Mon Sep 17 00:00:00 2001 From: David Vernet Date: Mon, 19 Sep 2022 19:00:57 -0500 Subject: bpf: Define new BPF_MAP_TYPE_USER_RINGBUF map type We want to support a ringbuf map type where samples are published from user-space, to be consumed by BPF programs. BPF currently supports a kernel -> user-space circular ring buffer via the BPF_MAP_TYPE_RINGBUF map type. We'll need to define a new map type for user-space -> kernel, as none of the helpers exported for BPF_MAP_TYPE_RINGBUF will apply to a user-space producer ring buffer, and we'll want to add one or more helper functions that would not apply for a kernel-producer ring buffer. This patch therefore adds a new BPF_MAP_TYPE_USER_RINGBUF map type definition. The map type is useless in its current form, as there is no way to access or use it for anything until we add one or more BPF helpers. A follow-on patch will therefore add a new helper function that allows BPF programs to run callbacks on samples that are published to the ring buffer.
Signed-off-by: David Vernet Signed-off-by: Andrii Nakryiko Acked-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20220920000100.477320-2-void@manifault.com --- include/uapi/linux/bpf.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 3df78c56c1bf..e18c85324db6 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -928,6 +928,7 @@ enum bpf_map_type { BPF_MAP_TYPE_INODE_STORAGE, BPF_MAP_TYPE_TASK_STORAGE, BPF_MAP_TYPE_BLOOM_FILTER, + BPF_MAP_TYPE_USER_RINGBUF, }; /* Note that tracing related programs such as -- cgit v1.2.3 From 20571567384428dfc9fe5cf9f2e942e1df13c2dd Mon Sep 17 00:00:00 2001 From: David Vernet Date: Mon, 19 Sep 2022 19:00:58 -0500 Subject: bpf: Add bpf_user_ringbuf_drain() helper In a prior change, we added a new BPF_MAP_TYPE_USER_RINGBUF map type which will allow user-space applications to publish messages to a ring buffer that is consumed by a BPF program in kernel-space. In order for this map-type to be useful, it will require a BPF helper function that BPF programs can invoke to drain samples from the ring buffer, and invoke callbacks on those samples. This change adds that capability via a new BPF helper function: bpf_user_ringbuf_drain(struct bpf_map *map, void *callback_fn, void *ctx, u64 flags) BPF programs may invoke this function to run callback_fn() on a series of samples in the ring buffer. callback_fn() has the following signature: long callback_fn(struct bpf_dynptr *dynptr, void *context); Samples are provided to the callback in the form of struct bpf_dynptr *'s, which the program can read using BPF helper functions for querying struct bpf_dynptr's. In order to support bpf_user_ringbuf_drain(), a new PTR_TO_DYNPTR register type is added to the verifier to reflect a dynptr that was allocated by a helper function and passed to a BPF program.
Unlike PTR_TO_STACK dynptrs which are allocated on the stack by a BPF program, PTR_TO_DYNPTR dynptrs need not use reference tracking, as the BPF helper is trusted to properly free the dynptr before returning. The verifier currently only supports PTR_TO_DYNPTR registers that are also DYNPTR_TYPE_LOCAL. Note that while the corresponding user-space libbpf logic will be added in a subsequent patch, this patch does contain an implementation of the .map_poll() callback for BPF_MAP_TYPE_USER_RINGBUF maps. This .map_poll() callback guarantees that an epoll-waiting user-space producer will receive at least one event notification whenever at least one sample is drained in an invocation of bpf_user_ringbuf_drain(), provided that the function is not invoked with the BPF_RB_NO_WAKEUP flag. If the BPF_RB_FORCE_WAKEUP flag is provided, a wakeup notification is sent even if no sample was drained. Signed-off-by: David Vernet Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20220920000100.477320-3-void@manifault.com --- include/uapi/linux/bpf.h | 38 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 38 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index e18c85324db6..ead35f39f185 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -5388,6 +5388,43 @@ union bpf_attr { * Return * Current *ktime*. * + * long bpf_user_ringbuf_drain(struct bpf_map *map, void *callback_fn, void *ctx, u64 flags) + * Description + * Drain samples from the specified user ring buffer, and invoke + * the provided callback for each such sample: + * + * long (\*callback_fn)(struct bpf_dynptr \*dynptr, void \*ctx); + * + * If **callback_fn** returns 0, the helper will continue to try + * and drain the next sample, up to a maximum of + * BPF_MAX_USER_RINGBUF_SAMPLES samples. If the return value is 1, + * the helper will skip the rest of the samples and return. 
Other + * return values are not used now, and will be rejected by the + * verifier. + * Return + * The number of drained samples if no error was encountered while + * draining samples, or 0 if no samples were present in the ring + * buffer. If a user-space producer was epoll-waiting on this map, + * and at least one sample was drained, they will receive an event + * notification notifying them of available space in the ring + * buffer. If the BPF_RB_NO_WAKEUP flag is passed to this + * function, no wakeup notification will be sent. If the + * BPF_RB_FORCE_WAKEUP flag is passed, a wakeup notification will + * be sent even if no sample was drained. + * + * On failure, the returned value is one of the following: + * + * **-EBUSY** if the ring buffer is contended, and another calling + * context was concurrently draining the ring buffer. + * + * **-EINVAL** if user-space is not properly tracking the ring + * buffer due to the producer position not being aligned to 8 + * bytes, a sample not being aligned to 8 bytes, or the producer + * position not matching the advertised length of a sample. + * + * **-E2BIG** if user-space has tried to publish a sample which is + * larger than the size of the ring buffer, or which cannot fit + * within a struct bpf_dynptr. */ #define __BPF_FUNC_MAPPER(FN) \ FN(unspec), \ @@ -5599,6 +5636,7 @@ union bpf_attr { FN(tcp_raw_check_syncookie_ipv4), \ FN(tcp_raw_check_syncookie_ipv6), \ FN(ktime_get_tai_ns), \ + FN(user_ringbuf_drain), \ /* */ /* integer value in 'imm' field of BPF_CALL instruction selects which helper -- cgit v1.2.3 From 0c3e10cb44232833a50cb8e3e784c432906a60c1 Mon Sep 17 00:00:00 2001 From: Sean Anderson Date: Tue, 20 Sep 2022 18:12:31 -0400 Subject: net: phy: Add support for rate matching This adds support for rate matching (also known as rate adaptation) to the phy subsystem. The general idea is that the phy interface runs at one speed, and the MAC throttles the rate at which it sends packets to the link speed. 
There's a good overview of several techniques for achieving this at [1]. This patch adds support for three: pause-frame based (such as in Aquantia phys), CRS-based (such as in 10PASS-TS and 2BASE-TL), and open-loop-based (such as in 10GBASE-W). This patch makes a few assumptions and a few non-assumptions about the types of rate matching available. First, it assumes that different phys may use different forms of rate matching. Second, it assumes that phys can use rate matching for any of their supported link speeds (e.g. if a phy supports 10BASE-T and XGMII, then it can adapt XGMII to 10BASE-T). Third, it does not assume that all interface modes will use the same form of rate matching. Fourth, it does not assume that all phy devices will support rate matching (even if some do). Relaxing or strengthening these (non-)assumptions could result in a different API. For example, if all interface modes were assumed to use the same form of rate matching, then a bitmask of interface modes supporting rate matching would suffice. For some better visibility into the process, the current rate matching mode is exposed as part of the ethtool ksettings. For the moment, only read access is supported. I'm not sure what userspace might want to configure yet (disable it altogether, disable just one mode, specify the mode to use, etc.). For the moment, since only pause-based rate adaptation support is added in the next few commits, rate matching can be disabled altogether by adjusting the advertisement. 802.3 calls this feature "rate adaptation" in clause 49 (10GBASE-R) and "rate matching" in clause 61 (10PASS-TS and 2BASE-TL). Aquantia also calls this feature "rate adaptation". I chose "rate matching" because it is shorter, and because Russell doesn't think "adaptation" is correct in this context. Signed-off-by: Sean Anderson Signed-off-by: David S.
Miller --- include/uapi/linux/ethtool.h | 18 ++++++++++++++++-- include/uapi/linux/ethtool_netlink.h | 1 + 2 files changed, 17 insertions(+), 2 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/ethtool.h b/include/uapi/linux/ethtool.h index 2d5741fd44bb..fe9893d1485d 100644 --- a/include/uapi/linux/ethtool.h +++ b/include/uapi/linux/ethtool.h @@ -1840,6 +1840,20 @@ static inline int ethtool_validate_duplex(__u8 duplex) #define MASTER_SLAVE_STATE_SLAVE 3 #define MASTER_SLAVE_STATE_ERR 4 +/* These are used to throttle the rate of data on the phy interface when the + * native speed of the interface is higher than the link speed. These should + * not be used for phy interfaces which natively support multiple speeds (e.g. + * MII or SGMII). + */ +/* No rate matching performed. */ +#define RATE_MATCH_NONE 0 +/* The phy sends pause frames to throttle the MAC. */ +#define RATE_MATCH_PAUSE 1 +/* The phy asserts CRS to prevent the MAC from transmitting. */ +#define RATE_MATCH_CRS 2 +/* The MAC is programmed with a sufficiently-large IPG. */ +#define RATE_MATCH_OPEN_LOOP 3 + /* Which connector port. */ #define PORT_TP 0x00 #define PORT_AUI 0x01 @@ -2033,8 +2047,8 @@ enum ethtool_reset_flags { * reported consistently by PHYLIB. Read-only. * @master_slave_cfg: Master/slave port mode. * @master_slave_state: Master/slave port state. + * @rate_matching: Rate adaptation performed by the PHY * @reserved: Reserved for future use; see the note on reserved space. - * @reserved1: Reserved for future use; see the note on reserved space. * @link_mode_masks: Variable length bitmaps. 
* * If autonegotiation is disabled, the speed and @duplex represent the @@ -2085,7 +2099,7 @@ struct ethtool_link_settings { __u8 transceiver; __u8 master_slave_cfg; __u8 master_slave_state; - __u8 reserved1[1]; + __u8 rate_matching; __u32 reserved[7]; __u32 link_mode_masks[]; /* layout of link_mode_masks fields: diff --git a/include/uapi/linux/ethtool_netlink.h b/include/uapi/linux/ethtool_netlink.h index d2fb4f7be61b..408a664fad59 100644 --- a/include/uapi/linux/ethtool_netlink.h +++ b/include/uapi/linux/ethtool_netlink.h @@ -242,6 +242,7 @@ enum { ETHTOOL_A_LINKMODES_MASTER_SLAVE_CFG, /* u8 */ ETHTOOL_A_LINKMODES_MASTER_SLAVE_STATE, /* u8 */ ETHTOOL_A_LINKMODES_LANES, /* u32 */ + ETHTOOL_A_LINKMODES_RATE_MATCHING, /* u8 */ /* add new constants above here */ __ETHTOOL_A_LINKMODES_CNT, -- cgit v1.2.3 From 195624d9c26b64c6856863da30ec578a790feec4 Mon Sep 17 00:00:00 2001 From: Patrick Rohr Date: Tue, 20 Sep 2022 12:48:25 -0700 Subject: tun: support not enabling carrier in TUNSETIFF MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This change adds support for not enabling carrier during TUNSETIFF interface creation by specifying the IFF_NO_CARRIER flag. Our tests make heavy use of tun interfaces. In some scenarios, the test process creates the interface but another process brings it up after the interface is discovered via netlink notification. In that case, it is not possible to create a tun/tap interface with carrier off without it racing against the bring up. Immediately setting carrier off via TUNSETCARRIER is still too late. Signed-off-by: Patrick Rohr Cc: Maciej Żenczykowski Cc: Lorenzo Colitti Cc: Jason Wang Cc: Stephen Hemminger Cc: Nicolas Dichtel Reviewed-by: Maciej Żenczykowski Acked-by: Jason Wang Reviewed-by: Nicolas Dichtel Signed-off-by: David S. 
Miller --- include/uapi/linux/if_tun.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/if_tun.h b/include/uapi/linux/if_tun.h index 2ec07de1d73b..b6d7b868f290 100644 --- a/include/uapi/linux/if_tun.h +++ b/include/uapi/linux/if_tun.h @@ -67,6 +67,8 @@ #define IFF_TAP 0x0002 #define IFF_NAPI 0x0010 #define IFF_NAPI_FRAGS 0x0020 +/* Used in TUNSETIFF to bring up tun/tap without carrier */ +#define IFF_NO_CARRIER 0x0040 #define IFF_NO_PI 0x1000 /* This flag has no real effect */ #define IFF_ONE_QUEUE 0x2000 -- cgit v1.2.3 From 77a440e2cbb4b8688b567104f80ce1cda1afbbc4 Mon Sep 17 00:00:00 2001 From: ZiyangZhang Date: Fri, 23 Sep 2022 23:39:14 +0800 Subject: ublk_drv: define macros for recovery feature and check them Define some macros for recovery feature. UBLK_S_DEV_QUIESCED implies that ublk_device is quiesced and is ready for recovery. This state can be observed by userspace. UBLK_F_USER_RECOVERY implies that: (1) ublk_drv enables recovery feature. It won't let monitor_work to automatically abort rqs and release the device. (2) With a dying ubq_daemon, ublk_drv ends(aborts) rqs issued to userspace(ublksrv) before crash. (3) With a dying ubq_daemon, in task work and ublk_queue_rq(), ublk_drv requeues rqs. 
Signed-off-by: ZiyangZhang Reviewed-by: Ming Lei Link: https://lore.kernel.org/r/20220923153919.44078-3-ZiyangZhang@linux.alibaba.com Signed-off-by: Jens Axboe --- include/uapi/linux/ublk_cmd.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/ublk_cmd.h b/include/uapi/linux/ublk_cmd.h index 677edaab2b66..340ff14bde49 100644 --- a/include/uapi/linux/ublk_cmd.h +++ b/include/uapi/linux/ublk_cmd.h @@ -74,9 +74,12 @@ */ #define UBLK_F_NEED_GET_DATA (1UL << 2) +#define UBLK_F_USER_RECOVERY (1UL << 3) + /* device state */ #define UBLK_S_DEV_DEAD 0 #define UBLK_S_DEV_LIVE 1 +#define UBLK_S_DEV_QUIESCED 2 /* shipped via sqe->cmd of io_uring command */ struct ublksrv_ctrl_cmd { -- cgit v1.2.3 From a0d41dc1137470fd4c5c2ef8fdc244d7565e69e6 Mon Sep 17 00:00:00 2001 From: ZiyangZhang Date: Fri, 23 Sep 2022 23:39:17 +0800 Subject: ublk_drv: support UBLK_F_USER_RECOVERY_REISSUE UBLK_F_USER_RECOVERY_REISSUE implies that: With a dying ubq_daemon, ublk_drv let monitor_work requeues rq issued to userspace(ublksrv) before the ubq_daemon is dying. UBLK_F_USER_RECOVERY_REISSUE is designed for backends which: (1) tolerate double-write since ublk_drv may issue the same rq twice. (2) does not let frontend users get I/O error, such as read-only FS and VM backend. 
Signed-off-by: ZiyangZhang Reviewed-by: Ming Lei Link: https://lore.kernel.org/r/20220923153919.44078-6-ZiyangZhang@linux.alibaba.com Signed-off-by: Jens Axboe --- include/uapi/linux/ublk_cmd.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/ublk_cmd.h b/include/uapi/linux/ublk_cmd.h index 340ff14bde49..332370628757 100644 --- a/include/uapi/linux/ublk_cmd.h +++ b/include/uapi/linux/ublk_cmd.h @@ -76,6 +76,8 @@ #define UBLK_F_USER_RECOVERY (1UL << 3) +#define UBLK_F_USER_RECOVERY_REISSUE (1UL << 4) + /* device state */ #define UBLK_S_DEV_DEAD 0 #define UBLK_S_DEV_LIVE 1 -- cgit v1.2.3 From c732a852b419fa057b53657e2daaf9433940391c Mon Sep 17 00:00:00 2001 From: ZiyangZhang Date: Fri, 23 Sep 2022 23:39:18 +0800 Subject: ublk_drv: add START_USER_RECOVERY and END_USER_RECOVERY support START_USER_RECOVERY and END_USER_RECOVERY are two new control commands to support user recovery feature. After a crash, user should send START_USER_RECOVERY, it will: (1) check if (a)current ublk_device is UBLK_S_DEV_QUIESCED which was set by quiesce_work and (b)chardev is released (2) reinit all ubqs, including: (a) put the task_struct and reset ->ubq_daemon to NULL. (b) reset all ublk_io. (3) reset ub->mm to NULL. Then, user should start a new process and send FETCH_REQ on each ubq_daemon. Finally, user should send END_USER_RECOVERY, it will: (1) wait for all new ubq_daemons getting ready. (2) update ublksrv_pid (3) unquiesce the request queue and expect incoming ublk_queue_rq() (4) convert ub's state to UBLK_S_DEV_LIVE Note: we can handle STOP_DEV between START_USER_RECOVERY and END_USER_RECOVERY. This is helpful to users who cannot start new process after sending START_USER_RECOVERY ctrl-cmd. 
Signed-off-by: ZiyangZhang Reviewed-by: Ming Lei Link: https://lore.kernel.org/r/20220923153919.44078-7-ZiyangZhang@linux.alibaba.com Signed-off-by: Jens Axboe --- include/uapi/linux/ublk_cmd.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/ublk_cmd.h b/include/uapi/linux/ublk_cmd.h index 332370628757..8f88e3a29998 100644 --- a/include/uapi/linux/ublk_cmd.h +++ b/include/uapi/linux/ublk_cmd.h @@ -17,7 +17,8 @@ #define UBLK_CMD_STOP_DEV 0x07 #define UBLK_CMD_SET_PARAMS 0x08 #define UBLK_CMD_GET_PARAMS 0x09 - +#define UBLK_CMD_START_USER_RECOVERY 0x10 +#define UBLK_CMD_END_USER_RECOVERY 0x11 /* * IO commands, issued by ublk server, and handled by ublk driver. * -- cgit v1.2.3 From 7d37539037c2fca70346fbedc219f655253d5cff Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Sat, 24 Sep 2022 07:00:00 +0200 Subject: fuse: implement ->tmpfile() This is basically equivalent to the FUSE_CREATE operation which creates and opens a regular file. Add a new FUSE_TMPFILE operation, otherwise just reuse the protocol and the code for FUSE_CREATE. 
Acked-by: Christian Brauner (Microsoft) Signed-off-by: Miklos Szeredi --- include/uapi/linux/fuse.h | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/fuse.h b/include/uapi/linux/fuse.h index d6ccee961891..76ee8f9e024a 100644 --- a/include/uapi/linux/fuse.h +++ b/include/uapi/linux/fuse.h @@ -194,6 +194,9 @@ * - add FUSE_SECURITY_CTX init flag * - add security context to create, mkdir, symlink, and mknod requests * - add FUSE_HAS_INODE_DAX, FUSE_ATTR_DAX + * + * 7.37 + * - add FUSE_TMPFILE */ #ifndef _LINUX_FUSE_H @@ -229,7 +232,7 @@ #define FUSE_KERNEL_VERSION 7 /** Minor version number of this interface */ -#define FUSE_KERNEL_MINOR_VERSION 36 +#define FUSE_KERNEL_MINOR_VERSION 37 /** The node ID of the root inode */ #define FUSE_ROOT_ID 1 @@ -537,6 +540,7 @@ enum fuse_opcode { FUSE_SETUPMAPPING = 48, FUSE_REMOVEMAPPING = 49, FUSE_SYNCFS = 50, + FUSE_TMPFILE = 51, /* CUSE specific operations */ CUSE_INIT = 4096, -- cgit v1.2.3 From 8e2b7442d27ca2a56a116d44d597e77ca21dfed3 Mon Sep 17 00:00:00 2001 From: Laurent Pinchart Date: Thu, 9 Jun 2022 12:31:13 +0200 Subject: media: rockchip: rkisp1: Define macros for DPCC configurations in UAPI Extend the UAPI rkisp1-config.h header with macros for all DPCC configuration fields. While at it, clarify or fix issues in the DPCC documentation. 
Signed-off-by: Laurent Pinchart Reviewed-by: Paul Elder Reviewed-by: Dafna Hirschfeld Signed-off-by: Mauro Carvalho Chehab --- include/uapi/linux/rkisp1-config.h | 77 ++++++++++++++++++++++++++++++-------- 1 file changed, 61 insertions(+), 16 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/rkisp1-config.h b/include/uapi/linux/rkisp1-config.h index 583ca0d9a79d..730673ecc63d 100644 --- a/include/uapi/linux/rkisp1-config.h +++ b/include/uapi/linux/rkisp1-config.h @@ -117,7 +117,46 @@ /* * Defect Pixel Cluster Correction */ -#define RKISP1_CIF_ISP_DPCC_METHODS_MAX 3 +#define RKISP1_CIF_ISP_DPCC_METHODS_MAX 3 + +#define RKISP1_CIF_ISP_DPCC_MODE_STAGE1_ENABLE (1U << 2) + +#define RKISP1_CIF_ISP_DPCC_OUTPUT_MODE_STAGE1_INCL_G_CENTER (1U << 0) +#define RKISP1_CIF_ISP_DPCC_OUTPUT_MODE_STAGE1_INCL_RB_CENTER (1U << 1) +#define RKISP1_CIF_ISP_DPCC_OUTPUT_MODE_STAGE1_G_3X3 (1U << 2) +#define RKISP1_CIF_ISP_DPCC_OUTPUT_MODE_STAGE1_RB_3X3 (1U << 3) + +/* 0-2 for sets 1-3 */ +#define RKISP1_CIF_ISP_DPCC_SET_USE_STAGE1_USE_SET(n) ((n) << 0) +#define RKISP1_CIF_ISP_DPCC_SET_USE_STAGE1_USE_FIX_SET (1U << 3) + +#define RKISP1_CIF_ISP_DPCC_METHODS_SET_PG_GREEN_ENABLE (1U << 0) +#define RKISP1_CIF_ISP_DPCC_METHODS_SET_LC_GREEN_ENABLE (1U << 1) +#define RKISP1_CIF_ISP_DPCC_METHODS_SET_RO_GREEN_ENABLE (1U << 2) +#define RKISP1_CIF_ISP_DPCC_METHODS_SET_RND_GREEN_ENABLE (1U << 3) +#define RKISP1_CIF_ISP_DPCC_METHODS_SET_RG_GREEN_ENABLE (1U << 4) +#define RKISP1_CIF_ISP_DPCC_METHODS_SET_PG_RED_BLUE_ENABLE (1U << 8) +#define RKISP1_CIF_ISP_DPCC_METHODS_SET_LC_RED_BLUE_ENABLE (1U << 9) +#define RKISP1_CIF_ISP_DPCC_METHODS_SET_RO_RED_BLUE_ENABLE (1U << 10) +#define RKISP1_CIF_ISP_DPCC_METHODS_SET_RND_RED_BLUE_ENABLE (1U << 11) +#define RKISP1_CIF_ISP_DPCC_METHODS_SET_RG_RED_BLUE_ENABLE (1U << 12) + +#define RKISP1_CIF_ISP_DPCC_LINE_THRESH_G(v) ((v) << 0) +#define RKISP1_CIF_ISP_DPCC_LINE_THRESH_RB(v) ((v) << 8) +#define RKISP1_CIF_ISP_DPCC_LINE_MAD_FAC_G(v) ((v) 
<< 0) +#define RKISP1_CIF_ISP_DPCC_LINE_MAD_FAC_RB(v) ((v) << 8) +#define RKISP1_CIF_ISP_DPCC_PG_FAC_G(v) ((v) << 0) +#define RKISP1_CIF_ISP_DPCC_PG_FAC_RB(v) ((v) << 8) +#define RKISP1_CIF_ISP_DPCC_RND_THRESH_G(v) ((v) << 0) +#define RKISP1_CIF_ISP_DPCC_RND_THRESH_RB(v) ((v) << 8) +#define RKISP1_CIF_ISP_DPCC_RG_FAC_G(v) ((v) << 0) +#define RKISP1_CIF_ISP_DPCC_RG_FAC_RB(v) ((v) << 8) + +#define RKISP1_CIF_ISP_DPCC_RO_LIMITS_n_G(n, v) ((v) << ((n) * 4)) +#define RKISP1_CIF_ISP_DPCC_RO_LIMITS_n_RB(n, v) ((v) << ((n) * 4 + 2)) + +#define RKISP1_CIF_ISP_DPCC_RND_OFFS_n_G(n, v) ((v) << ((n) * 4)) +#define RKISP1_CIF_ISP_DPCC_RND_OFFS_n_RB(n, v) ((v) << ((n) * 4 + 2)) /* * Denoising pre filter @@ -249,16 +288,20 @@ struct rkisp1_cif_isp_bls_config { }; /** - * struct rkisp1_cif_isp_dpcc_methods_config - Methods Configuration used by DPCC + * struct rkisp1_cif_isp_dpcc_methods_config - DPCC methods set configuration * - * Methods Configuration used by Defect Pixel Cluster Correction + * This structure stores the configuration of one set of methods for the DPCC + * algorithm. Multiple methods can be selected in each set (independently for + * the Green and Red/Blue components) through the @method field, the result is + * the logical AND of all enabled methods. The remaining fields set thresholds + * and factors for each method. 
* - * @method: Method enable bits - * @line_thresh: Line threshold - * @line_mad_fac: Line MAD factor - * @pg_fac: Peak gradient factor - * @rnd_thresh: Rank Neighbor Difference threshold - * @rg_fac: Rank gradient factor + * @method: Method enable bits (RKISP1_CIF_ISP_DPCC_METHODS_SET_*) + * @line_thresh: Line threshold (RKISP1_CIF_ISP_DPCC_LINE_THRESH_*) + * @line_mad_fac: Line Mean Absolute Difference factor (RKISP1_CIF_ISP_DPCC_LINE_MAD_FAC_*) + * @pg_fac: Peak gradient factor (RKISP1_CIF_ISP_DPCC_PG_FAC_*) + * @rnd_thresh: Rank Neighbor Difference threshold (RKISP1_CIF_ISP_DPCC_RND_THRESH_*) + * @rg_fac: Rank gradient factor (RKISP1_CIF_ISP_DPCC_RG_FAC_*) */ struct rkisp1_cif_isp_dpcc_methods_config { __u32 method; @@ -272,14 +315,16 @@ struct rkisp1_cif_isp_dpcc_methods_config { /** * struct rkisp1_cif_isp_dpcc_config - Configuration used by DPCC * - * Configuration used by Defect Pixel Cluster Correction + * Configuration used by Defect Pixel Cluster Correction. Three sets of methods + * can be configured and selected through the @set_use field. The result is the + * logical OR of all enabled sets. 
* - * @mode: dpcc output mode - * @output_mode: whether use hard coded methods - * @set_use: stage1 methods set - * @methods: methods config - * @ro_limits: rank order limits - * @rnd_offs: differential rank offsets for rank neighbor difference + * @mode: DPCC mode (RKISP1_CIF_ISP_DPCC_MODE_*) + * @output_mode: Interpolation output mode (RKISP1_CIF_ISP_DPCC_OUTPUT_MODE_*) + * @set_use: Methods sets selection (RKISP1_CIF_ISP_DPCC_SET_USE_*) + * @methods: Methods sets configuration + * @ro_limits: Rank order limits (RKISP1_CIF_ISP_DPCC_RO_LIMITS_*) + * @rnd_offs: Differential rank offsets for rank neighbor difference (RKISP1_CIF_ISP_DPCC_RND_OFFS_*) */ struct rkisp1_cif_isp_dpcc_config { __u32 mode; -- cgit v1.2.3 From 479747caa5bfa94b856bf47249006e6c8aa8be37 Mon Sep 17 00:00:00 2001 From: Hans Verkuil Date: Tue, 30 Aug 2022 12:37:24 +0200 Subject: media: cec: add support for Absolute Volume Control Add support for this new CEC message. This was added in HDMI 2.1a. Signed-off-by: Hans Verkuil Signed-off-by: Mauro Carvalho Chehab --- include/uapi/linux/cec-funcs.h | 14 ++++++++++++++ include/uapi/linux/cec.h | 2 ++ 2 files changed, 16 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/cec-funcs.h b/include/uapi/linux/cec-funcs.h index c3baaea0b8ef..d58fa1cdcb08 100644 --- a/include/uapi/linux/cec-funcs.h +++ b/include/uapi/linux/cec-funcs.h @@ -1568,6 +1568,20 @@ static inline void cec_ops_request_short_audio_descriptor(const struct cec_msg * } } +static inline void cec_msg_set_audio_volume_level(struct cec_msg *msg, + __u8 audio_volume_level) +{ + msg->len = 3; + msg->msg[1] = CEC_MSG_SET_AUDIO_VOLUME_LEVEL; + msg->msg[2] = audio_volume_level; +} + +static inline void cec_ops_set_audio_volume_level(const struct cec_msg *msg, + __u8 *audio_volume_level) +{ + *audio_volume_level = msg->msg[2]; +} + /* Audio Rate Control Feature */ static inline void cec_msg_set_audio_rate(struct cec_msg *msg, diff --git a/include/uapi/linux/cec.h 
b/include/uapi/linux/cec.h index 1d48da926216..b8e071abaea5 100644 --- a/include/uapi/linux/cec.h +++ b/include/uapi/linux/cec.h @@ -768,6 +768,7 @@ struct cec_event { #define CEC_OP_FEAT_DEV_HAS_SET_AUDIO_RATE 0x08 #define CEC_OP_FEAT_DEV_SINK_HAS_ARC_TX 0x04 #define CEC_OP_FEAT_DEV_SOURCE_HAS_ARC_RX 0x02 +#define CEC_OP_FEAT_DEV_HAS_SET_AUDIO_VOLUME_LEVEL 0x01 #define CEC_MSG_GIVE_FEATURES 0xa5 /* HDMI 2.0 */ @@ -1059,6 +1060,7 @@ struct cec_event { #define CEC_OP_AUD_FMT_ID_CEA861 0 #define CEC_OP_AUD_FMT_ID_CEA861_CXT 1 +#define CEC_MSG_SET_AUDIO_VOLUME_LEVEL 0x73 /* Audio Rate Control Feature */ #define CEC_MSG_SET_AUDIO_RATE 0x9a -- cgit v1.2.3 From 1c56ab991903dce60e905a08f431c0e6f79b9b9e Mon Sep 17 00:00:00 2001 From: Qu Wenruo Date: Tue, 9 Aug 2022 13:02:18 +0800 Subject: btrfs: separate BLOCK_GROUP_TREE compat RO flag from EXTENT_TREE_V2 The problem of long mount time caused by block group item search is already known for some time, and the solution of block group tree has been proposed. There is really no need to bound this feature into extent tree v2, just introduce compat RO flag, BLOCK_GROUP_TREE, to correctly solve the problem. All the code handling block group root is already in the upstream kernel, thus this patch really only needs to introduce the new compat RO flag. This patch introduces one extra artificial limitation on block group tree feature, that free space cache v2 and no-holes feature must be enabled to use this new compat RO feature. This artificial requirement is mostly to reduce the test combinations, and can be a guideline for future features, to mostly rely on the latest default features. 
Signed-off-by: Qu Wenruo Reviewed-by: David Sterba Signed-off-by: David Sterba --- include/uapi/linux/btrfs.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/btrfs.h b/include/uapi/linux/btrfs.h index 7ada84e4a3ed..5655e89b962b 100644 --- a/include/uapi/linux/btrfs.h +++ b/include/uapi/linux/btrfs.h @@ -290,6 +290,12 @@ struct btrfs_ioctl_fs_info_args { #define BTRFS_FEATURE_COMPAT_RO_FREE_SPACE_TREE_VALID (1ULL << 1) #define BTRFS_FEATURE_COMPAT_RO_VERITY (1ULL << 2) +/* + * Put all block group items into a dedicated block group tree, greatly + * reducing mount time for large filesystem due to better locality. + */ +#define BTRFS_FEATURE_COMPAT_RO_BLOCK_GROUP_TREE (1ULL << 3) + #define BTRFS_FEATURE_INCOMPAT_MIXED_BACKREF (1ULL << 0) #define BTRFS_FEATURE_INCOMPAT_DEFAULT_SUBVOL (1ULL << 1) #define BTRFS_FEATURE_INCOMPAT_MIXED_GROUPS (1ULL << 2) -- cgit v1.2.3 From e71564c0438a1c0cffc5c8eb302ec5d849103b08 Mon Sep 17 00:00:00 2001 From: Qu Wenruo Date: Wed, 24 Aug 2022 09:14:06 +0800 Subject: btrfs: introduce BTRFS_QGROUP_STATUS_FLAGS_MASK for later expansion Currently we only have 3 qgroup flags: - BTRFS_QGROUP_STATUS_FLAG_ON - BTRFS_QGROUP_STATUS_FLAG_RESCAN - BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT These flags match the on-disk flags used in btrfs_qgroup_status. But we're going to introduce extra runtime flags which will not reach disks. So here we introduce a new mask, BTRFS_QGROUP_STATUS_FLAGS_MASK, to make sure only those flags can reach disks. 
Signed-off-by: Qu Wenruo Reviewed-by: David Sterba Signed-off-by: David Sterba --- include/uapi/linux/btrfs_tree.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/btrfs_tree.h b/include/uapi/linux/btrfs_tree.h index 5f32a2a495dc..1f7a38ec6ac3 100644 --- a/include/uapi/linux/btrfs_tree.h +++ b/include/uapi/linux/btrfs_tree.h @@ -965,6 +965,10 @@ static inline __u16 btrfs_qgroup_level(__u64 qgroupid) */ #define BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT (1ULL << 2) +#define BTRFS_QGROUP_STATUS_FLAGS_MASK (BTRFS_QGROUP_STATUS_FLAG_ON | \ + BTRFS_QGROUP_STATUS_FLAG_RESCAN | \ + BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT) + #define BTRFS_QGROUP_STATUS_VERSION 1 struct btrfs_qgroup_status_item { -- cgit v1.2.3 From 0e253f7e558a3e250902ba2034091e0185448836 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Mon, 26 Sep 2022 17:33:39 +0200 Subject: bpf: Return value in kprobe get_func_ip only for entry address Changing return value of kprobe's version of bpf_get_func_ip to return zero if the attach address is not on the function's entry point. For kprobes attached in the middle of the function we can't easily get to the function address especially now with the CONFIG_X86_KERNEL_IBT support. If user cares about current IP for kprobes attached within the function body, they can get it with PT_REGS_IP(ctx). Suggested-by: Andrii Nakryiko Acked-by: Andrii Nakryiko Acked-by: Martynas Pumputis Signed-off-by: Jiri Olsa Link: https://lore.kernel.org/r/20220926153340.1621984-6-jolsa@kernel.org Signed-off-by: Alexei Starovoitov --- include/uapi/linux/bpf.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index ead35f39f185..d6bd10759eaf 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -4951,6 +4951,7 @@ union bpf_attr { * Get address of the traced function (for tracing and kprobe programs). * Return * Address of the traced function. 
+ * 0 for kprobes placed within the function (not at the entry). * * u64 bpf_get_attach_cookie(void *ctx) * Description -- cgit v1.2.3 From 73dfe93ea1b319482e6d82a54fe06f953ceeeccb Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Thu, 22 Sep 2022 20:41:40 +0200 Subject: headers: Remove some left-over license text Remove some left-over from commit e2be04c7f995 ("License cleanup: add SPDX license identifier to uapi header files with a license") When the SPDX-License-Identifier tag has been added, the corresponding license text has not been removed. Signed-off-by: Christophe JAILLET Acked-by: Alexander Duyck Reviewed-by: Jiri Pirko Acked-by: Jamal Hadi Salim Link: https://lore.kernel.org/r/88410cddd31197ea26840d7dd71612bece8c6acf.1663871981.git.christophe.jaillet@wanadoo.fr Signed-off-by: Jakub Kicinski --- include/uapi/linux/tc_act/tc_bpf.h | 5 ----- include/uapi/linux/tc_act/tc_skbedit.h | 13 ------------- include/uapi/linux/tc_act/tc_skbmod.h | 7 +------ include/uapi/linux/tc_act/tc_tunnel_key.h | 5 ----- include/uapi/linux/tc_act/tc_vlan.h | 5 ----- 5 files changed, 1 insertion(+), 34 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/tc_act/tc_bpf.h b/include/uapi/linux/tc_act/tc_bpf.h index 653c4f94f76e..fe6c8f8f3e8c 100644 --- a/include/uapi/linux/tc_act/tc_bpf.h +++ b/include/uapi/linux/tc_act/tc_bpf.h @@ -1,11 +1,6 @@ /* SPDX-License-Identifier: GPL-2.0+ WITH Linux-syscall-note */ /* * Copyright (c) 2015 Jiri Pirko - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. 
*/ #ifndef __LINUX_TC_BPF_H diff --git a/include/uapi/linux/tc_act/tc_skbedit.h b/include/uapi/linux/tc_act/tc_skbedit.h index 6cb6101208d0..64032513cc4c 100644 --- a/include/uapi/linux/tc_act/tc_skbedit.h +++ b/include/uapi/linux/tc_act/tc_skbedit.h @@ -2,19 +2,6 @@ /* * Copyright (c) 2008, Intel Corporation. * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License along with - * this program; if not, write to the Free Software Foundation, Inc., 59 Temple - * Place - Suite 330, Boston, MA 02111-1307 USA. - * * Author: Alexander Duyck */ diff --git a/include/uapi/linux/tc_act/tc_skbmod.h b/include/uapi/linux/tc_act/tc_skbmod.h index af6ef2cfbf3d..ac62c9a993ea 100644 --- a/include/uapi/linux/tc_act/tc_skbmod.h +++ b/include/uapi/linux/tc_act/tc_skbmod.h @@ -1,12 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0+ WITH Linux-syscall-note */ /* * Copyright (c) 2016, Jamal Hadi Salim - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. 
-*/ + */ #ifndef __LINUX_TC_SKBMOD_H #define __LINUX_TC_SKBMOD_H diff --git a/include/uapi/linux/tc_act/tc_tunnel_key.h b/include/uapi/linux/tc_act/tc_tunnel_key.h index 3f10dc4e7a4b..49ad4033951b 100644 --- a/include/uapi/linux/tc_act/tc_tunnel_key.h +++ b/include/uapi/linux/tc_act/tc_tunnel_key.h @@ -2,11 +2,6 @@ /* * Copyright (c) 2016, Amir Vadai * Copyright (c) 2016, Mellanox Technologies. All rights reserved. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. */ #ifndef __LINUX_TC_TUNNEL_KEY_H diff --git a/include/uapi/linux/tc_act/tc_vlan.h b/include/uapi/linux/tc_act/tc_vlan.h index 5b306fe815cc..3e1f8e57cdd2 100644 --- a/include/uapi/linux/tc_act/tc_vlan.h +++ b/include/uapi/linux/tc_act/tc_vlan.h @@ -1,11 +1,6 @@ /* SPDX-License-Identifier: GPL-2.0+ WITH Linux-syscall-note */ /* * Copyright (c) 2014 Jiri Pirko - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. */ #ifndef __LINUX_TC_VLAN_H -- cgit v1.2.3 From 62e56ef57c04c0cacb33433d7984a4d71b690b3f Mon Sep 17 00:00:00 2001 From: Taehee Yoo Date: Sun, 25 Sep 2022 15:00:33 +0000 Subject: net: tls: Add ARIA-GCM algorithm RFC 6209 describes ARIA for TLS 1.2. ARIA-128-GCM and ARIA-256-GCM are defined in RFC 6209. This patch would offer performance increment and an opportunity for hardware offload. Benchmark results: iperf-ssl are used. CPU: intel i3-12100. 
TLS(openssl-3.0-dev) [ 3] 0.0- 1.0 sec 185 MBytes 1.55 Gbits/sec [ 3] 1.0- 2.0 sec 186 MBytes 1.56 Gbits/sec [ 3] 2.0- 3.0 sec 186 MBytes 1.56 Gbits/sec [ 3] 3.0- 4.0 sec 186 MBytes 1.56 Gbits/sec [ 3] 4.0- 5.0 sec 186 MBytes 1.56 Gbits/sec [ 3] 0.0- 5.0 sec 927 MBytes 1.56 Gbits/sec kTLS(aria-generic) [ 3] 0.0- 1.0 sec 198 MBytes 1.66 Gbits/sec [ 3] 1.0- 2.0 sec 194 MBytes 1.62 Gbits/sec [ 3] 2.0- 3.0 sec 194 MBytes 1.63 Gbits/sec [ 3] 3.0- 4.0 sec 194 MBytes 1.63 Gbits/sec [ 3] 4.0- 5.0 sec 194 MBytes 1.62 Gbits/sec [ 3] 0.0- 5.0 sec 974 MBytes 1.63 Gbits/sec kTLS(aria-avx with GFNI) [ 3] 0.0- 1.0 sec 632 MBytes 5.30 Gbits/sec [ 3] 1.0- 2.0 sec 657 MBytes 5.51 Gbits/sec [ 3] 2.0- 3.0 sec 657 MBytes 5.51 Gbits/sec [ 3] 3.0- 4.0 sec 656 MBytes 5.50 Gbits/sec [ 3] 4.0- 5.0 sec 656 MBytes 5.50 Gbits/sec [ 3] 0.0- 5.0 sec 3.18 GBytes 5.47 Gbits/sec Signed-off-by: Taehee Yoo Reviewed-by: Vadim Fedorenko Link: https://lore.kernel.org/r/20220925150033.24615-1-ap420073@gmail.com Signed-off-by: Jakub Kicinski --- include/uapi/linux/tls.h | 30 ++++++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/tls.h b/include/uapi/linux/tls.h index f1157d8f4acd..b66a800389cc 100644 --- a/include/uapi/linux/tls.h +++ b/include/uapi/linux/tls.h @@ -100,6 +100,20 @@ #define TLS_CIPHER_SM4_CCM_TAG_SIZE 16 #define TLS_CIPHER_SM4_CCM_REC_SEQ_SIZE 8 +#define TLS_CIPHER_ARIA_GCM_128 57 +#define TLS_CIPHER_ARIA_GCM_128_IV_SIZE 8 +#define TLS_CIPHER_ARIA_GCM_128_KEY_SIZE 16 +#define TLS_CIPHER_ARIA_GCM_128_SALT_SIZE 4 +#define TLS_CIPHER_ARIA_GCM_128_TAG_SIZE 16 +#define TLS_CIPHER_ARIA_GCM_128_REC_SEQ_SIZE 8 + +#define TLS_CIPHER_ARIA_GCM_256 58 +#define TLS_CIPHER_ARIA_GCM_256_IV_SIZE 8 +#define TLS_CIPHER_ARIA_GCM_256_KEY_SIZE 32 +#define TLS_CIPHER_ARIA_GCM_256_SALT_SIZE 4 +#define TLS_CIPHER_ARIA_GCM_256_TAG_SIZE 16 +#define TLS_CIPHER_ARIA_GCM_256_REC_SEQ_SIZE 8 + #define TLS_SET_RECORD_TYPE 1 #define 
TLS_GET_RECORD_TYPE 2 @@ -156,6 +170,22 @@ struct tls12_crypto_info_sm4_ccm { unsigned char rec_seq[TLS_CIPHER_SM4_CCM_REC_SEQ_SIZE]; }; +struct tls12_crypto_info_aria_gcm_128 { + struct tls_crypto_info info; + unsigned char iv[TLS_CIPHER_ARIA_GCM_128_IV_SIZE]; + unsigned char key[TLS_CIPHER_ARIA_GCM_128_KEY_SIZE]; + unsigned char salt[TLS_CIPHER_ARIA_GCM_128_SALT_SIZE]; + unsigned char rec_seq[TLS_CIPHER_ARIA_GCM_128_REC_SEQ_SIZE]; +}; + +struct tls12_crypto_info_aria_gcm_256 { + struct tls_crypto_info info; + unsigned char iv[TLS_CIPHER_ARIA_GCM_256_IV_SIZE]; + unsigned char key[TLS_CIPHER_ARIA_GCM_256_KEY_SIZE]; + unsigned char salt[TLS_CIPHER_ARIA_GCM_256_SALT_SIZE]; + unsigned char rec_seq[TLS_CIPHER_ARIA_GCM_256_REC_SEQ_SIZE]; +}; + enum { TLS_INFO_UNSPEC, TLS_INFO_VERSION, -- cgit v1.2.3 From 3137f2e60098bccdf9b3a744747b06a96addab9e Mon Sep 17 00:00:00 2001 From: Dmitry Baryshkov Date: Mon, 26 Sep 2022 14:07:58 +0300 Subject: firmware/psci: Add debugfs support to ease debugging To ease debugging of PSCI supported features, add debugfs file called 'psci' describing PSCI and SMC CC versions, enabled features and options. 
Signed-off-by: Dmitry Baryshkov Reviewed-by: Mark Brown Reviewed-by: Ulf Hansson Link: https://lore.kernel.org/r/20220926110758.666922-1-dmitry.baryshkov@linaro.org' Signed-off-by: Arnd Bergmann --- include/uapi/linux/psci.h | 14 ++++++++++++++ 1 file changed, 14 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/psci.h b/include/uapi/linux/psci.h index 2bf93c0d6354..3511095c2702 100644 --- a/include/uapi/linux/psci.h +++ b/include/uapi/linux/psci.h @@ -48,12 +48,26 @@ #define PSCI_0_2_FN64_MIGRATE_INFO_UP_CPU PSCI_0_2_FN64(7) #define PSCI_1_0_FN_PSCI_FEATURES PSCI_0_2_FN(10) +#define PSCI_1_0_FN_CPU_FREEZE PSCI_0_2_FN(11) +#define PSCI_1_0_FN_CPU_DEFAULT_SUSPEND PSCI_0_2_FN(12) +#define PSCI_1_0_FN_NODE_HW_STATE PSCI_0_2_FN(13) #define PSCI_1_0_FN_SYSTEM_SUSPEND PSCI_0_2_FN(14) #define PSCI_1_0_FN_SET_SUSPEND_MODE PSCI_0_2_FN(15) +#define PSCI_1_0_FN_STAT_RESIDENCY PSCI_0_2_FN(16) +#define PSCI_1_0_FN_STAT_COUNT PSCI_0_2_FN(17) + #define PSCI_1_1_FN_SYSTEM_RESET2 PSCI_0_2_FN(18) +#define PSCI_1_1_FN_MEM_PROTECT PSCI_0_2_FN(19) +#define PSCI_1_1_FN_MEM_PROTECT_CHECK_RANGE PSCI_0_2_FN(19) +#define PSCI_1_0_FN64_CPU_DEFAULT_SUSPEND PSCI_0_2_FN64(12) +#define PSCI_1_0_FN64_NODE_HW_STATE PSCI_0_2_FN64(13) #define PSCI_1_0_FN64_SYSTEM_SUSPEND PSCI_0_2_FN64(14) +#define PSCI_1_0_FN64_STAT_RESIDENCY PSCI_0_2_FN64(16) +#define PSCI_1_0_FN64_STAT_COUNT PSCI_0_2_FN64(17) + #define PSCI_1_1_FN64_SYSTEM_RESET2 PSCI_0_2_FN64(18) +#define PSCI_1_1_FN64_MEM_PROTECT_CHECK_RANGE PSCI_0_2_FN64(19) /* PSCI v0.2 power state encoding for CPU_SUSPEND function */ #define PSCI_0_2_POWER_STATE_ID_MASK 0xffff -- cgit v1.2.3 From f0d74c4da1f060d2a66976193712a5e6abd361f5 Mon Sep 17 00:00:00 2001 From: Kui-Feng Lee Date: Mon, 26 Sep 2022 11:49:53 -0700 Subject: bpf: Parameterize task iterators. Allow creating an iterator that loops through resources of one thread/process. 
People could only create iterators to loop through all resources of files, vma, and tasks in the system, even though they were interested in only the resources of a specific task or process. Passing the additional parameters, people can now create an iterator to go through all resources or only the resources of a task. Signed-off-by: Kui-Feng Lee Signed-off-by: Andrii Nakryiko Acked-by: Yonghong Song Acked-by: Martin KaFai Lau Link: https://lore.kernel.org/bpf/20220926184957.208194-2-kuifeng@fb.com --- include/uapi/linux/bpf.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index d6bd10759eaf..455b21a53aac 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -110,6 +110,12 @@ union bpf_iter_link_info { __u32 cgroup_fd; __u64 cgroup_id; } cgroup; + /* Parameters of task iterators. */ + struct { + __u32 tid; + __u32 pid; + __u32 pid_fd; + } task; }; /* BPF syscall commands, see bpf(2) man-page for more details. */ -- cgit v1.2.3 From 21fb6f2aa3890b0d0abf88b7756d0098e9367a7c Mon Sep 17 00:00:00 2001 From: Kui-Feng Lee Date: Mon, 26 Sep 2022 11:49:54 -0700 Subject: bpf: Handle bpf_link_info for the parameterized task BPF iterators. Add new fields to bpf_link_info that users can query through bpf_obj_get_info_by_fd(). 
Signed-off-by: Kui-Feng Lee Signed-off-by: Andrii Nakryiko Acked-by: Yonghong Song Acked-by: Martin KaFai Lau Link: https://lore.kernel.org/bpf/20220926184957.208194-3-kuifeng@fb.com --- include/uapi/linux/bpf.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 455b21a53aac..3075018a4ef8 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -6265,6 +6265,10 @@ struct bpf_link_info { __u64 cgroup_id; __u32 order; } cgroup; + struct { + __u32 tid; + __u32 pid; + } task; }; } iter; struct { -- cgit v1.2.3 From 1260cd04a601e0e02e09fa332111b8639611970d Mon Sep 17 00:00:00 2001 From: Nate Yocom Date: Wed, 28 Sep 2022 18:23:22 -0700 Subject: Input: add ABS_PROFILE to uapi and documentation Define new ABS_PROFILE axis for input devices which need it, e.g. X-Box Adaptive Controller and X-Box Elite 2. Signed-off-by: Nate Yocom Link: https://lore.kernel.org/r/20220908173930.28940-4-nate@yocom.org Signed-off-by: Dmitry Torokhov --- include/uapi/linux/input-event-codes.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/input-event-codes.h b/include/uapi/linux/input-event-codes.h index dff8e7f17074..7ad931a32970 100644 --- a/include/uapi/linux/input-event-codes.h +++ b/include/uapi/linux/input-event-codes.h @@ -862,6 +862,7 @@ #define ABS_TOOL_WIDTH 0x1c #define ABS_VOLUME 0x20 +#define ABS_PROFILE 0x21 #define ABS_MISC 0x28 -- cgit v1.2.3 From 17601bfed909fa080fcfd227b57da2bd4dc2d2a6 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Mon, 26 Sep 2022 15:51:16 +0100 Subject: KVM: Add KVM_CAP_DIRTY_LOG_RING_ACQ_REL capability and config option In order to differentiate between architectures that require no extra synchronisation when accessing the dirty ring and those who do, add a new capability (KVM_CAP_DIRTY_LOG_RING_ACQ_REL) that identifies the latter sort. 
TSO architectures can obviously advertise both, while relaxed architectures must only advertise the ACQ_REL version. This requires some configuration symbol rejigging, with HAVE_KVM_DIRTY_RING being only indirectly selected by two top-level config symbols: - HAVE_KVM_DIRTY_RING_TSO for strongly ordered architectures (x86) - HAVE_KVM_DIRTY_RING_ACQ_REL for weakly ordered architectures (arm64) Suggested-by: Paolo Bonzini Signed-off-by: Marc Zyngier Reviewed-by: Gavin Shan Reviewed-by: Peter Xu Link: https://lore.kernel.org/r/20220926145120.27974-3-maz@kernel.org --- include/uapi/linux/kvm.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index eed0315a77a6..0d5d4419139a 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -1177,6 +1177,7 @@ struct kvm_ppc_resize_hpt { #define KVM_CAP_VM_DISABLE_NX_HUGE_PAGES 220 #define KVM_CAP_S390_ZPCI_OP 221 #define KVM_CAP_S390_CPU_TOPOLOGY 222 +#define KVM_CAP_DIRTY_LOG_RING_ACQ_REL 223 #ifdef KVM_CAP_IRQ_ROUTING -- cgit v1.2.3 From ee3e88dfec23153d0675b5d00522297b9adf657c Mon Sep 17 00:00:00 2001 From: Ravi Bangoria Date: Wed, 28 Sep 2022 15:27:51 +0530 Subject: perf/mem: Introduce PERF_MEM_LVLNUM_{EXTN_MEM|IO} PERF_MEM_LVLNUM_EXTN_MEM which can be used to indicate accesses to extension memory like CXL etc. PERF_MEM_LVL_IO can be used for IO accesses but it can not distinguish between local and remote IO. Introduce new field PERF_MEM_LVLNUM_IO which can be clubbed with PERF_MEM_REMOTE_REMOTE to indicate Remote IO accesses. 
Signed-off-by: Ravi Bangoria Signed-off-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r/20220928095805.596-2-ravi.bangoria@amd.com --- include/uapi/linux/perf_event.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/perf_event.h b/include/uapi/linux/perf_event.h index e639c74cf5fb..4ae3c249f675 100644 --- a/include/uapi/linux/perf_event.h +++ b/include/uapi/linux/perf_event.h @@ -1336,7 +1336,9 @@ union perf_mem_data_src { #define PERF_MEM_LVLNUM_L2 0x02 /* L2 */ #define PERF_MEM_LVLNUM_L3 0x03 /* L3 */ #define PERF_MEM_LVLNUM_L4 0x04 /* L4 */ -/* 5-0xa available */ +/* 5-0x8 available */ +#define PERF_MEM_LVLNUM_EXTN_MEM 0x09 /* Extension memory */ +#define PERF_MEM_LVLNUM_IO 0x0a /* I/O */ #define PERF_MEM_LVLNUM_ANY_CACHE 0x0b /* Any cache */ #define PERF_MEM_LVLNUM_LFB 0x0c /* LFB */ #define PERF_MEM_LVLNUM_RAM 0x0d /* RAM */ -- cgit v1.2.3 From cfef80bad4cf79cdc964a53c98254dfa462be83f Mon Sep 17 00:00:00 2001 From: Ravi Bangoria Date: Wed, 28 Sep 2022 15:27:57 +0530 Subject: perf/uapi: Define PERF_MEM_SNOOPX_PEER in kernel header file PERF_MEM_SNOOPX_PEER is defined only in tools uapi header. Although it's used only by perf tool, not defining it in kernel header can create problems in future. 
Signed-off-by: Ravi Bangoria Signed-off-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r/20220928095805.596-8-ravi.bangoria@amd.com --- include/uapi/linux/perf_event.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/perf_event.h b/include/uapi/linux/perf_event.h index 4ae3c249f675..85be78e0e7f6 100644 --- a/include/uapi/linux/perf_event.h +++ b/include/uapi/linux/perf_event.h @@ -1356,7 +1356,7 @@ union perf_mem_data_src { #define PERF_MEM_SNOOP_SHIFT 19 #define PERF_MEM_SNOOPX_FWD 0x01 /* forward */ -/* 1 free */ +#define PERF_MEM_SNOOPX_PEER 0x02 /* xfer from peer */ #define PERF_MEM_SNOOPX_SHIFT 38 /* locked instruction */ -- cgit v1.2.3 From 2fff00c81d4c37a037cf704d2d219fbcb45aea3c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micka=C3=ABl=20Sala=C3=BCn?= Date: Fri, 23 Sep 2022 17:42:07 +0200 Subject: landlock: Fix documentation style MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit It seems that all code should use double backquotes, which is also used to convert "%" defines. Let's use an homogeneous style and remove all use of simple backquotes (which should only be used for emphasis). Cc: Günther Noack Cc: Paul Moore Signed-off-by: Mickaël Salaün Link: https://lore.kernel.org/r/20220923154207.3311629-4-mic@digikod.net --- include/uapi/linux/landlock.h | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/landlock.h b/include/uapi/linux/landlock.h index 23df4e0e8ace..9c4bcc37a455 100644 --- a/include/uapi/linux/landlock.h +++ b/include/uapi/linux/landlock.h @@ -26,7 +26,7 @@ struct landlock_ruleset_attr { * Landlock filesystem access rights that are not part of * handled_access_fs are allowed. This is needed for backward * compatibility reasons. 
One exception is the - * LANDLOCK_ACCESS_FS_REFER access right, which is always implicitly + * %LANDLOCK_ACCESS_FS_REFER access right, which is always implicitly * handled, but must still be explicitly handled to add new rules with * this access right. */ @@ -128,11 +128,11 @@ struct landlock_path_beneath_attr { * hierarchy must also always have the same or a superset of restrictions of * the source hierarchy. If it is not the case, or if the domain doesn't * handle this access right, such actions are denied by default with errno - * set to EXDEV. Linking also requires a LANDLOCK_ACCESS_FS_MAKE_* access - * right on the destination directory, and renaming also requires a - * LANDLOCK_ACCESS_FS_REMOVE_* access right on the source's (file or + * set to ``EXDEV``. Linking also requires a ``LANDLOCK_ACCESS_FS_MAKE_*`` + * access right on the destination directory, and renaming also requires a + * ``LANDLOCK_ACCESS_FS_REMOVE_*`` access right on the source's (file or * directory) parent. Otherwise, such actions are denied with errno set to - * EACCES. The EACCES errno prevails over EXDEV to let user space + * ``EACCES``. The ``EACCES`` errno prevails over ``EXDEV`` to let user space * efficiently deal with an unrecoverable error. * * .. warning:: -- cgit v1.2.3 From 5493a2ad0d20944b16aba7ed7a951a43ad1f5fba Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Tue, 27 Sep 2022 14:23:06 -0700 Subject: docs: netlink: clarify the historical baggage of Netlink flags nlmsg_flags are full of historical baggage, inconsistencies and strangeness. Try to document it more thoroughly. Explain the meaning of the ECHO flag (and while at it clarify the comment in the uAPI). Handwave a little about the NEW request flags and how they make sense on the surface but cater to really old paradigm before commands were a thing. I will add more notes on how to make use of ECHO and discouragement for reuse of flags to the kernel-side documentation. 
Link: https://lore.kernel.org/r/20220927212306.823862-1-kuba@kernel.org Signed-off-by: Jakub Kicinski --- include/uapi/linux/netlink.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/netlink.h b/include/uapi/linux/netlink.h index e0689dbd2cde..e2ae82e3f9f7 100644 --- a/include/uapi/linux/netlink.h +++ b/include/uapi/linux/netlink.h @@ -62,7 +62,7 @@ struct nlmsghdr { #define NLM_F_REQUEST 0x01 /* It is request message. */ #define NLM_F_MULTI 0x02 /* Multipart message, terminated by NLMSG_DONE */ #define NLM_F_ACK 0x04 /* Reply with ack, with zero or error code */ -#define NLM_F_ECHO 0x08 /* Echo this request */ +#define NLM_F_ECHO 0x08 /* Receive resulting notifications */ #define NLM_F_DUMP_INTR 0x10 /* Dump was inconsistent due to sequence change */ #define NLM_F_DUMP_FILTERED 0x20 /* Dump was filtered as requested */ -- cgit v1.2.3 From a54fc09e4cba3004443aa05979f8c678196c8226 Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Wed, 28 Sep 2022 12:51:58 +0300 Subject: net/sched: taprio: allow user input of per-tc max SDU IEEE 802.1Q clause 12.29.1.1 "The queueMaxSDUTable structure and data types" and 8.6.8.4 "Enhancements for scheduled traffic" talk about the existence of a per traffic class limitation of maximum frame sizes, with a fallback on the port-based MTU. As far as I am able to understand, the 802.1Q Service Data Unit (SDU) represents the MAC Service Data Unit (MSDU, i.e. L2 payload), excluding any number of prepended VLAN headers which may be otherwise present in the MSDU. Therefore, the queueMaxSDU is directly comparable to the device MTU (1500 means L2 payload sizes are accepted, or frame sizes of 1518 octets, or 1522 plus one VLAN header). Drivers which offload this are directly responsible for translating into other units of measurement.
To keep the fast path checks optimized, we keep 2 arrays in the qdisc, one for max_sdu translated into frame length (so that it's comparable to skb->len), and another for offloading and for dumping back to the user. Signed-off-by: Vladimir Oltean Signed-off-by: Jakub Kicinski --- include/uapi/linux/pkt_sched.h | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/pkt_sched.h b/include/uapi/linux/pkt_sched.h index f292b467b27f..000eec106856 100644 --- a/include/uapi/linux/pkt_sched.h +++ b/include/uapi/linux/pkt_sched.h @@ -1232,6 +1232,16 @@ enum { #define TCA_TAPRIO_ATTR_FLAG_TXTIME_ASSIST _BITUL(0) #define TCA_TAPRIO_ATTR_FLAG_FULL_OFFLOAD _BITUL(1) +enum { + TCA_TAPRIO_TC_ENTRY_UNSPEC, + TCA_TAPRIO_TC_ENTRY_INDEX, /* u32 */ + TCA_TAPRIO_TC_ENTRY_MAX_SDU, /* u32 */ + + /* add new constants above here */ + __TCA_TAPRIO_TC_ENTRY_CNT, + TCA_TAPRIO_TC_ENTRY_MAX = (__TCA_TAPRIO_TC_ENTRY_CNT - 1) +}; + enum { TCA_TAPRIO_ATTR_UNSPEC, TCA_TAPRIO_ATTR_PRIOMAP, /* struct tc_mqprio_qopt */ @@ -1245,6 +1255,7 @@ enum { TCA_TAPRIO_ATTR_SCHED_CYCLE_TIME_EXTENSION, /* s64 */ TCA_TAPRIO_ATTR_FLAGS, /* u32 */ TCA_TAPRIO_ATTR_TXTIME_DELAY, /* u32 */ + TCA_TAPRIO_ATTR_TC_ENTRY, /* nest */ __TCA_TAPRIO_ATTR_MAX, }; -- cgit v1.2.3 From 650ae67bbf7ba5ac193f053969612fbb93247b64 Mon Sep 17 00:00:00 2001 From: William Breathitt Gray Date: Tue, 27 Sep 2022 18:53:38 -0400 Subject: counter: Introduce the Signal polarity component The Signal polarity component represents the active level of a respective Signal. There are two possible states: positive (rising edge) and negative (falling edge); enum counter_signal_polarity represents these states. A convenience macro COUNTER_COMP_POLARITY() is provided for driver authors to declare a Signal polarity component. 
Cc: Julien Panis Link: https://lore.kernel.org/r/8f47d6e1db71a11bb1e2666f8e2a6e9d256d4131.1664204990.git.william.gray@linaro.org/ Signed-off-by: William Breathitt Gray Link: https://lore.kernel.org/r/b6e53438badcb6318997d13dd2fc052f97d808ac.1664318353.git.william.gray@linaro.org Signed-off-by: Greg Kroah-Hartman --- include/uapi/linux/counter.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/counter.h b/include/uapi/linux/counter.h index 96c5ffd368ad..e9610e1944dc 100644 --- a/include/uapi/linux/counter.h +++ b/include/uapi/linux/counter.h @@ -153,4 +153,10 @@ enum counter_synapse_action { COUNTER_SYNAPSE_ACTION_BOTH_EDGES, }; +/* Signal polarity values */ +enum counter_signal_polarity { + COUNTER_SIGNAL_POLARITY_POSITIVE, + COUNTER_SIGNAL_POLARITY_NEGATIVE, +}; + #endif /* _UAPI_COUNTER_H_ */ -- cgit v1.2.3 From 45d2918520b2d8e640e4fb3fbf664dfb823dc520 Mon Sep 17 00:00:00 2001 From: William Breathitt Gray Date: Tue, 27 Sep 2022 18:53:40 -0400 Subject: counter: Introduce the Count capture component Some devices provide a latch function to save historic Count values. This patch standardizes exposure of such functionality as Count capture components. A COUNTER_COMP_CAPTURE macro is provided for driver authors to define a capture component. A new event COUNTER_EVENT_CAPTURE is introduced to represent Count value capture events. 
Cc: Julien Panis Link: https://lore.kernel.org/r/c239572ab4208d0d6728136e82a88ad464369a7a.1664204990.git.william.gray@linaro.org/ Signed-off-by: William Breathitt Gray Link: https://lore.kernel.org/r/3cebaa0b807a225eb277d771504fe6dba7269ffd.1664318353.git.william.gray@linaro.org Signed-off-by: Greg Kroah-Hartman --- include/uapi/linux/counter.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/counter.h b/include/uapi/linux/counter.h index e9610e1944dc..8ab12d731e3b 100644 --- a/include/uapi/linux/counter.h +++ b/include/uapi/linux/counter.h @@ -63,6 +63,8 @@ enum counter_event_type { COUNTER_EVENT_INDEX, /* State of counter is changed */ COUNTER_EVENT_CHANGE_OF_STATE, + /* Count value captured */ + COUNTER_EVENT_CAPTURE, }; /** -- cgit v1.2.3 From 9cda70f622cdcf049521a9c2886e5fd8a90a0591 Mon Sep 17 00:00:00 2001 From: Anuj Gupta Date: Fri, 30 Sep 2022 11:57:39 +0530 Subject: io_uring: introduce fixed buffer support for io_uring_cmd Add IORING_URING_CMD_FIXED flag that is to be used for sending io_uring command with previously registered buffers. User-space passes the buffer index in sqe->buf_index, same as done in read/write variants that uses fixed buffers. 
Signed-off-by: Anuj Gupta Signed-off-by: Kanchan Joshi Link: https://lore.kernel.org/r/20220930062749.152261-3-anuj20.g@samsung.com [axboe: shuffle valid flags check before acting on it] Signed-off-by: Jens Axboe --- include/uapi/linux/io_uring.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h index 92f29d9505a6..ab7458033ee3 100644 --- a/include/uapi/linux/io_uring.h +++ b/include/uapi/linux/io_uring.h @@ -56,6 +56,7 @@ struct io_uring_sqe { __u32 hardlink_flags; __u32 xattr_flags; __u32 msg_ring_flags; + __u32 uring_cmd_flags; }; __u64 user_data; /* data to be passed back at completion time */ /* pack this to avoid bogus arm OABI complaints */ @@ -219,6 +220,14 @@ enum io_uring_op { IORING_OP_LAST, }; +/* + * sqe->uring_cmd_flags + * IORING_URING_CMD_FIXED use registered buffer; pass thig flag + * along with setting sqe->buf_index. + */ +#define IORING_URING_CMD_FIXED (1U << 0) + + /* * sqe->fsync_flags */ -- cgit v1.2.3 From 18ff0bcda6d1dd3d53b4ce3f03e61bf1a648f960 Mon Sep 17 00:00:00 2001 From: Oleksij Rempel Date: Mon, 3 Oct 2022 08:52:00 +0200 Subject: ethtool: add interface to interact with Ethernet Power Equipment Add interface to support Power Sourcing Equipment. At current step it provides generic way to address all variants of PSE devices as defined in IEEE 802.3-2018 but support only objects specified for IEEE 802.3-2018 104.4 PoDL Power Sourcing Equipment (PSE). Currently supported and mandatory objects are: IEEE 802.3-2018 30.15.1.1.3 aPoDLPSEPowerDetectionStatus IEEE 802.3-2018 30.15.1.1.2 aPoDLPSEAdminState IEEE 802.3-2018 30.15.1.2.1 acPoDLPSEAdminControl This is minimal interface needed to control PSE on each separate ethernet port but it provides not all mandatory objects specified in IEEE 802.3-2018. Since "PoDL PSE" and "PSE" have similar names, but some different values I decide to not merge them and keep separate naming schema. 
This should allow as to be as close to IEEE 802.3 spec as possible and avoid name conflicts in the future. This implementation is connected to PHYs instead of MACs because PSE auto classification can potentially interfere with PHY auto negotiation. So, may be some extra PHY related initialization will be needed. With WIP version of ethtools interaction with PSE capable link looks as following: $ ip l ... 5: t1l1@eth0: .. ... $ ethtool --show-pse t1l1 PSE attributs for t1l1: PoDL PSE Admin State: disabled PoDL PSE Power Detection Status: disabled $ ethtool --set-pse t1l1 podl-pse-admin-control enable $ ethtool --show-pse t1l1 PSE attributs for t1l1: PoDL PSE Admin State: enabled PoDL PSE Power Detection Status: delivering power Signed-off-by: kernel test robot Signed-off-by: Oleksij Rempel Reviewed-by: Bagas Sanjaya Reviewed-by: Andrew Lunn Signed-off-by: Jakub Kicinski --- include/uapi/linux/ethtool.h | 45 ++++++++++++++++++++++++++++++++++++ include/uapi/linux/ethtool_netlink.h | 16 +++++++++++++ 2 files changed, 61 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/ethtool.h b/include/uapi/linux/ethtool.h index fe9893d1485d..dc2aa3d75b39 100644 --- a/include/uapi/linux/ethtool.h +++ b/include/uapi/linux/ethtool.h @@ -736,6 +736,51 @@ enum ethtool_module_power_mode { ETHTOOL_MODULE_POWER_MODE_HIGH, }; +/** + * enum ethtool_podl_pse_admin_state - operational state of the PoDL PSE + * functions. IEEE 802.3-2018 30.15.1.1.2 aPoDLPSEAdminState + * @ETHTOOL_PODL_PSE_ADMIN_STATE_UNKNOWN: state of PoDL PSE functions are + * unknown + * @ETHTOOL_PODL_PSE_ADMIN_STATE_DISABLED: PoDL PSE functions are disabled + * @ETHTOOL_PODL_PSE_ADMIN_STATE_ENABLED: PoDL PSE functions are enabled + */ +enum ethtool_podl_pse_admin_state { + ETHTOOL_PODL_PSE_ADMIN_STATE_UNKNOWN = 1, + ETHTOOL_PODL_PSE_ADMIN_STATE_DISABLED, + ETHTOOL_PODL_PSE_ADMIN_STATE_ENABLED, +}; + +/** + * enum ethtool_podl_pse_pw_d_status - power detection status of the PoDL PSE. 
+ * IEEE 802.3-2018 30.15.1.1.3 aPoDLPSEPowerDetectionStatus: + * @ETHTOOL_PODL_PSE_PW_D_STATUS_UNKNOWN: PoDL PSE + * @ETHTOOL_PODL_PSE_PW_D_STATUS_DISABLED: "The enumeration “disabled” is + * asserted true when the PoDL PSE state diagram variable mr_pse_enable is + * false" + * @ETHTOOL_PODL_PSE_PW_D_STATUS_SEARCHING: "The enumeration “searching” is + * asserted true when either of the PSE state diagram variables + * pi_detecting or pi_classifying is true." + * @ETHTOOL_PODL_PSE_PW_D_STATUS_DELIVERING: "The enumeration “deliveringPower” + * is asserted true when the PoDL PSE state diagram variable pi_powered is + * true." + * @ETHTOOL_PODL_PSE_PW_D_STATUS_SLEEP: "The enumeration “sleep” is asserted + * true when the PoDL PSE state diagram variable pi_sleeping is true." + * @ETHTOOL_PODL_PSE_PW_D_STATUS_IDLE: "The enumeration “idle” is asserted true + * when the logical combination of the PoDL PSE state diagram variables + * pi_prebiased*!pi_sleeping is true." + * @ETHTOOL_PODL_PSE_PW_D_STATUS_ERROR: "The enumeration “error” is asserted + * true when the PoDL PSE state diagram variable overload_held is true." 
+ */ +enum ethtool_podl_pse_pw_d_status { + ETHTOOL_PODL_PSE_PW_D_STATUS_UNKNOWN = 1, + ETHTOOL_PODL_PSE_PW_D_STATUS_DISABLED, + ETHTOOL_PODL_PSE_PW_D_STATUS_SEARCHING, + ETHTOOL_PODL_PSE_PW_D_STATUS_DELIVERING, + ETHTOOL_PODL_PSE_PW_D_STATUS_SLEEP, + ETHTOOL_PODL_PSE_PW_D_STATUS_IDLE, + ETHTOOL_PODL_PSE_PW_D_STATUS_ERROR, +}; + /** * struct ethtool_gstrings - string set for data tagging * @cmd: Command number = %ETHTOOL_GSTRINGS diff --git a/include/uapi/linux/ethtool_netlink.h b/include/uapi/linux/ethtool_netlink.h index 408a664fad59..bb57084ac524 100644 --- a/include/uapi/linux/ethtool_netlink.h +++ b/include/uapi/linux/ethtool_netlink.h @@ -49,6 +49,8 @@ enum { ETHTOOL_MSG_PHC_VCLOCKS_GET, ETHTOOL_MSG_MODULE_GET, ETHTOOL_MSG_MODULE_SET, + ETHTOOL_MSG_PSE_GET, + ETHTOOL_MSG_PSE_SET, /* add new constants above here */ __ETHTOOL_MSG_USER_CNT, @@ -94,6 +96,7 @@ enum { ETHTOOL_MSG_PHC_VCLOCKS_GET_REPLY, ETHTOOL_MSG_MODULE_GET_REPLY, ETHTOOL_MSG_MODULE_NTF, + ETHTOOL_MSG_PSE_GET_REPLY, /* add new constants above here */ __ETHTOOL_MSG_KERNEL_CNT, @@ -863,6 +866,19 @@ enum { ETHTOOL_A_MODULE_MAX = (__ETHTOOL_A_MODULE_CNT - 1) }; +/* Power Sourcing Equipment */ +enum { + ETHTOOL_A_PSE_UNSPEC, + ETHTOOL_A_PSE_HEADER, /* nest - _A_HEADER_* */ + ETHTOOL_A_PODL_PSE_ADMIN_STATE, /* u32 */ + ETHTOOL_A_PODL_PSE_ADMIN_CONTROL, /* u32 */ + ETHTOOL_A_PODL_PSE_PW_D_STATUS, /* u32 */ + + /* add new constants above here */ + __ETHTOOL_A_PSE_CNT, + ETHTOOL_A_PSE_MAX = (__ETHTOOL_A_PSE_CNT - 1) +}; + /* generic netlink info */ #define ETHTOOL_GENL_NAME "ethtool" #define ETHTOOL_GENL_VERSION 1 -- cgit v1.2.3 From 90fea5a800c3dd80fb8ad9a02929bcef5fde42b8 Mon Sep 17 00:00:00 2001 From: Jason Wang Date: Tue, 27 Sep 2022 15:48:08 +0800 Subject: vdpa: device feature provisioning This patch allows the device features to be provisioned through netlink. A new attribute is introduced to allow the userspace to pass a 64bit device features during device adding. 
This provides several advantages: - Allow to provision a subset of the features to ease the cross vendor live migration. - Better debug-ability for vDPA framework and parent. Reviewed-by: Eli Cohen Signed-off-by: Jason Wang Message-Id: <20220927074810.28627-2-jasowang@redhat.com> Signed-off-by: Michael S. Tsirkin --- include/uapi/linux/vdpa.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/vdpa.h b/include/uapi/linux/vdpa.h index 25c55cab3d7c..9dc855f37c59 100644 --- a/include/uapi/linux/vdpa.h +++ b/include/uapi/linux/vdpa.h @@ -52,6 +52,8 @@ enum vdpa_attr { VDPA_ATTR_DEV_VENDOR_ATTR_NAME, /* string */ VDPA_ATTR_DEV_VENDOR_ATTR_VALUE, /* u64 */ + VDPA_ATTR_DEV_FEATURES, /* u64 */ + /* new attributes must be added above here */ VDPA_ATTR_MAX, }; -- cgit v1.2.3 From e60d64074214db7207fc13c25ee39d8d47cb4a34 Mon Sep 17 00:00:00 2001 From: Alvaro Karsz Date: Wed, 21 Sep 2022 11:27:29 +0300 Subject: virtio_blk: add SECURE ERASE command support Support for the VIRTIO_BLK_F_SECURE_ERASE VirtIO feature. A device that offers this feature can receive VIRTIO_BLK_T_SECURE_ERASE commands. A device which supports this feature has the following fields in the virtio config: - max_secure_erase_sectors - max_secure_erase_seg - secure_erase_sector_alignment max_secure_erase_sectors and secure_erase_sector_alignment are expressed in 512-byte units. Every secure erase command has the following fields: - sectors: The starting offset in 512-byte units. - num_sectors: The number of sectors. Signed-off-by: Alvaro Karsz Message-Id: <20220921082729.2516779-1-alvaro.karsz@solid-run.com> Signed-off-by: Michael S. 
Tsirkin Reviewed-by: Stefan Hajnoczi --- include/uapi/linux/virtio_blk.h | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/virtio_blk.h b/include/uapi/linux/virtio_blk.h index d888f013d9ff..58e70b24b504 100644 --- a/include/uapi/linux/virtio_blk.h +++ b/include/uapi/linux/virtio_blk.h @@ -40,6 +40,7 @@ #define VIRTIO_BLK_F_MQ 12 /* support more than one vq */ #define VIRTIO_BLK_F_DISCARD 13 /* DISCARD is supported */ #define VIRTIO_BLK_F_WRITE_ZEROES 14 /* WRITE ZEROES is supported */ +#define VIRTIO_BLK_F_SECURE_ERASE 16 /* Secure Erase is supported */ /* Legacy feature bits */ #ifndef VIRTIO_BLK_NO_LEGACY @@ -121,6 +122,21 @@ struct virtio_blk_config { __u8 write_zeroes_may_unmap; __u8 unused1[3]; + + /* the next 3 entries are guarded by VIRTIO_BLK_F_SECURE_ERASE */ + /* + * The maximum secure erase sectors (in 512-byte sectors) for + * one segment. + */ + __virtio32 max_secure_erase_sectors; + /* + * The maximum number of secure erase segments in a + * secure erase command. + */ + __virtio32 max_secure_erase_seg; + /* Secure erase commands must be aligned to this number of sectors. */ + __virtio32 secure_erase_sector_alignment; + } __attribute__((packed)); /* @@ -155,6 +171,9 @@ struct virtio_blk_config { /* Write zeroes command */ #define VIRTIO_BLK_T_WRITE_ZEROES 13 +/* Secure erase command */ +#define VIRTIO_BLK_T_SECURE_ERASE 14 + #ifndef VIRTIO_BLK_NO_LEGACY /* Barrier before this op. */ #define VIRTIO_BLK_T_BARRIER 0x80000000 -- cgit v1.2.3 From 228565100def593df0f26ee07d5fb810039454d5 Mon Sep 17 00:00:00 2001 From: Zhu Lingshan Date: Thu, 29 Sep 2022 09:45:50 +0800 Subject: vDPA: allow userspace to query features of a vDPA device This commit adds a new vDPA netlink attribution VDPA_ATTR_VDPA_DEV_SUPPORTED_FEATURES. Userspace can query features of vDPA devices through this new attr. 
This commit invokes vdpa_config_ops.get_config() rather than vdpa_get_config_unlocked() to read the device config space, so no races in vdpa_set_features_unlocked() Userspace tool iproute2 example: $ vdpa dev config show vdpa0 vdpa0: mac 00:e8:ca:11:be:05 link up link_announce false max_vq_pairs 4 mtu 1500 negotiated_features MRG_RXBUF CTRL_VQ MQ VERSION_1 ACCESS_PLATFORM dev_features MTU MAC MRG_RXBUF CTRL_VQ MQ ANY_LAYOUT VERSION_1 ACCESS_PLATFORM Signed-off-by: Zhu Lingshan Message-Id: <20220929014555.112323-2-lingshan.zhu@intel.com> Signed-off-by: Michael S. Tsirkin --- include/uapi/linux/vdpa.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/vdpa.h b/include/uapi/linux/vdpa.h index 9dc855f37c59..9bd79235c875 100644 --- a/include/uapi/linux/vdpa.h +++ b/include/uapi/linux/vdpa.h @@ -46,6 +46,7 @@ enum vdpa_attr { VDPA_ATTR_DEV_NEGOTIATED_FEATURES, /* u64 */ VDPA_ATTR_DEV_MGMTDEV_MAX_VQS, /* u32 */ + /* virtio features that are supported by the vDPA management device */ VDPA_ATTR_DEV_SUPPORTED_FEATURES, /* u64 */ VDPA_ATTR_DEV_QUEUE_INDEX, /* u32 */ @@ -54,6 +55,9 @@ enum vdpa_attr { VDPA_ATTR_DEV_FEATURES, /* u64 */ + /* virtio features that are supported by the vDPA device */ + VDPA_ATTR_VDPA_DEV_SUPPORTED_FEATURES, /* u64 */ + /* new attributes must be added above here */ VDPA_ATTR_MAX, }; -- cgit v1.2.3 From 0ec8ce07394442d722806fe61b901a5b2b17249d Mon Sep 17 00:00:00 2001 From: Fenghua Yu Date: Fri, 14 Oct 2022 15:25:41 -0700 Subject: dmaengine: idxd: Do not enable user type Work Queue without Shared Virtual Addressing When the idxd_user_drv driver is bound to a Work Queue (WQ) device without IOMMU or with IOMMU Passthrough without Shared Virtual Addressing (SVA), the application gains direct access to physical memory via the device by programming physical address to a submitted descriptor. This allows direct userspace read and write access to arbitrary physical memory.
This is inconsistent with the security goals of a good kernel API. Unlike vfio_pci driver, the IDXD char device driver does not provide any ways to pin user pages and translate the address from user VA to IOVA or PA without IOMMU SVA. Therefore the application has no way to instruct the device to perform DMA function. This makes the char device not usable for normal application usage. Since user type WQ without SVA cannot be used for normal application usage and presents the security issue, bind idxd_user_drv driver and enable user type WQ only when SVA is enabled (i.e. user PASID is enabled). Fixes: 448c3de8ac83 ("dmaengine: idxd: create user driver for wq 'device'") Cc: stable@vger.kernel.org Suggested-by: Arjan Van De Ven Signed-off-by: Fenghua Yu Reviewed-by: Dave Jiang Reviewed-by: Jerry Snitselaar Link: https://lore.kernel.org/r/20221014222541.3912195-1-fenghua.yu@intel.com Signed-off-by: Vinod Koul --- include/uapi/linux/idxd.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/idxd.h b/include/uapi/linux/idxd.h index 095299c75828..2b9e7feba3f3 100644 --- a/include/uapi/linux/idxd.h +++ b/include/uapi/linux/idxd.h @@ -29,6 +29,7 @@ enum idxd_scmd_stat { IDXD_SCMD_WQ_NO_SIZE = 0x800e0000, IDXD_SCMD_WQ_NO_PRIV = 0x800f0000, IDXD_SCMD_WQ_IRQ_ERR = 0x80100000, + IDXD_SCMD_WQ_USER_NO_IOMMU = 0x80110000, }; #define IDXD_SCMD_SOFTERR_MASK 0x80000000 -- cgit v1.2.3 From 8da7f0976b9071b528c545008de9d10cc81883b1 Mon Sep 17 00:00:00 2001 From: Hans Verkuil Date: Wed, 12 Oct 2022 16:46:17 +0100 Subject: media: videodev2.h: V4L2_DV_BT_BLANKING_HEIGHT should check 'interlaced' If it is a progressive (non-interlaced) format, then ignore the interlaced timing values. 
Signed-off-by: Hans Verkuil Fixes: 7f68127fa11f ([media] videodev2.h: defines to calculate blanking and frame sizes) Signed-off-by: Mauro Carvalho Chehab --- include/uapi/linux/videodev2.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/videodev2.h b/include/uapi/linux/videodev2.h index 86cae23cc446..29da1f4b4578 100644 --- a/include/uapi/linux/videodev2.h +++ b/include/uapi/linux/videodev2.h @@ -1601,7 +1601,8 @@ struct v4l2_bt_timings { ((bt)->width + V4L2_DV_BT_BLANKING_WIDTH(bt)) #define V4L2_DV_BT_BLANKING_HEIGHT(bt) \ ((bt)->vfrontporch + (bt)->vsync + (bt)->vbackporch + \ - (bt)->il_vfrontporch + (bt)->il_vsync + (bt)->il_vbackporch) + ((bt)->interlaced ? \ + ((bt)->il_vfrontporch + (bt)->il_vsync + (bt)->il_vbackporch) : 0)) #define V4L2_DV_BT_FRAME_HEIGHT(bt) \ ((bt)->height + V4L2_DV_BT_BLANKING_HEIGHT(bt)) -- cgit v1.2.3 From cb6c18b5a41622c7a439508f7421f8766a91cb87 Mon Sep 17 00:00:00 2001 From: Ravi Bangoria Date: Sat, 1 Oct 2022 11:37:05 +0530 Subject: perf/mem: Rename PERF_MEM_LVLNUM_EXTN_MEM to PERF_MEM_LVLNUM_CXL PERF_MEM_LVLNUM_EXTN_MEM was introduced to cover CXL devices but it is a somewhat ambiguous name and also not generic enough to cover cxl.cache and cxl.io devices. Rename it to PERF_MEM_LVLNUM_CXL to be more specific.
Signed-off-by: Ravi Bangoria Signed-off-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r/f6268268-b4e9-9ed6-0453-65792644d953@amd.com --- include/uapi/linux/perf_event.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/perf_event.h b/include/uapi/linux/perf_event.h index 85be78e0e7f6..ccb7f5dad59b 100644 --- a/include/uapi/linux/perf_event.h +++ b/include/uapi/linux/perf_event.h @@ -1337,7 +1337,7 @@ union perf_mem_data_src { #define PERF_MEM_LVLNUM_L3 0x03 /* L3 */ #define PERF_MEM_LVLNUM_L4 0x04 /* L4 */ /* 5-0x8 available */ -#define PERF_MEM_LVLNUM_EXTN_MEM 0x09 /* Extension memory */ +#define PERF_MEM_LVLNUM_CXL 0x09 /* CXL */ #define PERF_MEM_LVLNUM_IO 0x0a /* I/O */ #define PERF_MEM_LVLNUM_ANY_CACHE 0x0b /* Any cache */ #define PERF_MEM_LVLNUM_LFB 0x0c /* LFB */ -- cgit v1.2.3 From 986d93f55bdeab1cac858d1e47b41fac10b2d7f6 Mon Sep 17 00:00:00 2001 From: Gaosheng Cui Date: Mon, 31 Oct 2022 10:10:21 +0800 Subject: audit: fix undefined behavior in bit shift for AUDIT_BIT Shifting signed 32-bit value by 31 bits is undefined, so changing significant bit to unsigned. 
The UBSAN warning calltrace like below: UBSAN: shift-out-of-bounds in kernel/auditfilter.c:179:23 left shift of 1 by 31 places cannot be represented in type 'int' Call Trace: dump_stack_lvl+0x7d/0xa5 dump_stack+0x15/0x1b ubsan_epilogue+0xe/0x4e __ubsan_handle_shift_out_of_bounds+0x1e7/0x20c audit_register_class+0x9d/0x137 audit_classes_init+0x4d/0xb8 do_one_initcall+0x76/0x430 kernel_init_freeable+0x3b3/0x422 kernel_init+0x24/0x1e0 ret_from_fork+0x1f/0x30 Signed-off-by: Gaosheng Cui [PM: remove bad 'Fixes' tag as issue predates git, added in v2.6.6-rc1] Signed-off-by: Paul Moore --- include/uapi/linux/audit.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/audit.h b/include/uapi/linux/audit.h index 7c1dc818b1d5..d676ed2b246e 100644 --- a/include/uapi/linux/audit.h +++ b/include/uapi/linux/audit.h @@ -187,7 +187,7 @@ #define AUDIT_MAX_KEY_LEN 256 #define AUDIT_BITMASK_SIZE 64 #define AUDIT_WORD(nr) ((__u32)((nr)/32)) -#define AUDIT_BIT(nr) (1 << ((nr) - AUDIT_WORD(nr)*32)) +#define AUDIT_BIT(nr) (1U << ((nr) - AUDIT_WORD(nr)*32)) #define AUDIT_SYSCALL_CLASSES 16 #define AUDIT_CLASS_DIR_WRITE 0 -- cgit v1.2.3 From aec1dc972d27c837d1406310dab5170189eb01e5 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Wed, 2 Nov 2022 11:25:16 -0700 Subject: net/ipv4: Fix linux/in.h header dependencies __DECLARE_FLEX_ARRAY is defined in include/uapi/linux/stddef.h but doesn't seem to be explicitly included from include/uapi/linux/in.h, which breaks BPF selftests builds (once we sync linux/stddef.h into tools/include directory in the next patch). Fix this by explicitly including linux/stddef.h. Given this affects BPF CI and bpf tree, targeting this for bpf tree. Fixes: 5854a09b4957 ("net/ipv4: Use __DECLARE_FLEX_ARRAY() helper") Signed-off-by: Andrii Nakryiko Signed-off-by: Daniel Borkmann Cc: Gustavo A. R. 
Silva Cc: Jakub Kicinski Link: https://lore.kernel.org/bpf/20221102182517.2675301-1-andrii@kernel.org --- include/uapi/linux/in.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/in.h b/include/uapi/linux/in.h index f243ce665f74..07a4cb149305 100644 --- a/include/uapi/linux/in.h +++ b/include/uapi/linux/in.h @@ -20,6 +20,7 @@ #define _UAPI_LINUX_IN_H #include +#include #include #include -- cgit v1.2.3 From 46653972e3ea64f79e7f8ae3aa41a4d3fdb70a13 Mon Sep 17 00:00:00 2001 From: Gaosheng Cui Date: Mon, 31 Oct 2022 19:25:36 +0800 Subject: capabilities: fix undefined behavior in bit shift for CAP_TO_MASK Shifting signed 32-bit value by 31 bits is undefined, so changing significant bit to unsigned. The UBSAN warning calltrace like below: UBSAN: shift-out-of-bounds in security/commoncap.c:1252:2 left shift of 1 by 31 places cannot be represented in type 'int' Call Trace: dump_stack_lvl+0x7d/0xa5 dump_stack+0x15/0x1b ubsan_epilogue+0xe/0x4e __ubsan_handle_shift_out_of_bounds+0x1e7/0x20c cap_task_prctl+0x561/0x6f0 security_task_prctl+0x5a/0xb0 __x64_sys_prctl+0x61/0x8f0 do_syscall_64+0x58/0x80 entry_SYSCALL_64_after_hwframe+0x63/0xcd Fixes: e338d263a76a ("Add 64-bit capability support to the kernel") Signed-off-by: Gaosheng Cui Acked-by: Andrew G. 
Morgan Reviewed-by: Serge Hallyn Signed-off-by: Paul Moore --- include/uapi/linux/capability.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/capability.h b/include/uapi/linux/capability.h index 463d1ba2232a..3d61a0ae055d 100644 --- a/include/uapi/linux/capability.h +++ b/include/uapi/linux/capability.h @@ -426,7 +426,7 @@ struct vfs_ns_cap_data { */ #define CAP_TO_INDEX(x) ((x) >> 5) /* 1 << 5 == bits in __u32 */ -#define CAP_TO_MASK(x) (1 << ((x) & 31)) /* mask for indexed __u32 */ +#define CAP_TO_MASK(x) (1U << ((x) & 31)) /* mask for indexed __u32 */ #endif /* _UAPI_LINUX_CAPABILITY_H */ -- cgit v1.2.3 From 6dcabcd398946e2b0b776a8310291aeebe1ca0e6 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Sun, 6 Nov 2022 13:17:27 -0700 Subject: io_uring: fix typo in io_uring.h comment Just a basic s/thig/this swap, fixing up a typo introduced by a commit added in the 6.1 release. Fixes: 9cda70f622cd ("io_uring: introduce fixed buffer support for io_uring_cmd") Signed-off-by: Jens Axboe --- include/uapi/linux/io_uring.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h index ab7458033ee3..2df3225b562f 100644 --- a/include/uapi/linux/io_uring.h +++ b/include/uapi/linux/io_uring.h @@ -222,7 +222,7 @@ enum io_uring_op { /* * sqe->uring_cmd_flags - * IORING_URING_CMD_FIXED use registered buffer; pass thig flag + * IORING_URING_CMD_FIXED use registered buffer; pass this flag * along with setting sqe->buf_index. 
*/ #define IORING_URING_CMD_FIXED (1U << 0) -- cgit v1.2.3 From 58e0be1ef6118c5352b56a4d06e974c5599993a5 Mon Sep 17 00:00:00 2001 From: Hangbin Liu Date: Tue, 15 Nov 2022 22:24:00 +0800 Subject: net: use struct_group to copy ip/ipv6 header addresses MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit kernel test robot reported warnings when build bonding module with make W=1 O=build_dir ARCH=x86_64 SHELL=/bin/bash drivers/net/bonding/: from ../drivers/net/bonding/bond_main.c:35: In function ‘fortify_memcpy_chk’, inlined from ‘iph_to_flow_copy_v4addrs’ at ../include/net/ip.h:566:2, inlined from ‘bond_flow_ip’ at ../drivers/net/bonding/bond_main.c:3984:3: ../include/linux/fortify-string.h:413:25: warning: call to ‘__read_overflow2_field’ declared with attribute warning: detected read beyond size of f ield (2nd parameter); maybe use struct_group()? [-Wattribute-warning] 413 | __read_overflow2_field(q_size_field, size); | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ In function ‘fortify_memcpy_chk’, inlined from ‘iph_to_flow_copy_v6addrs’ at ../include/net/ipv6.h:900:2, inlined from ‘bond_flow_ip’ at ../drivers/net/bonding/bond_main.c:3994:3: ../include/linux/fortify-string.h:413:25: warning: call to ‘__read_overflow2_field’ declared with attribute warning: detected read beyond size of f ield (2nd parameter); maybe use struct_group()? [-Wattribute-warning] 413 | __read_overflow2_field(q_size_field, size); | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ This is because we try to copy the whole ip/ip6 address to the flow_key, while we only point the to ip/ip6 saddr. Note that since these are UAPI headers, __struct_group() is used to avoid the compiler warnings. 
Reported-by: kernel test robot Fixes: c3f8324188fa ("net: Add full IPv6 addresses to flow_keys") Signed-off-by: Hangbin Liu Link: https://lore.kernel.org/r/20221115142400.1204786-1-liuhangbin@gmail.com Signed-off-by: Paolo Abeni --- include/uapi/linux/ip.h | 6 ++++-- include/uapi/linux/ipv6.h | 6 ++++-- 2 files changed, 8 insertions(+), 4 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/ip.h b/include/uapi/linux/ip.h index 961ec16a26b8..874a92349bf5 100644 --- a/include/uapi/linux/ip.h +++ b/include/uapi/linux/ip.h @@ -100,8 +100,10 @@ struct iphdr { __u8 ttl; __u8 protocol; __sum16 check; - __be32 saddr; - __be32 daddr; + __struct_group(/* no tag */, addrs, /* no attrs */, + __be32 saddr; + __be32 daddr; + ); /*The options start here. */ }; diff --git a/include/uapi/linux/ipv6.h b/include/uapi/linux/ipv6.h index 03cdbe798fe3..81f4243bebb1 100644 --- a/include/uapi/linux/ipv6.h +++ b/include/uapi/linux/ipv6.h @@ -130,8 +130,10 @@ struct ipv6hdr { __u8 nexthdr; __u8 hop_limit; - struct in6_addr saddr; - struct in6_addr daddr; + __struct_group(/* no tag */, addrs, /* no attrs */, + struct in6_addr saddr; + struct in6_addr daddr; + ); }; -- cgit v1.2.3