diff options
| author | Linus Torvalds <torvalds@linux-foundation.org> | 2026-04-13 16:22:30 -0700 |
|---|---|---|
| committer | Linus Torvalds <torvalds@linux-foundation.org> | 2026-04-13 16:22:30 -0700 |
| commit | 23acda7c221a76ff711d65f4ca90029d43b249a0 (patch) | |
| tree | 3e7745c9210489864e153990c06833d7d47a3dcd /include/uapi/linux | |
| parent | 7fe6ac157b7e15c8976bd62ad7cb98e248884e83 (diff) | |
| parent | c5e9f6a96bf7379da87df1b852b90527e242b56f (diff) | |
Merge tag 'for-7.1/io_uring-20260411' of git://git.kernel.org/pub/scm/linux/kernel/git/axboe/linux
Pull io_uring updates from Jens Axboe:
- Add a callback-driven main loop for io_uring, and BPF struct_ops
on top to allow implementing custom event loop logic
- Decouple IOPOLL from being a ring-wide all-or-nothing setting,
allowing IOPOLL use cases to also issue certain whitelisted
non-polled opcodes
- Timeout improvements. Migrate internal timeout storage from
timespec64 to ktime_t for simpler arithmetic and to avoid copying
timespec data
- Zero-copy receive (zcrx) updates:
- Add a device-less mode (ZCRX_REG_NODEV) for testing and
experimentation where data flows through the copy fallback path
- Fix two-step unregistration regression, DMA length calculations,
xarray mark usage, and a potential 32-bit overflow in id
shifting
- Refactoring toward multi-area support: dedicated refill queue
struct, consolidated DMA syncing, netmem array refilling format,
and guard-based locking
- Zero-copy transmit (zctx) cleanup:
- Unify io_send_zc() and io_sendmsg_zc() into a single function
- Add vectorized registered buffer send for IORING_OP_SEND_ZC
- Add separate notification user_data via sqe->addr3 so
notification and completion CQEs can be distinguished without
extra reference counting
- Switch struct io_ring_ctx internal bitfields to explicit flag bits
with atomic-safe accessors, and annotate the known harmless races on
those flags
- Various optimizations caching ctx and other request fields in local
variables to avoid repeated loads, and cleanups for tctx setup, ring
fd registration, and read path early returns
* tag 'for-7.1/io_uring-20260411' of git://git.kernel.org/pub/scm/linux/kernel/git/axboe/linux: (58 commits)
io_uring: unify getting ctx from passed in file descriptor
io_uring/register: don't get a reference to the registered ring fd
io_uring/tctx: clean up __io_uring_add_tctx_node() error handling
io_uring/tctx: have io_uring_alloc_task_context() return tctx
io_uring/timeout: use 'ctx' consistently
io_uring/rw: clean up __io_read() obsolete comment and early returns
io_uring/zcrx: use correct mmap off constants
io_uring/zcrx: use dma_len for chunk size calculation
io_uring/zcrx: don't clear not allocated niovs
io_uring/zcrx: don't use mark0 for allocating xarray
io_uring: cast id to u64 before shifting in io_allocate_rbuf_ring()
io_uring/zcrx: reject REG_NODEV with large rx_buf_size
io_uring/cancel: validate opcode for IORING_ASYNC_CANCEL_OP
io_uring/rsrc: use io_cache_free() to free node
io_uring/zcrx: rename zcrx [un]register functions
io_uring/zcrx: check ctrl op payload struct sizes
io_uring/zcrx: cache fallback availability in zcrx ctx
io_uring/zcrx: warn on a repeated area append
io_uring/zcrx: consolidate dma syncing
io_uring/zcrx: netmem array as refiling format
...
Diffstat (limited to 'include/uapi/linux')
| -rw-r--r-- | include/uapi/linux/io_uring.h | 101 | ||||
| -rw-r--r-- | include/uapi/linux/io_uring/zcrx.h | 115 |
2 files changed, 122 insertions, 94 deletions
diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h index 1ff16141c8a5..17ac1b785440 100644 --- a/include/uapi/linux/io_uring.h +++ b/include/uapi/linux/io_uring.h @@ -10,6 +10,8 @@ #include <linux/fs.h> #include <linux/types.h> +#include <linux/io_uring/zcrx.h> + /* * this file is shared with liburing and that has to autodetect * if linux/time_types.h is available or not, it can @@ -341,6 +343,10 @@ enum io_uring_op { /* * sqe->timeout_flags + * + * IORING_TIMEOUT_IMMEDIATE_ARG: If set, sqe->addr stores the timeout + * value in nanoseconds instead of + * pointing to a timespec. */ #define IORING_TIMEOUT_ABS (1U << 0) #define IORING_TIMEOUT_UPDATE (1U << 1) @@ -349,6 +355,7 @@ enum io_uring_op { #define IORING_LINK_TIMEOUT_UPDATE (1U << 4) #define IORING_TIMEOUT_ETIME_SUCCESS (1U << 5) #define IORING_TIMEOUT_MULTISHOT (1U << 6) +#define IORING_TIMEOUT_IMMEDIATE_ARG (1U << 7) #define IORING_TIMEOUT_CLOCK_MASK (IORING_TIMEOUT_BOOTTIME | IORING_TIMEOUT_REALTIME) #define IORING_TIMEOUT_UPDATE_MASK (IORING_TIMEOUT_UPDATE | IORING_LINK_TIMEOUT_UPDATE) /* @@ -1050,100 +1057,6 @@ struct io_timespec { __u64 tv_nsec; }; -/* Zero copy receive refill queue entry */ -struct io_uring_zcrx_rqe { - __u64 off; - __u32 len; - __u32 __pad; -}; - -struct io_uring_zcrx_cqe { - __u64 off; - __u64 __pad; -}; - -/* The bit from which area id is encoded into offsets */ -#define IORING_ZCRX_AREA_SHIFT 48 -#define IORING_ZCRX_AREA_MASK (~(((__u64)1 << IORING_ZCRX_AREA_SHIFT) - 1)) - -struct io_uring_zcrx_offsets { - __u32 head; - __u32 tail; - __u32 rqes; - __u32 __resv2; - __u64 __resv[2]; -}; - -enum io_uring_zcrx_area_flags { - IORING_ZCRX_AREA_DMABUF = 1, -}; - -struct io_uring_zcrx_area_reg { - __u64 addr; - __u64 len; - __u64 rq_area_token; - __u32 flags; - __u32 dmabuf_fd; - __u64 __resv2[2]; -}; - -enum zcrx_reg_flags { - ZCRX_REG_IMPORT = 1, -}; - -enum zcrx_features { - /* - * The user can ask for the desired rx page size by passing the - * value in struct 
io_uring_zcrx_ifq_reg::rx_buf_len. - */ - ZCRX_FEATURE_RX_PAGE_SIZE = 1 << 0, -}; - -/* - * Argument for IORING_REGISTER_ZCRX_IFQ - */ -struct io_uring_zcrx_ifq_reg { - __u32 if_idx; - __u32 if_rxq; - __u32 rq_entries; - __u32 flags; - - __u64 area_ptr; /* pointer to struct io_uring_zcrx_area_reg */ - __u64 region_ptr; /* struct io_uring_region_desc * */ - - struct io_uring_zcrx_offsets offsets; - __u32 zcrx_id; - __u32 rx_buf_len; - __u64 __resv[3]; -}; - -enum zcrx_ctrl_op { - ZCRX_CTRL_FLUSH_RQ, - ZCRX_CTRL_EXPORT, - - __ZCRX_CTRL_LAST, -}; - -struct zcrx_ctrl_flush_rq { - __u64 __resv[6]; -}; - -struct zcrx_ctrl_export { - __u32 zcrx_fd; - __u32 __resv1[11]; -}; - -struct zcrx_ctrl { - __u32 zcrx_id; - __u32 op; /* see enum zcrx_ctrl_op */ - __u64 __resv[2]; - - union { - struct zcrx_ctrl_export zc_export; - struct zcrx_ctrl_flush_rq zc_flush; - }; -}; - #ifdef __cplusplus } #endif diff --git a/include/uapi/linux/io_uring/zcrx.h b/include/uapi/linux/io_uring/zcrx.h new file mode 100644 index 000000000000..5ce02c7a6096 --- /dev/null +++ b/include/uapi/linux/io_uring/zcrx.h @@ -0,0 +1,115 @@ +/* SPDX-License-Identifier: (GPL-2.0 WITH Linux-syscall-note) OR MIT */ +/* + * Header file for the io_uring zerocopy receive (zcrx) interface. + * + * Copyright (C) 2026 Pavel Begunkov + * Copyright (C) 2026 David Wei + * Copyright (C) Meta Platforms, Inc. 
+ */ +#ifndef LINUX_IO_ZCRX_H +#define LINUX_IO_ZCRX_H + +#include <linux/types.h> + +/* Zero copy receive refill queue entry */ +struct io_uring_zcrx_rqe { + __u64 off; + __u32 len; + __u32 __pad; +}; + +struct io_uring_zcrx_cqe { + __u64 off; + __u64 __pad; +}; + +/* The bit from which area id is encoded into offsets */ +#define IORING_ZCRX_AREA_SHIFT 48 +#define IORING_ZCRX_AREA_MASK (~(((__u64)1 << IORING_ZCRX_AREA_SHIFT) - 1)) + +struct io_uring_zcrx_offsets { + __u32 head; + __u32 tail; + __u32 rqes; + __u32 __resv2; + __u64 __resv[2]; +}; + +enum io_uring_zcrx_area_flags { + IORING_ZCRX_AREA_DMABUF = 1, +}; + +struct io_uring_zcrx_area_reg { + __u64 addr; + __u64 len; + __u64 rq_area_token; + __u32 flags; + __u32 dmabuf_fd; + __u64 __resv2[2]; +}; + +enum zcrx_reg_flags { + ZCRX_REG_IMPORT = 1, + + /* + * Register a zcrx instance without a net device. All data will be + * copied. The refill queue entries might not be automatically + * consumed and need to be flushed, see ZCRX_CTRL_FLUSH_RQ. + */ + ZCRX_REG_NODEV = 2, +}; + +enum zcrx_features { + /* + * The user can ask for the desired rx page size by passing the + * value in struct io_uring_zcrx_ifq_reg::rx_buf_len. 
+ */ + ZCRX_FEATURE_RX_PAGE_SIZE = 1 << 0, +}; + +/* + * Argument for IORING_REGISTER_ZCRX_IFQ + */ +struct io_uring_zcrx_ifq_reg { + __u32 if_idx; + __u32 if_rxq; + __u32 rq_entries; + __u32 flags; + + __u64 area_ptr; /* pointer to struct io_uring_zcrx_area_reg */ + __u64 region_ptr; /* struct io_uring_region_desc * */ + + struct io_uring_zcrx_offsets offsets; + __u32 zcrx_id; + __u32 rx_buf_len; + __u64 __resv[3]; +}; + +enum zcrx_ctrl_op { + ZCRX_CTRL_FLUSH_RQ, + ZCRX_CTRL_EXPORT, + + __ZCRX_CTRL_LAST, +}; + +struct zcrx_ctrl_flush_rq { + __u64 __resv[6]; +}; + +struct zcrx_ctrl_export { + __u32 zcrx_fd; + __u32 __resv1[11]; +}; + +struct zcrx_ctrl { + __u32 zcrx_id; + __u32 op; /* see enum zcrx_ctrl_op */ + __u64 __resv[2]; + + union { + struct zcrx_ctrl_export zc_export; + struct zcrx_ctrl_flush_rq zc_flush; + }; +}; + +#endif /* LINUX_IO_ZCRX_H */ |
