summary | refs | log | tree | commit | diff
path: root/include/uapi/linux
diff options
context:
space:
mode:
author: Linus Torvalds <torvalds@linux-foundation.org>, 2026-04-13 16:22:30 -0700
committer: Linus Torvalds <torvalds@linux-foundation.org>, 2026-04-13 16:22:30 -0700
commit: 23acda7c221a76ff711d65f4ca90029d43b249a0 (patch)
tree: 3e7745c9210489864e153990c06833d7d47a3dcd /include/uapi/linux
parent: 7fe6ac157b7e15c8976bd62ad7cb98e248884e83 (diff)
parent: c5e9f6a96bf7379da87df1b852b90527e242b56f (diff)
Merge tag 'for-7.1/io_uring-20260411' of git://git.kernel.org/pub/scm/linux/kernel/git/axboe/linux
Pull io_uring updates from Jens Axboe:

 - Add a callback driven main loop for io_uring, and BPF struct_ops on top to allow implementing custom event loop logic

 - Decouple IOPOLL from being a ring-wide all-or-nothing setting, allowing IOPOLL use cases to also issue certain white listed non-polled opcodes

 - Timeout improvements. Migrate internal timeout storage from timespec64 to ktime_t for simpler arithmetic and avoid copying of timespec data

 - Zero-copy receive (zcrx) updates:

     - Add a device-less mode (ZCRX_REG_NODEV) for testing and experimentation where data flows through the copy fallback path

     - Fix two-step unregistration regression, DMA length calculations, xarray mark usage, and a potential 32-bit overflow in id shifting

     - Refactoring toward multi-area support: dedicated refill queue struct, consolidated DMA syncing, netmem array refilling format, and guard-based locking

 - Zero-copy transmit (zctx) cleanup:

     - Unify io_send_zc() and io_sendmsg_zc() into a single function

     - Add vectorized registered buffer send for IORING_OP_SEND_ZC

     - Add separate notification user_data via sqe->addr3 so notification and completion CQEs can be distinguished without extra reference counting

 - Switch struct io_ring_ctx internal bitfields to explicit flag bits with atomic-safe accessors, and annotate the known harmless races on those flags

 - Various optimizations caching ctx and other request fields in local variables to avoid repeated loads, and cleanups for tctx setup, ring fd registration, and read path early returns

* tag 'for-7.1/io_uring-20260411' of git://git.kernel.org/pub/scm/linux/kernel/git/axboe/linux: (58 commits)
  io_uring: unify getting ctx from passed in file descriptor
  io_uring/register: don't get a reference to the registered ring fd
  io_uring/tctx: clean up __io_uring_add_tctx_node() error handling
  io_uring/tctx: have io_uring_alloc_task_context() return tctx
  io_uring/timeout: use 'ctx' consistently
  io_uring/rw: clean up __io_read() obsolete comment and early returns
  io_uring/zcrx: use correct mmap off constants
  io_uring/zcrx: use dma_len for chunk size calculation
  io_uring/zcrx: don't clear not allocated niovs
  io_uring/zcrx: don't use mark0 for allocating xarray
  io_uring: cast id to u64 before shifting in io_allocate_rbuf_ring()
  io_uring/zcrx: reject REG_NODEV with large rx_buf_size
  io_uring/cancel: validate opcode for IORING_ASYNC_CANCEL_OP
  io_uring/rsrc: use io_cache_free() to free node
  io_uring/zcrx: rename zcrx [un]register functions
  io_uring/zcrx: check ctrl op payload struct sizes
  io_uring/zcrx: cache fallback availability in zcrx ctx
  io_uring/zcrx: warn on a repeated area append
  io_uring/zcrx: consolidate dma syncing
  io_uring/zcrx: netmem array as refiling format
  ...
Diffstat (limited to 'include/uapi/linux')
-rw-r--r-- include/uapi/linux/io_uring.h 101
-rw-r--r-- include/uapi/linux/io_uring/zcrx.h 115
2 files changed, 122 insertions, 94 deletions
diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h
index 1ff16141c8a5..17ac1b785440 100644
--- a/include/uapi/linux/io_uring.h
+++ b/include/uapi/linux/io_uring.h
@@ -10,6 +10,8 @@
#include <linux/fs.h>
#include <linux/types.h>
+#include <linux/io_uring/zcrx.h>
+
/*
* this file is shared with liburing and that has to autodetect
* if linux/time_types.h is available or not, it can
@@ -341,6 +343,10 @@ enum io_uring_op {
/*
* sqe->timeout_flags
+ *
+ * IORING_TIMEOUT_IMMEDIATE_ARG: If set, sqe->addr stores the timeout
+ * value in nanoseconds instead of
+ * pointing to a timespec.
*/
#define IORING_TIMEOUT_ABS (1U << 0)
#define IORING_TIMEOUT_UPDATE (1U << 1)
@@ -349,6 +355,7 @@ enum io_uring_op {
#define IORING_LINK_TIMEOUT_UPDATE (1U << 4)
#define IORING_TIMEOUT_ETIME_SUCCESS (1U << 5)
#define IORING_TIMEOUT_MULTISHOT (1U << 6)
+#define IORING_TIMEOUT_IMMEDIATE_ARG (1U << 7)
#define IORING_TIMEOUT_CLOCK_MASK (IORING_TIMEOUT_BOOTTIME | IORING_TIMEOUT_REALTIME)
#define IORING_TIMEOUT_UPDATE_MASK (IORING_TIMEOUT_UPDATE | IORING_LINK_TIMEOUT_UPDATE)
/*
@@ -1050,100 +1057,6 @@ struct io_timespec {
__u64 tv_nsec;
};
-/* Zero copy receive refill queue entry */
-struct io_uring_zcrx_rqe {
- __u64 off;
- __u32 len;
- __u32 __pad;
-};
-
-struct io_uring_zcrx_cqe {
- __u64 off;
- __u64 __pad;
-};
-
-/* The bit from which area id is encoded into offsets */
-#define IORING_ZCRX_AREA_SHIFT 48
-#define IORING_ZCRX_AREA_MASK (~(((__u64)1 << IORING_ZCRX_AREA_SHIFT) - 1))
-
-struct io_uring_zcrx_offsets {
- __u32 head;
- __u32 tail;
- __u32 rqes;
- __u32 __resv2;
- __u64 __resv[2];
-};
-
-enum io_uring_zcrx_area_flags {
- IORING_ZCRX_AREA_DMABUF = 1,
-};
-
-struct io_uring_zcrx_area_reg {
- __u64 addr;
- __u64 len;
- __u64 rq_area_token;
- __u32 flags;
- __u32 dmabuf_fd;
- __u64 __resv2[2];
-};
-
-enum zcrx_reg_flags {
- ZCRX_REG_IMPORT = 1,
-};
-
-enum zcrx_features {
- /*
- * The user can ask for the desired rx page size by passing the
- * value in struct io_uring_zcrx_ifq_reg::rx_buf_len.
- */
- ZCRX_FEATURE_RX_PAGE_SIZE = 1 << 0,
-};
-
-/*
- * Argument for IORING_REGISTER_ZCRX_IFQ
- */
-struct io_uring_zcrx_ifq_reg {
- __u32 if_idx;
- __u32 if_rxq;
- __u32 rq_entries;
- __u32 flags;
-
- __u64 area_ptr; /* pointer to struct io_uring_zcrx_area_reg */
- __u64 region_ptr; /* struct io_uring_region_desc * */
-
- struct io_uring_zcrx_offsets offsets;
- __u32 zcrx_id;
- __u32 rx_buf_len;
- __u64 __resv[3];
-};
-
-enum zcrx_ctrl_op {
- ZCRX_CTRL_FLUSH_RQ,
- ZCRX_CTRL_EXPORT,
-
- __ZCRX_CTRL_LAST,
-};
-
-struct zcrx_ctrl_flush_rq {
- __u64 __resv[6];
-};
-
-struct zcrx_ctrl_export {
- __u32 zcrx_fd;
- __u32 __resv1[11];
-};
-
-struct zcrx_ctrl {
- __u32 zcrx_id;
- __u32 op; /* see enum zcrx_ctrl_op */
- __u64 __resv[2];
-
- union {
- struct zcrx_ctrl_export zc_export;
- struct zcrx_ctrl_flush_rq zc_flush;
- };
-};
-
#ifdef __cplusplus
}
#endif
diff --git a/include/uapi/linux/io_uring/zcrx.h b/include/uapi/linux/io_uring/zcrx.h
new file mode 100644
index 000000000000..5ce02c7a6096
--- /dev/null
+++ b/include/uapi/linux/io_uring/zcrx.h
@@ -0,0 +1,115 @@
+/* SPDX-License-Identifier: (GPL-2.0 WITH Linux-syscall-note) OR MIT */
+/*
+ * Header file for the io_uring zerocopy receive (zcrx) interface.
+ *
+ * Copyright (C) 2026 Pavel Begunkov
+ * Copyright (C) 2026 David Wei
+ * Copyright (C) Meta Platforms, Inc.
+ */
+#ifndef LINUX_IO_ZCRX_H
+#define LINUX_IO_ZCRX_H
+
+#include <linux/types.h>
+
+/* Zero copy receive refill queue entry */
+struct io_uring_zcrx_rqe {
+ __u64 off;
+ __u32 len;
+ __u32 __pad;
+};
+
+struct io_uring_zcrx_cqe {
+ __u64 off;
+ __u64 __pad;
+};
+
+/* The bit from which area id is encoded into offsets */
+#define IORING_ZCRX_AREA_SHIFT 48
+#define IORING_ZCRX_AREA_MASK (~(((__u64)1 << IORING_ZCRX_AREA_SHIFT) - 1))
+
+struct io_uring_zcrx_offsets {
+ __u32 head;
+ __u32 tail;
+ __u32 rqes;
+ __u32 __resv2;
+ __u64 __resv[2];
+};
+
+enum io_uring_zcrx_area_flags {
+ IORING_ZCRX_AREA_DMABUF = 1,
+};
+
+struct io_uring_zcrx_area_reg {
+ __u64 addr;
+ __u64 len;
+ __u64 rq_area_token;
+ __u32 flags;
+ __u32 dmabuf_fd;
+ __u64 __resv2[2];
+};
+
+enum zcrx_reg_flags {
+ ZCRX_REG_IMPORT = 1,
+
+ /*
+ * Register a zcrx instance without a net device. All data will be
+ * copied. The refill queue entries might not be automatically
+ * consumed and need to be flushed, see ZCRX_CTRL_FLUSH_RQ.
+ */
+ ZCRX_REG_NODEV = 2,
+};
+
+enum zcrx_features {
+ /*
+ * The user can ask for the desired rx page size by passing the
+ * value in struct io_uring_zcrx_ifq_reg::rx_buf_len.
+ */
+ ZCRX_FEATURE_RX_PAGE_SIZE = 1 << 0,
+};
+
+/*
+ * Argument for IORING_REGISTER_ZCRX_IFQ
+ */
+struct io_uring_zcrx_ifq_reg {
+ __u32 if_idx;
+ __u32 if_rxq;
+ __u32 rq_entries;
+ __u32 flags;
+
+ __u64 area_ptr; /* pointer to struct io_uring_zcrx_area_reg */
+ __u64 region_ptr; /* struct io_uring_region_desc * */
+
+ struct io_uring_zcrx_offsets offsets;
+ __u32 zcrx_id;
+ __u32 rx_buf_len;
+ __u64 __resv[3];
+};
+
+enum zcrx_ctrl_op {
+ ZCRX_CTRL_FLUSH_RQ,
+ ZCRX_CTRL_EXPORT,
+
+ __ZCRX_CTRL_LAST,
+};
+
+struct zcrx_ctrl_flush_rq {
+ __u64 __resv[6];
+};
+
+struct zcrx_ctrl_export {
+ __u32 zcrx_fd;
+ __u32 __resv1[11];
+};
+
+struct zcrx_ctrl {
+ __u32 zcrx_id;
+ __u32 op; /* see enum zcrx_ctrl_op */
+ __u64 __resv[2];
+
+ union {
+ struct zcrx_ctrl_export zc_export;
+ struct zcrx_ctrl_flush_rq zc_flush;
+ };
+};
+
+#endif /* LINUX_IO_ZCRX_H */