summaryrefslogtreecommitdiff
path: root/include/linux
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2026-04-13 16:22:30 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2026-04-13 16:22:30 -0700
commit23acda7c221a76ff711d65f4ca90029d43b249a0 (patch)
tree3e7745c9210489864e153990c06833d7d47a3dcd /include/linux
parent7fe6ac157b7e15c8976bd62ad7cb98e248884e83 (diff)
parentc5e9f6a96bf7379da87df1b852b90527e242b56f (diff)
Merge tag 'for-7.1/io_uring-20260411' of git://git.kernel.org/pub/scm/linux/kernel/git/axboe/linux
Pull io_uring updates from Jens Axboe: - Add a callback driven main loop for io_uring, and BPF struct_ops on top to allow implementing custom event loop logic - Decouple IOPOLL from being a ring-wide all-or-nothing setting, allowing IOPOLL use cases to also issue certain whitelisted non-polled opcodes - Timeout improvements. Migrate internal timeout storage from timespec64 to ktime_t for simpler arithmetic and to avoid copying of timespec data - Zero-copy receive (zcrx) updates: - Add a device-less mode (ZCRX_REG_NODEV) for testing and experimentation where data flows through the copy fallback path - Fix two-step unregistration regression, DMA length calculations, xarray mark usage, and a potential 32-bit overflow in id shifting - Refactoring toward multi-area support: dedicated refill queue struct, consolidated DMA syncing, netmem array refilling format, and guard-based locking - Zero-copy transmit (zctx) cleanup: - Unify io_send_zc() and io_sendmsg_zc() into a single function - Add vectorized registered buffer send for IORING_OP_SEND_ZC - Add separate notification user_data via sqe->addr3 so notification and completion CQEs can be distinguished without extra reference counting - Switch struct io_ring_ctx internal bitfields to explicit flag bits with atomic-safe accessors, and annotate the known harmless races on those flags - Various optimizations caching ctx and other request fields in local variables to avoid repeated loads, and cleanups for tctx setup, ring fd registration, and read path early 
returns io_uring/zcrx: use correct mmap off constants io_uring/zcrx: use dma_len for chunk size calculation io_uring/zcrx: don't clear not allocated niovs io_uring/zcrx: don't use mark0 for allocating xarray io_uring: cast id to u64 before shifting in io_allocate_rbuf_ring() io_uring/zcrx: reject REG_NODEV with large rx_buf_size io_uring/cancel: validate opcode for IORING_ASYNC_CANCEL_OP io_uring/rsrc: use io_cache_free() to free node io_uring/zcrx: rename zcrx [un]register functions io_uring/zcrx: check ctrl op payload struct sizes io_uring/zcrx: cache fallback availability in zcrx ctx io_uring/zcrx: warn on a repeated area append io_uring/zcrx: consolidate dma syncing io_uring/zcrx: netmem array as refilling format ...
Diffstat (limited to 'include/linux')
-rw-r--r--include/linux/io_uring_types.h47
1 file changed, 33 insertions, 14 deletions
diff --git a/include/linux/io_uring_types.h b/include/linux/io_uring_types.h
index 214fdbd49052..244392026c6d 100644
--- a/include/linux/io_uring_types.h
+++ b/include/linux/io_uring_types.h
@@ -8,6 +8,9 @@
#include <linux/llist.h>
#include <uapi/linux/io_uring.h>
+struct iou_loop_params;
+struct io_uring_bpf_ops;
+
enum {
/*
* A hint to not wake right away but delay until there are enough of
@@ -41,6 +44,8 @@ enum io_uring_cmd_flags {
IO_URING_F_COMPAT = (1 << 12),
};
+struct iou_loop_params;
+
struct io_wq_work_node {
struct io_wq_work_node *next;
};
@@ -268,24 +273,30 @@ struct io_alloc_cache {
unsigned int init_clear;
};
+enum {
+ IO_RING_F_DRAIN_NEXT = BIT(0),
+ IO_RING_F_OP_RESTRICTED = BIT(1),
+ IO_RING_F_REG_RESTRICTED = BIT(2),
+ IO_RING_F_OFF_TIMEOUT_USED = BIT(3),
+ IO_RING_F_DRAIN_ACTIVE = BIT(4),
+ IO_RING_F_HAS_EVFD = BIT(5),
+ /* all CQEs should be posted only by the submitter task */
+ IO_RING_F_TASK_COMPLETE = BIT(6),
+ IO_RING_F_LOCKLESS_CQ = BIT(7),
+ IO_RING_F_SYSCALL_IOPOLL = BIT(8),
+ IO_RING_F_POLL_ACTIVATED = BIT(9),
+ IO_RING_F_DRAIN_DISABLED = BIT(10),
+ IO_RING_F_COMPAT = BIT(11),
+ IO_RING_F_IOWQ_LIMITS_SET = BIT(12),
+};
+
struct io_ring_ctx {
/* const or read-mostly hot data */
struct {
+ /* ring setup flags */
unsigned int flags;
- unsigned int drain_next: 1;
- unsigned int op_restricted: 1;
- unsigned int reg_restricted: 1;
- unsigned int off_timeout_used: 1;
- unsigned int drain_active: 1;
- unsigned int has_evfd: 1;
- /* all CQEs should be posted only by the submitter task */
- unsigned int task_complete: 1;
- unsigned int lockless_cq: 1;
- unsigned int syscall_iopoll: 1;
- unsigned int poll_activated: 1;
- unsigned int drain_disabled: 1;
- unsigned int compat: 1;
- unsigned int iowq_limits_set : 1;
+ /* internal state IO_RING_F_* flags, mostly read-only */
+ unsigned int int_flags;
struct task_struct *submitter_task;
struct io_rings *rings;
@@ -355,6 +366,9 @@ struct io_ring_ctx {
struct io_alloc_cache rw_cache;
struct io_alloc_cache cmd_cache;
+ int (*loop_step)(struct io_ring_ctx *ctx,
+ struct iou_loop_params *);
+
/*
* Any cancelable uring_cmd is added to this list in
* ->uring_cmd() by io_uring_cmd_insert_cancelable()
@@ -477,6 +491,8 @@ struct io_ring_ctx {
DECLARE_HASHTABLE(napi_ht, 4);
#endif
+ struct io_uring_bpf_ops *bpf_ops;
+
/*
* Protection for resize vs mmap races - both the mmap and resize
* side will need to grab this lock, to prevent either side from
@@ -545,6 +561,7 @@ enum {
REQ_F_HAS_METADATA_BIT,
REQ_F_IMPORT_BUFFER_BIT,
REQ_F_SQE_COPIED_BIT,
+ REQ_F_IOPOLL_BIT,
/* not a real bit, just to check we're not overflowing the space */
__REQ_F_LAST_BIT,
@@ -638,6 +655,8 @@ enum {
REQ_F_IMPORT_BUFFER = IO_REQ_FLAG(REQ_F_IMPORT_BUFFER_BIT),
/* ->sqe_copy() has been called, if necessary */
REQ_F_SQE_COPIED = IO_REQ_FLAG(REQ_F_SQE_COPIED_BIT),
+ /* request must be iopolled to completion (set in ->issue()) */
+ REQ_F_IOPOLL = IO_REQ_FLAG(REQ_F_IOPOLL_BIT),
};
struct io_tw_req {