diff options
| author | Ming Lei <ming.lei@redhat.com> | 2026-03-31 23:31:52 +0800 |
|---|---|---|
| committer | Jens Axboe <axboe@kernel.dk> | 2026-04-07 07:38:26 -0600 |
| commit | 2fb0ded237bb55dae45bc076666b348fc948ac9e (patch) | |
| tree | 9f2f5f5de8c12506dba733455becda5716025d20 /include/uapi/linux | |
| parent | fa0cac9a515877fad856c860ad51107b86ed6c4f (diff) | |
ublk: add UBLK_U_CMD_REG_BUF/UNREG_BUF control commands
Add control commands for registering and unregistering shared memory
buffers for zero-copy I/O:
- UBLK_U_CMD_REG_BUF (0x18): pins pages from userspace, inserts PFN
ranges into a per-device maple tree for O(log n) lookup during I/O.
Buffer pointers are tracked in a per-device xarray. Returns the
assigned buffer index.
- UBLK_U_CMD_UNREG_BUF (0x19): removes PFN entries and unpins pages.
Queue freeze/unfreeze is handled internally so userspace need not
quiesce the device during registration.
Also adds:
- UBLK_IO_F_SHMEM_ZC flag and addr encoding helpers in UAPI header
(16-bit buffer index supporting up to 65536 buffers)
- Data structures (ublk_buf, ublk_buf_range) and xarray/maple tree
- __ublk_ctrl_reg_buf() helper for PFN insertion with error unwinding
- __ublk_ctrl_unreg_buf() helper for cleanup reuse
- ublk_support_shmem_zc() / ublk_dev_support_shmem_zc() stubs
(returning false — feature not enabled yet)
Signed-off-by: Ming Lei <ming.lei@redhat.com>
Link: https://patch.msgid.link/20260331153207.3635125-2-ming.lei@redhat.com
[axboe: fixup ublk_buf_reg -> ublk_shmem_buf_reg errors, comments]
Signed-off-by: Jens Axboe <axboe@kernel.dk>
Diffstat (limited to 'include/uapi/linux')
| -rw-r--r-- | include/uapi/linux/ublk_cmd.h | 72 |
1 files changed, 72 insertions, 0 deletions
diff --git a/include/uapi/linux/ublk_cmd.h b/include/uapi/linux/ublk_cmd.h index a88876756805..5b71c19d3b9c 100644 --- a/include/uapi/linux/ublk_cmd.h +++ b/include/uapi/linux/ublk_cmd.h @@ -58,6 +58,44 @@ #define UBLK_U_CMD_TRY_STOP_DEV \ _IOWR('u', 0x17, struct ublksrv_ctrl_cmd) /* + * Register a shared memory buffer for zero-copy I/O. + * Input: ctrl_cmd.addr points to struct ublk_shmem_buf_reg (buffer VA + size) + * ctrl_cmd.len = sizeof(struct ublk_shmem_buf_reg) + * Result: >= 0 is the assigned buffer index, < 0 is error + * + * The kernel pins pages from the calling process's address space + * and inserts PFN ranges into a per-device maple tree. When a block + * request's pages match registered pages, the driver sets + * UBLK_IO_F_SHMEM_ZC and encodes the buffer index + offset in addr, + * allowing the server to access the data via its own mapping of the + * same shared memory — true zero copy. + * + * The memory can be backed by memfd, hugetlbfs, or any GUP-compatible + * shared mapping. Queue freeze is handled internally. + * + * The buffer VA and size are passed via a user buffer (not inline in + * ctrl_cmd) so that unprivileged devices can prepend the device path + * to ctrl_cmd.addr without corrupting the VA. + */ +#define UBLK_U_CMD_REG_BUF \ + _IOWR('u', 0x18, struct ublksrv_ctrl_cmd) +/* + * Unregister a shared memory buffer. + * Input: ctrl_cmd.data[0] = buffer index + */ +#define UBLK_U_CMD_UNREG_BUF \ + _IOWR('u', 0x19, struct ublksrv_ctrl_cmd) + +/* Parameter buffer for UBLK_U_CMD_REG_BUF, pointed to by ctrl_cmd.addr */ +struct ublk_shmem_buf_reg { + __u64 addr; /* userspace virtual address of shared memory */ + __u32 len; /* buffer size in bytes (page-aligned, max 4GB) */ + __u32 flags; +}; + +/* Pin pages without FOLL_WRITE; usable with write-sealed memfd */ +#define UBLK_SHMEM_BUF_READ_ONLY (1U << 0) +/* * 64bits are enough now, and it should be easy to extend in case of * running out of feature flags */ @@ -370,6 +408,7 @@ /* Disable automatic partition scanning when device is started */ #define UBLK_F_NO_AUTO_PART_SCAN (1ULL << 18) + /* device state */ #define UBLK_S_DEV_DEAD 0 #define UBLK_S_DEV_LIVE 1 @@ -469,6 +508,12 @@ struct ublksrv_ctrl_dev_info { #define UBLK_IO_F_NEED_REG_BUF (1U << 17) /* Request has an integrity data buffer */ #define UBLK_IO_F_INTEGRITY (1UL << 18) +/* + * I/O buffer is in a registered shared memory buffer. When set, the addr + * field in ublksrv_io_desc encodes buffer index and byte offset instead + * of a userspace virtual address. + */ +#define UBLK_IO_F_SHMEM_ZC (1U << 19) /* * io cmd is described by this structure, and stored in share memory, indexed @@ -743,4 +788,31 @@ struct ublk_params { struct ublk_param_integrity integrity; }; +/* + * Shared memory zero-copy addr encoding for UBLK_IO_F_SHMEM_ZC. + * + * When UBLK_IO_F_SHMEM_ZC is set, ublksrv_io_desc.addr is encoded as: + * bits [0:31] = byte offset within the buffer (up to 4GB) + * bits [32:47] = buffer index (up to 65536) + * bits [48:63] = reserved (must be zero) + */ +#define UBLK_SHMEM_ZC_OFF_MASK 0xffffffffULL +#define UBLK_SHMEM_ZC_IDX_OFF 32 +#define UBLK_SHMEM_ZC_IDX_MASK 0xffffULL + +static inline __u64 ublk_shmem_zc_addr(__u16 index, __u32 offset) +{ + return ((__u64)index << UBLK_SHMEM_ZC_IDX_OFF) | offset; +} + +static inline __u16 ublk_shmem_zc_index(__u64 addr) +{ + return (addr >> UBLK_SHMEM_ZC_IDX_OFF) & UBLK_SHMEM_ZC_IDX_MASK; +} + +static inline __u32 ublk_shmem_zc_offset(__u64 addr) +{ + return (__u32)(addr & UBLK_SHMEM_ZC_OFF_MASK); +} + #endif |
