summaryrefslogtreecommitdiff
path: root/include/uapi/linux
diff options
context:
space:
mode:
Diffstat (limited to 'include/uapi/linux')
-rw-r--r--include/uapi/linux/ublk_cmd.h72
1 files changed, 72 insertions, 0 deletions
diff --git a/include/uapi/linux/ublk_cmd.h b/include/uapi/linux/ublk_cmd.h
index a88876756805..5b71c19d3b9c 100644
--- a/include/uapi/linux/ublk_cmd.h
+++ b/include/uapi/linux/ublk_cmd.h
@@ -58,6 +58,44 @@
#define UBLK_U_CMD_TRY_STOP_DEV \
_IOWR('u', 0x17, struct ublksrv_ctrl_cmd)
/*
+ * Register a shared memory buffer for zero-copy I/O.
+ * Input: ctrl_cmd.addr points to struct ublk_shmem_buf_reg (buffer VA + size)
+ * ctrl_cmd.len = sizeof(struct ublk_shmem_buf_reg)
+ * Result: >= 0 is the assigned buffer index, < 0 is error
+ *
+ * The kernel pins pages from the calling process's address space
+ * and inserts PFN ranges into a per-device maple tree. When a block
+ * request's pages match registered pages, the driver sets
+ * UBLK_IO_F_SHMEM_ZC and encodes the buffer index + offset in addr,
+ * allowing the server to access the data via its own mapping of the
+ * same shared memory — true zero copy.
+ *
+ * The memory can be backed by memfd, hugetlbfs, or any GUP-compatible
+ * shared mapping. Queue freeze is handled internally.
+ *
+ * The buffer VA and size are passed via a user buffer (not inline in
+ * ctrl_cmd) so that unprivileged devices can prepend the device path
+ * to ctrl_cmd.addr without corrupting the VA.
+ */
+#define UBLK_U_CMD_REG_BUF \
+ _IOWR('u', 0x18, struct ublksrv_ctrl_cmd)
+/*
+ * Unregister a shared memory buffer.
+ * Input: ctrl_cmd.data[0] = buffer index
+ */
+#define UBLK_U_CMD_UNREG_BUF \
+ _IOWR('u', 0x19, struct ublksrv_ctrl_cmd)
+
+/* Parameter buffer for UBLK_U_CMD_REG_BUF, pointed to by ctrl_cmd.addr */
+struct ublk_shmem_buf_reg {
+ __u64 addr; /* userspace virtual address of shared memory */
+ __u32 len; /* buffer size in bytes (page-aligned, max 4GB) */
+ __u32 flags;
+};
+
+/* Pin pages without FOLL_WRITE; usable with write-sealed memfd */
+#define UBLK_SHMEM_BUF_READ_ONLY (1U << 0)
+/*
* 64bits are enough now, and it should be easy to extend in case of
* running out of feature flags
*/
@@ -370,6 +408,7 @@
/* Disable automatic partition scanning when device is started */
#define UBLK_F_NO_AUTO_PART_SCAN (1ULL << 18)
+
/* device state */
#define UBLK_S_DEV_DEAD 0
#define UBLK_S_DEV_LIVE 1
@@ -469,6 +508,12 @@ struct ublksrv_ctrl_dev_info {
#define UBLK_IO_F_NEED_REG_BUF (1U << 17)
/* Request has an integrity data buffer */
#define UBLK_IO_F_INTEGRITY (1UL << 18)
+/*
+ * I/O buffer is in a registered shared memory buffer. When set, the addr
+ * field in ublksrv_io_desc encodes buffer index and byte offset instead
+ * of a userspace virtual address.
+ */
+#define UBLK_IO_F_SHMEM_ZC (1U << 19)
/*
* io cmd is described by this structure, and stored in share memory, indexed
@@ -743,4 +788,31 @@ struct ublk_params {
struct ublk_param_integrity integrity;
};
+/*
+ * Shared memory zero-copy addr encoding for UBLK_IO_F_SHMEM_ZC.
+ *
+ * When UBLK_IO_F_SHMEM_ZC is set, ublksrv_io_desc.addr is encoded as:
+ * bits [0:31] = byte offset within the buffer (up to 4GB)
+ * bits [32:47] = buffer index (up to 65536)
+ * bits [48:63] = reserved (must be zero)
+ */
+#define UBLK_SHMEM_ZC_OFF_MASK 0xffffffffULL
+#define UBLK_SHMEM_ZC_IDX_OFF 32
+#define UBLK_SHMEM_ZC_IDX_MASK 0xffffULL
+
+static inline __u64 ublk_shmem_zc_addr(__u16 index, __u32 offset)
+{
+ return ((__u64)index << UBLK_SHMEM_ZC_IDX_OFF) | offset;
+}
+
+static inline __u16 ublk_shmem_zc_index(__u64 addr)
+{
+ return (addr >> UBLK_SHMEM_ZC_IDX_OFF) & UBLK_SHMEM_ZC_IDX_MASK;
+}
+
+static inline __u32 ublk_shmem_zc_offset(__u64 addr)
+{
+ return (__u32)(addr & UBLK_SHMEM_ZC_OFF_MASK);
+}
+
#endif