summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Jens Axboe <axboe@kernel.dk> 2026-06-23 09:05:44 -0600
committer: Jens Axboe <axboe@kernel.dk> 2026-06-23 09:05:44 -0600
commit: 29264400dd2af703ecffae039c2ee799558e2d34 (patch)
tree: 1aa1e97ae2fd0d61e7fb6d90224d70dc4f1232ba
parent: 3ed9b4779a4aa3f44cd9f78627498d7adac40daa (diff)
parent: 7d953c75f0a3f905aadf3675c9394a5b9d9897bf (diff)
Merge tag 'nvme-7.2-2026-06-23' of git://git.infradead.org/nvme into block-7.2
Pull NVMe fixes from Keith:

"- Apple A11 quirk for sharing tags across admin and IO queues (Nick)
 - Target fix for short AUTH_RECEIVE buffers (Michael)
 - Target fix for SQ refcount leak (Wentao)
 - Target RDMA handling inline data with nonzero offset (Bryam)
 - Target TCP fix handling the TCP_CLOSING state (Maurizio)
 - FC abort fixes in early initialization (Mohamed)
 - Controller device teardown fixes (Maurizio, John)
 - Allocate the target ana_state with the port (Rosen)
 - Quieten sparse and sysfs symbol warnings (John)"

* tag 'nvme-7.2-2026-06-23' of git://git.infradead.org/nvme:
  nvmet-tcp: handle TCP_CLOSING state in nvmet_tcp_state_change
  nvmet-auth: reject short AUTH_RECEIVE buffers
  nvme-fc: Do not cancel requests in io target before it is initialized
  nvme: make nvme_add_ns{_head}_cdev return void
  nvme: make some sysfs diagnostic structures static
  nvmet-rdma: handle inline data with a nonzero offset
  nvme: target: allocate ana_state with port
  nvme: fix crash and memory leak during invalid cdev teardown
  nvmet: fix refcount leak in nvmet_sq_create()
  nvme: quieten sparse warning in valid LBA size check
  nvme-apple: Prevent shared tags across queues on Apple A11
-rw-r--r-- drivers/nvme/host/apple.c | 12
-rw-r--r-- drivers/nvme/host/core.c | 45
-rw-r--r-- drivers/nvme/host/fc.c | 7
-rw-r--r-- drivers/nvme/host/multipath.c | 24
-rw-r--r-- drivers/nvme/host/nvme.h | 5
-rw-r--r-- drivers/nvme/host/sysfs.c | 6
-rw-r--r-- drivers/nvme/target/configfs.c | 9
-rw-r--r-- drivers/nvme/target/core.c | 2
-rw-r--r-- drivers/nvme/target/fabrics-cmd-auth.c | 26
-rw-r--r-- drivers/nvme/target/nvmet.h | 2
-rw-r--r-- drivers/nvme/target/rdma.c | 18
-rw-r--r-- drivers/nvme/target/tcp.c | 1
12 files changed, 100 insertions, 57 deletions
diff --git a/drivers/nvme/host/apple.c b/drivers/nvme/host/apple.c
index f9327feb87d0..be3b91b43ea5 100644
--- a/drivers/nvme/host/apple.c
+++ b/drivers/nvme/host/apple.c
@@ -225,7 +225,7 @@ static unsigned int apple_nvme_queue_depth(struct apple_nvme_queue *q)
{
struct apple_nvme *anv = queue_to_apple_nvme(q);
- if (q->is_adminq && anv->hw->has_lsq_nvmmu)
+ if (q->is_adminq)
return APPLE_NVME_AQ_DEPTH;
return anv->hw->max_queue_depth;
@@ -303,7 +303,7 @@ static void apple_nvme_submit_cmd_t8015(struct apple_nvme_queue *q,
memcpy((void *)q->sqes + (q->sq_tail << APPLE_NVME_IOSQES),
cmd, sizeof(*cmd));
- if (++q->sq_tail == anv->hw->max_queue_depth)
+ if (++q->sq_tail == apple_nvme_queue_depth(q))
q->sq_tail = 0;
writel(q->sq_tail, q->sq_db);
@@ -1139,10 +1139,7 @@ static void apple_nvme_reset_work(struct work_struct *work)
}
/* Setup the admin queue */
- if (anv->hw->has_lsq_nvmmu)
- aqa = APPLE_NVME_AQ_DEPTH - 1;
- else
- aqa = anv->hw->max_queue_depth - 1;
+ aqa = APPLE_NVME_AQ_DEPTH - 1;
aqa |= aqa << 16;
writel(aqa, anv->mmio_nvme + NVME_REG_AQA);
writeq(anv->adminq.sq_dma_addr, anv->mmio_nvme + NVME_REG_ASQ);
@@ -1325,8 +1322,7 @@ static int apple_nvme_alloc_tagsets(struct apple_nvme *anv)
* both queues. The admin queue gets the first APPLE_NVME_AQ_DEPTH which
* must be marked as reserved in the IO queue.
*/
- if (anv->hw->has_lsq_nvmmu)
- anv->tagset.reserved_tags = APPLE_NVME_AQ_DEPTH;
+ anv->tagset.reserved_tags = APPLE_NVME_AQ_DEPTH;
anv->tagset.queue_depth = anv->hw->max_queue_depth - 1;
anv->tagset.timeout = NVME_IO_TIMEOUT;
anv->tagset.numa_node = NUMA_NO_NODE;
diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
index 3b7a8f7a3542..453c1f0b2dd0 100644
--- a/drivers/nvme/host/core.c
+++ b/drivers/nvme/host/core.c
@@ -2379,6 +2379,11 @@ free:
return ret;
}
+static bool nvme_invalid_lba_sz(u64 nsze, signed int shift, sector_t *capacity)
+{
+ return check_shl_overflow(nsze, shift, capacity);
+}
+
static int nvme_update_ns_info_block(struct nvme_ns *ns,
struct nvme_ns_info *info)
{
@@ -2422,10 +2427,8 @@ static int nvme_update_ns_info_block(struct nvme_ns *ns,
goto out;
}
- if (id->lbaf[lbaf].ds < SECTOR_SHIFT ||
- check_shl_overflow(le64_to_cpu(id->nsze),
- id->lbaf[lbaf].ds - SECTOR_SHIFT,
- &capacity)) {
+ if (nvme_invalid_lba_sz(le64_to_cpu(id->nsze),
+ id->lbaf[lbaf].ds - SECTOR_SHIFT, &capacity)) {
dev_warn_once(ns->ctrl->device,
"invalid LBA data size %u, skipping namespace\n",
id->lbaf[lbaf].ds);
@@ -3895,7 +3898,8 @@ void nvme_cdev_del(struct cdev *cdev, struct device *cdev_device)
put_device(cdev_device);
}
-int nvme_cdev_add(struct cdev *cdev, struct device *cdev_device,
+int nvme_cdev_add(const char *name, struct cdev *cdev,
+ struct device *cdev_device,
const struct file_operations *fops, struct module *owner)
{
int minor, ret;
@@ -3903,6 +3907,12 @@ int nvme_cdev_add(struct cdev *cdev, struct device *cdev_device,
minor = ida_alloc(&nvme_ns_chr_minor_ida, GFP_KERNEL);
if (minor < 0)
return minor;
+
+ ret = dev_set_name(cdev_device, name);
+ if (ret) {
+ ida_free(&nvme_ns_chr_minor_ida, minor);
+ return ret;
+ }
cdev_device->devt = MKDEV(MAJOR(nvme_ns_chr_devt), minor);
cdev_device->class = &nvme_ns_chr_class;
cdev_device->release = nvme_cdev_rel;
@@ -3937,18 +3947,21 @@ static const struct file_operations nvme_ns_chr_fops = {
.uring_cmd_iopoll = nvme_ns_chr_uring_cmd_iopoll,
};
-static int nvme_add_ns_cdev(struct nvme_ns *ns)
+static void nvme_add_ns_cdev(struct nvme_ns *ns)
{
- int ret;
+ char name[32];
ns->cdev_device.parent = ns->ctrl->device;
- ret = dev_set_name(&ns->cdev_device, "ng%dn%d",
- ns->ctrl->instance, ns->head->instance);
- if (ret)
- return ret;
+ snprintf(name, sizeof(name), "ng%dn%d", ns->ctrl->instance,
+ ns->head->instance);
- return nvme_cdev_add(&ns->cdev, &ns->cdev_device, &nvme_ns_chr_fops,
- ns->ctrl->ops->module);
+ if (nvme_cdev_add(name, &ns->cdev, &ns->cdev_device,
+ &nvme_ns_chr_fops, ns->ctrl->ops->module)) {
+ dev_err(ns->ctrl->device, "Unable to create the %s device\n",
+ name);
+ return;
+ }
+ set_bit(NVME_NS_CDEV_LIVE, &ns->flags);
}
static struct nvme_ns_head *nvme_alloc_ns_head(struct nvme_ctrl *ctrl,
@@ -4324,8 +4337,10 @@ static void nvme_ns_remove(struct nvme_ns *ns)
/* guarantee not available in head->list */
synchronize_srcu(&ns->head->srcu);
- if (!nvme_ns_head_multipath(ns->head))
- nvme_cdev_del(&ns->cdev, &ns->cdev_device);
+ if (!nvme_ns_head_multipath(ns->head)) {
+ if (test_and_clear_bit(NVME_NS_CDEV_LIVE, &ns->flags))
+ nvme_cdev_del(&ns->cdev, &ns->cdev_device);
+ }
nvme_mpath_remove_sysfs_link(ns);
diff --git a/drivers/nvme/host/fc.c b/drivers/nvme/host/fc.c
index 2c9a6d3c9797..04363b9c4489 100644
--- a/drivers/nvme/host/fc.c
+++ b/drivers/nvme/host/fc.c
@@ -2461,7 +2461,7 @@ __nvme_fc_abort_outstanding_ios(struct nvme_fc_ctrl *ctrl, bool start_queues)
* io requests back to the block layer as part of normal completions
* (but with error status).
*/
- if (ctrl->ctrl.queue_count > 1) {
+ if (ctrl->ctrl.queue_count > 1 && ctrl->ctrl.tagset) {
nvme_quiesce_io_queues(&ctrl->ctrl);
nvme_sync_io_queues(&ctrl->ctrl);
blk_mq_tagset_busy_iter(&ctrl->tag_set,
@@ -2900,6 +2900,11 @@ nvme_fc_create_io_queues(struct nvme_fc_ctrl *ctrl)
out_delete_hw_queues:
nvme_fc_delete_hw_io_queues(ctrl);
out_cleanup_tagset:
+ /*
+ * In CONNECTING state ctrl->ioerr_work will abort both admin
+ * and io tagsets. Cancel it first before removing io tagset.
+ */
+ cancel_work_sync(&ctrl->ioerr_work);
nvme_remove_io_tag_set(&ctrl->ctrl);
nvme_fc_free_io_queues(ctrl);
diff --git a/drivers/nvme/host/multipath.c b/drivers/nvme/host/multipath.c
index e033ede953cc..9b9a657fa330 100644
--- a/drivers/nvme/host/multipath.c
+++ b/drivers/nvme/host/multipath.c
@@ -639,18 +639,21 @@ static const struct file_operations nvme_ns_head_chr_fops = {
.uring_cmd_iopoll = nvme_ns_chr_uring_cmd_iopoll,
};
-static int nvme_add_ns_head_cdev(struct nvme_ns_head *head)
+static void nvme_add_ns_head_cdev(struct nvme_ns_head *head)
{
- int ret;
+ char name[32];
head->cdev_device.parent = &head->subsys->dev;
- ret = dev_set_name(&head->cdev_device, "ng%dn%d",
- head->subsys->instance, head->instance);
- if (ret)
- return ret;
- ret = nvme_cdev_add(&head->cdev, &head->cdev_device,
- &nvme_ns_head_chr_fops, THIS_MODULE);
- return ret;
+ snprintf(name, sizeof(name), "ng%dn%d", head->subsys->instance,
+ head->instance);
+
+ if (nvme_cdev_add(name, &head->cdev, &head->cdev_device,
+ &nvme_ns_head_chr_fops, THIS_MODULE)) {
+ dev_err(disk_to_dev(head->disk),
+ "Unable to create the %s device\n", name);
+ return;
+ }
+ set_bit(NVME_NSHEAD_CDEV_LIVE, &head->flags);
}
static void nvme_partition_scan_work(struct work_struct *work)
@@ -694,7 +697,8 @@ static void nvme_remove_head(struct nvme_ns_head *head)
*/
kblockd_schedule_work(&head->requeue_work);
- nvme_cdev_del(&head->cdev, &head->cdev_device);
+ if (test_and_clear_bit(NVME_NSHEAD_CDEV_LIVE, &head->flags))
+ nvme_cdev_del(&head->cdev, &head->cdev_device);
synchronize_srcu(&head->srcu);
del_gendisk(head->disk);
}
diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h
index b367c67dcb37..824651cc898d 100644
--- a/drivers/nvme/host/nvme.h
+++ b/drivers/nvme/host/nvme.h
@@ -573,6 +573,7 @@ struct nvme_ns_head {
atomic_long_t io_fail_no_available_path_count;
#define NVME_NSHEAD_DISK_LIVE 0
#define NVME_NSHEAD_QUEUE_IF_NO_PATH 1
+#define NVME_NSHEAD_CDEV_LIVE 2
struct nvme_ns __rcu *current_path[];
#endif
};
@@ -611,6 +612,7 @@ struct nvme_ns {
#define NVME_NS_FORCE_RO 3
#define NVME_NS_READY 4
#define NVME_NS_SYSFS_ATTR_LINK 5
+#define NVME_NS_CDEV_LIVE 6
struct cdev cdev;
struct device cdev_device;
@@ -995,7 +997,8 @@ int nvme_get_log(struct nvme_ctrl *ctrl, u32 nsid, u8 log_page, u8 lsp, u8 csi,
void *log, size_t size, u64 offset);
bool nvme_tryget_ns_head(struct nvme_ns_head *head);
void nvme_put_ns_head(struct nvme_ns_head *head);
-int nvme_cdev_add(struct cdev *cdev, struct device *cdev_device,
+int nvme_cdev_add(const char *name, struct cdev *cdev,
+ struct device *cdev_device,
const struct file_operations *fops, struct module *owner);
void nvme_cdev_del(struct cdev *cdev, struct device *cdev_device);
int nvme_ioctl(struct block_device *bdev, blk_mode_t mode,
diff --git a/drivers/nvme/host/sysfs.c b/drivers/nvme/host/sysfs.c
index 933a5adfb7af..75b2d69b5957 100644
--- a/drivers/nvme/host/sysfs.c
+++ b/drivers/nvme/host/sysfs.c
@@ -394,7 +394,7 @@ static ssize_t nvme_io_errors_store(struct device *dev,
return count;
}
-struct device_attribute dev_attr_io_errors =
+static struct device_attribute dev_attr_io_errors =
__ATTR(command_error_count, 0644,
nvme_io_errors_show, nvme_io_errors_store);
@@ -441,7 +441,7 @@ static umode_t nvme_ns_diag_attrs_are_visible(struct kobject *kobj,
return a->mode;
}
-const struct attribute_group nvme_ns_diag_attr_group = {
+static const struct attribute_group nvme_ns_diag_attr_group = {
.name = "diag",
.attrs = nvme_ns_diag_attrs,
.is_visible = nvme_ns_diag_attrs_are_visible,
@@ -1147,7 +1147,7 @@ static ssize_t nvme_adm_errors_store(struct device *dev,
return count;
}
-struct device_attribute dev_attr_adm_errors =
+static struct device_attribute dev_attr_adm_errors =
__ATTR(command_error_count, 0644,
nvme_adm_errors_show, nvme_adm_errors_store);
diff --git a/drivers/nvme/target/configfs.c b/drivers/nvme/target/configfs.c
index b88f897f06e2..2b69ffcfc8df 100644
--- a/drivers/nvme/target/configfs.c
+++ b/drivers/nvme/target/configfs.c
@@ -2007,7 +2007,6 @@ static void nvmet_port_release(struct config_item *item)
list_del(&port->global_entry);
key_put(port->keyring);
- kfree(port->ana_state);
kfree(port);
}
@@ -2047,16 +2046,10 @@ static struct config_group *nvmet_ports_make(struct config_group *group,
if (kstrtou16(name, 0, &portid))
return ERR_PTR(-EINVAL);
- port = kzalloc_obj(*port);
+ port = kzalloc_flex(*port, ana_state, NVMET_MAX_ANAGRPS + 1);
if (!port)
return ERR_PTR(-ENOMEM);
- port->ana_state = kzalloc_objs(*port->ana_state, NVMET_MAX_ANAGRPS + 1);
- if (!port->ana_state) {
- kfree(port);
- return ERR_PTR(-ENOMEM);
- }
-
if (IS_ENABLED(CONFIG_NVME_TARGET_TCP_TLS) && nvme_keyring_id()) {
port->keyring = key_lookup(nvme_keyring_id());
if (IS_ERR(port->keyring)) {
diff --git a/drivers/nvme/target/core.c b/drivers/nvme/target/core.c
index 62dd59b9aa4f..4477c4d6b1ee 100644
--- a/drivers/nvme/target/core.c
+++ b/drivers/nvme/target/core.c
@@ -944,7 +944,7 @@ u16 nvmet_sq_create(struct nvmet_ctrl *ctrl, struct nvmet_sq *sq,
status = nvmet_check_sqid(ctrl, sqid, true);
if (status != NVME_SC_SUCCESS)
- return status;
+ goto ctrl_put;
ret = nvmet_sq_init(sq, cq);
if (ret) {
diff --git a/drivers/nvme/target/fabrics-cmd-auth.c b/drivers/nvme/target/fabrics-cmd-auth.c
index 0a85acf1e5c7..45820a12750d 100644
--- a/drivers/nvme/target/fabrics-cmd-auth.c
+++ b/drivers/nvme/target/fabrics-cmd-auth.c
@@ -493,7 +493,31 @@ static void nvmet_auth_failure1(struct nvmet_req *req, void *d, int al)
u32 nvmet_auth_receive_data_len(struct nvmet_req *req)
{
- return le32_to_cpu(req->cmd->auth_receive.al);
+ struct nvmet_ctrl *ctrl = req->sq->ctrl;
+ u32 al = le32_to_cpu(req->cmd->auth_receive.al);
+ u32 min_len;
+
+ /*
+ * Reject too-short al before kmalloc(al), since the SUCCESS1 and
+ * FAILURE1/default builders write fixed response headers into it.
+ */
+ switch (req->sq->dhchap_step) {
+ case NVME_AUTH_DHCHAP_MESSAGE_CHALLENGE:
+ return al;
+ case NVME_AUTH_DHCHAP_MESSAGE_SUCCESS1:
+ min_len = sizeof(struct nvmf_auth_dhchap_success1_data);
+ if (req->sq->dhchap_c2)
+ min_len += nvme_auth_hmac_hash_len(ctrl->shash_id);
+ break;
+ default:
+ min_len = sizeof(struct nvmf_auth_dhchap_failure_data);
+ break;
+ }
+
+ if (al < min_len)
+ return 0;
+
+ return al;
}
void nvmet_execute_auth_receive(struct nvmet_req *req)
diff --git a/drivers/nvme/target/nvmet.h b/drivers/nvme/target/nvmet.h
index 3305a88684ec..aaba745e3c21 100644
--- a/drivers/nvme/target/nvmet.h
+++ b/drivers/nvme/target/nvmet.h
@@ -208,7 +208,6 @@ struct nvmet_port {
struct list_head global_entry;
struct config_group ana_groups_group;
struct nvmet_ana_group ana_default_group;
- enum nvme_ana_state *ana_state;
struct key *keyring;
void *priv;
bool enabled;
@@ -217,6 +216,7 @@ struct nvmet_port {
int mdts;
const struct nvmet_fabrics_ops *tr_ops;
bool pi_enable;
+ enum nvme_ana_state ana_state[];
};
static inline struct nvmet_port *to_nvmet_port(struct config_item *item)
diff --git a/drivers/nvme/target/rdma.c b/drivers/nvme/target/rdma.c
index ac26f4f774c4..ea1185b8267e 100644
--- a/drivers/nvme/target/rdma.c
+++ b/drivers/nvme/target/rdma.c
@@ -666,7 +666,8 @@ static void nvmet_rdma_release_rsp(struct nvmet_rdma_rsp *rsp)
if (rsp->n_rdma)
nvmet_rdma_rw_ctx_destroy(rsp);
- if (rsp->req.sg != rsp->cmd->inline_sg)
+ if (rsp->req.sg < rsp->cmd->inline_sg ||
+ rsp->req.sg >= rsp->cmd->inline_sg + queue->dev->inline_page_count)
nvmet_req_free_sgls(&rsp->req);
if (unlikely(!list_empty_careful(&queue->rsp_wr_wait_list)))
@@ -821,24 +822,25 @@ static void nvmet_rdma_write_data_done(struct ib_cq *cq, struct ib_wc *wc)
static void nvmet_rdma_use_inline_sg(struct nvmet_rdma_rsp *rsp, u32 len,
u64 off)
{
- int sg_count = num_pages(len);
+ u64 page_off = off % PAGE_SIZE;
+ u64 page_idx = off / PAGE_SIZE;
+ int sg_count = num_pages(page_off + len);
struct scatterlist *sg;
int i;
- sg = rsp->cmd->inline_sg;
+ sg = &rsp->cmd->inline_sg[page_idx];
for (i = 0; i < sg_count; i++, sg++) {
if (i < sg_count - 1)
sg_unmark_end(sg);
else
sg_mark_end(sg);
- sg->offset = off;
- sg->length = min_t(int, len, PAGE_SIZE - off);
+ sg->offset = page_off;
+ sg->length = min_t(u64, len, PAGE_SIZE - page_off);
len -= sg->length;
- if (!i)
- off = 0;
+ page_off = 0;
}
- rsp->req.sg = rsp->cmd->inline_sg;
+ rsp->req.sg = &rsp->cmd->inline_sg[page_idx];
rsp->req.sg_cnt = sg_count;
}
diff --git a/drivers/nvme/target/tcp.c b/drivers/nvme/target/tcp.c
index 15c52f1f95f1..75a276d73be3 100644
--- a/drivers/nvme/target/tcp.c
+++ b/drivers/nvme/target/tcp.c
@@ -1680,6 +1680,7 @@ static void nvmet_tcp_state_change(struct sock *sk)
switch (sk->sk_state) {
case TCP_FIN_WAIT2:
case TCP_LAST_ACK:
+ case TCP_CLOSING:
break;
case TCP_FIN_WAIT1:
case TCP_CLOSE_WAIT: