From 340f4fc5508c2905a1f30de229e2a4b299d55735 Mon Sep 17 00:00:00 2001 From: Ilikara Zheng Date: Mon, 8 Dec 2025 21:23:40 +0800 Subject: nvme-pci: disable secondary temp for Wodposit WPBSNM8 Secondary temperature thresholds (temp2_{min,max}) were not reported properly on this NVMe SSD. This resulted in an error while attempting to read these values with sensors(1): ERROR: Can't get value of subfeature temp2_min: I/O error ERROR: Can't get value of subfeature temp2_max: I/O error Add the device to the nvme_id_table with the NVME_QUIRK_NO_SECONDARY_TEMP_THRESH flag to suppress access to all non- composite temperature thresholds. Cc: stable@vger.kernel.org Tested-by: Wu Haotian Signed-off-by: Ilikara Zheng Signed-off-by: Keith Busch --- drivers/nvme/host/pci.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index 0e4caeab739c..29e715d5b8f3 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -3999,6 +3999,8 @@ static const struct pci_device_id nvme_id_table[] = { .driver_data = NVME_QUIRK_NO_DEEPEST_PS, }, { PCI_DEVICE(0x1e49, 0x0041), /* ZHITAI TiPro7000 NVMe SSD */ .driver_data = NVME_QUIRK_NO_DEEPEST_PS, }, + { PCI_DEVICE(0x1fa0, 0x2283), /* Wodposit WPBSNM8-256GTP */ + .driver_data = NVME_QUIRK_NO_SECONDARY_TEMP_THRESH, }, { PCI_DEVICE(0x025e, 0xf1ac), /* SOLIDIGM P44 pro SSDPFKKW020X7 */ .driver_data = NVME_QUIRK_NO_DEEPEST_PS, }, { PCI_DEVICE(0xc0a9, 0x540a), /* Crucial P2 */ -- cgit v1.2.3 From 32b63acd78f577b332d976aa06b56e70d054cbba Mon Sep 17 00:00:00 2001 From: Shivam Kumar Date: Sat, 13 Dec 2025 13:57:48 -0500 Subject: nvme-tcp: fix NULL pointer dereferences in nvmet_tcp_build_pdu_iovec MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Commit efa56305908b ("nvmet-tcp: Fix a kernel panic when host sends an invalid H2C PDU length") added ttag bounds checking and data_offset validation in nvmet_tcp_handle_h2c_data_pdu(), but it did not validate whether the command's data structures (cmd->req.sg and cmd->iov) have been properly initialized before processing H2C_DATA PDUs. The nvmet_tcp_build_pdu_iovec() function dereferences these pointers without NULL checks. This can be triggered by sending H2C_DATA PDU immediately after the ICREQ/ICRESP handshake, before sending a CONNECT command or NVMe write command. Attack vectors that trigger NULL pointer dereferences: 1. H2C_DATA PDU sent before CONNECT → both pointers NULL 2. H2C_DATA PDU for READ command → cmd->req.sg allocated, cmd->iov NULL 3. H2C_DATA PDU for uninitialized command slot → both pointers NULL The fix validates both cmd->req.sg and cmd->iov before calling nvmet_tcp_build_pdu_iovec(). Both checks are required because: - Uninitialized commands: both NULL - READ commands: cmd->req.sg allocated, cmd->iov NULL - WRITE commands: both allocated Fixes: efa56305908b ("nvmet-tcp: Fix a kernel panic when host sends an invalid H2C PDU length") Reviewed-by: Sagi Grimberg Signed-off-by: Shivam Kumar Signed-off-by: Keith Busch --- drivers/nvme/target/tcp.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/drivers/nvme/target/tcp.c b/drivers/nvme/target/tcp.c index 15416ff0eac4..d5966d007ba3 100644 --- a/drivers/nvme/target/tcp.c +++ b/drivers/nvme/target/tcp.c @@ -982,6 +982,18 @@ static int nvmet_tcp_handle_h2c_data_pdu(struct nvmet_tcp_queue *queue) pr_err("H2CData PDU len %u is invalid\n", cmd->pdu_len); goto err_proto; } + /* + * Ensure command data structures are initialized. We must check both + * cmd->req.sg and cmd->iov because they can have different NULL states: + * - Uninitialized commands: both NULL + * - READ commands: cmd->req.sg allocated, cmd->iov NULL + * - WRITE commands: both allocated + */ + if (unlikely(!cmd->req.sg || !cmd->iov)) { + pr_err("queue %d: H2CData PDU received for invalid command state (ttag %u)\n", + queue->idx, data->ttag); + goto err_proto; + } cmd->pdu_recv = 0; nvmet_tcp_build_pdu_iovec(cmd); queue->cmd = cmd; -- cgit v1.2.3 From 7d3fa7e954934fbda0a017ac1c305b7b10ecceef Mon Sep 17 00:00:00 2001 From: Janne Grunau Date: Wed, 31 Dec 2025 11:10:57 +0100 Subject: nvme-apple: add "apple,t8103-nvme-ans2" as compatible After discussion with the devicetree maintainers we agreed to not extend lists with the generic compatible "apple,nvme-ans2" anymore [1]. Add "apple,t8103-nvme-ans2" as fallback compatible as it is the SoC the driver and bindings were written for. [1]: https://lore.kernel.org/asahi/12ab93b7-1fc2-4ce0-926e-c8141cfe81bf@kernel.org/ Cc: stable@vger.kernel.org # v6.18+ Fixes: 5bd2927aceba ("nvme-apple: Add initial Apple SoC NVMe driver") Reviewed-by: Neal Gompa Reviewed-by: Christoph Hellwig Signed-off-by: Janne Grunau Signed-off-by: Keith Busch --- drivers/nvme/host/apple.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/nvme/host/apple.c b/drivers/nvme/host/apple.c index 15b3d07f8ccd..ed61b97fde59 100644 --- a/drivers/nvme/host/apple.c +++ b/drivers/nvme/host/apple.c @@ -1704,6 +1704,7 @@ static const struct apple_nvme_hw apple_nvme_t8103_hw = { static const struct of_device_id apple_nvme_of_match[] = { { .compatible = "apple,t8015-nvme-ans2", .data = &apple_nvme_t8015_hw }, + { .compatible = "apple,t8103-nvme-ans2", .data = &apple_nvme_t8103_hw }, { .compatible = "apple,nvme-ans2", .data = &apple_nvme_t8103_hw }, {}, }; -- cgit v1.2.3 From d1877cc7270302081a315a81a0ee8331f19f95c8 Mon Sep 17 00:00:00 2001 From: Chaitanya Kulkarni Date: Fri, 19 Dec 2025 16:18:42 -0800 Subject: nvme-fc: release admin tagset if init fails nvme_fabrics creates an NVMe/FC controller in following path: nvmf_dev_write() -> nvmf_create_ctrl() -> nvme_fc_create_ctrl() -> nvme_fc_init_ctrl() nvme_fc_init_ctrl() allocates the admin blk-mq resources right after nvme_add_ctrl() succeeds. If any of the subsequent steps fail (changing the controller state, scheduling connect work, etc.), we jump to the fail_ctrl path, which tears down the controller references but never frees the admin queue/tag set. The leaked blk-mq allocations match the kmemleak report seen during blktests nvme/fc. Check ctrl->ctrl.admin_tagset in the fail_ctrl path and call nvme_remove_admin_tag_set() when it is set so that all admin queue allocations are reclaimed whenever controller setup aborts. Reported-by: Yi Zhang Reviewed-by: Justin Tee Signed-off-by: Chaitanya Kulkarni Signed-off-by: Keith Busch --- drivers/nvme/host/fc.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/nvme/host/fc.c b/drivers/nvme/host/fc.c index bc455fa98246..6948de3f438a 100644 --- a/drivers/nvme/host/fc.c +++ b/drivers/nvme/host/fc.c @@ -3587,6 +3587,8 @@ fail_ctrl: ctrl->ctrl.opts = NULL; + if (ctrl->ctrl.admin_tagset) + nvme_remove_admin_tag_set(&ctrl->ctrl); /* initiate nvme ctrl ref counting teardown */ nvme_uninit_ctrl(&ctrl->ctrl); -- cgit v1.2.3 From ca22c566b89164f6e670af56ecc45f47ef3df819 Mon Sep 17 00:00:00 2001 From: Caleb Sander Mateos Date: Thu, 8 Jan 2026 10:22:10 -0700 Subject: block: zero non-PI portion of auto integrity buffer The auto-generated integrity buffer for writes needs to be fully initialized before being passed to the underlying block device, otherwise the uninitialized memory can be read back by userspace or anyone with physical access to the storage device. If protection information is generated, that portion of the integrity buffer is already initialized. The integrity data is also zeroed if PI generation is disabled via sysfs or the PI tuple size is 0. However, this misses the case where PI is generated and the PI tuple size is nonzero, but the metadata size is larger than the PI tuple. In this case, the remainder ("opaque") of the metadata is left uninitialized. Generalize the BLK_INTEGRITY_CSUM_NONE check to cover any case when the metadata is larger than just the PI tuple. Signed-off-by: Caleb Sander Mateos Fixes: c546d6f43833 ("block: only zero non-PI metadata tuples in bio_integrity_prep") Reviewed-by: Anuj Gupta Reviewed-by: Christoph Hellwig Reviewed-by: Martin K. Petersen Signed-off-by: Jens Axboe --- block/bio-integrity-auto.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/block/bio-integrity-auto.c b/block/bio-integrity-auto.c index 9850c338548d..cff025b06be1 100644 --- a/block/bio-integrity-auto.c +++ b/block/bio-integrity-auto.c @@ -140,7 +140,7 @@ bool bio_integrity_prep(struct bio *bio) return true; set_flags = false; gfp |= __GFP_ZERO; - } else if (bi->csum_type == BLK_INTEGRITY_CSUM_NONE) + } else if (bi->metadata_size > bi->pi_tuple_size) gfp |= __GFP_ZERO; break; default: -- cgit v1.2.3 From 40b94ec7edbbb867c4e26a1a43d2b898f04b93c5 Mon Sep 17 00:00:00 2001 From: Nilay Shroff Date: Tue, 13 Jan 2026 12:27:22 +0530 Subject: null_blk: fix kmemleak by releasing references to fault configfs items When CONFIG_BLK_DEV_NULL_BLK_FAULT_INJECTION is enabled, the null-blk driver sets up fault injection support by creating the timeout_inject, requeue_inject, and init_hctx_fault_inject configfs items as children of the top-level nullbX configfs group. However, when the nullbX device is removed, the references taken to these fault-config configfs items are not released. As a result, kmemleak reports a memory leak, for example: unreferenced object 0xc00000021ff25c40 (size 32): comm "mkdir", pid 10665, jiffies 4322121578 hex dump (first 32 bytes): 69 6e 69 74 5f 68 63 74 78 5f 66 61 75 6c 74 5f init_hctx_fault_ 69 6e 6a 65 63 74 00 88 00 00 00 00 00 00 00 00 inject.......... backtrace (crc 1a018c86): __kmalloc_node_track_caller_noprof+0x494/0xbd8 kvasprintf+0x74/0xf4 config_item_set_name+0xf0/0x104 config_group_init_type_name+0x48/0xfc fault_config_init+0x48/0xf0 0xc0080000180559e4 configfs_mkdir+0x304/0x814 vfs_mkdir+0x49c/0x604 do_mkdirat+0x314/0x3d0 sys_mkdir+0xa0/0xd8 system_call_exception+0x1b0/0x4f0 system_call_vectored_common+0x15c/0x2ec Fix this by explicitly releasing the references to the fault-config configfs items when dropping the reference to the top-level nullbX configfs group. Cc: stable@vger.kernel.org Reviewed-by: Chaitanya Kulkarni Fixes: bb4c19e030f4 ("block: null_blk: make fault-injection dynamically configurable per device") Signed-off-by: Nilay Shroff Signed-off-by: Jens Axboe --- drivers/block/null_blk/main.c | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/drivers/block/null_blk/main.c b/drivers/block/null_blk/main.c index c7c0fb79a6bf..4c0632ab4e1b 100644 --- a/drivers/block/null_blk/main.c +++ b/drivers/block/null_blk/main.c @@ -665,12 +665,22 @@ static void nullb_add_fault_config(struct nullb_device *dev) configfs_add_default_group(&dev->init_hctx_fault_config.group, &dev->group); } +static void nullb_del_fault_config(struct nullb_device *dev) +{ + config_item_put(&dev->init_hctx_fault_config.group.cg_item); + config_item_put(&dev->requeue_config.group.cg_item); + config_item_put(&dev->timeout_config.group.cg_item); +} + #else static void nullb_add_fault_config(struct nullb_device *dev) { } +static void nullb_del_fault_config(struct nullb_device *dev) +{ +} #endif static struct @@ -702,7 +712,7 @@ nullb_group_drop_item(struct config_group *group, struct config_item *item) null_del_dev(dev->nullb); mutex_unlock(&lock); } - + nullb_del_fault_config(dev); config_item_put(item); } -- cgit v1.2.3 From 2fa8961d3a6a1c2395d8d560ffed2c782681bade Mon Sep 17 00:00:00 2001 From: Hannes Reinecke Date: Mon, 18 Aug 2025 11:32:45 +0200 Subject: nvmet-tcp: fixup hang in nvmet_tcp_listen_data_ready() When the socket is closed while in TCP_LISTEN a callback is run to flush all outstanding packets, which in turns calls nvmet_tcp_listen_data_ready() with the sk_callback_lock held. So we need to check if we are in TCP_LISTEN before attempting to get the sk_callback_lock() to avoid a deadlock. Link: https://lore.kernel.org/linux-nvme/CAHj4cs-zu7eVB78yUpFjVe2UqMWFkLk8p+DaS3qj+uiGCXBAoA@mail.gmail.com/ Tested-by: Yi Zhang Reviewed-by: Sagi Grimberg Signed-off-by: Hannes Reinecke Signed-off-by: Keith Busch --- drivers/nvme/target/tcp.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/drivers/nvme/target/tcp.c b/drivers/nvme/target/tcp.c index d5966d007ba3..549a4786d1c3 100644 --- a/drivers/nvme/target/tcp.c +++ b/drivers/nvme/target/tcp.c @@ -2004,14 +2004,13 @@ static void nvmet_tcp_listen_data_ready(struct sock *sk) trace_sk_data_ready(sk); + if (sk->sk_state != TCP_LISTEN) + return; + read_lock_bh(&sk->sk_callback_lock); port = sk->sk_user_data; - if (!port) - goto out; - - if (sk->sk_state == TCP_LISTEN) + if (port) queue_work(nvmet_wq, &port->accept_work); -out: read_unlock_bh(&sk->sk_callback_lock); } -- cgit v1.2.3 From 84164acba33158208c2b0e8e5607bdd43edc0dd4 Mon Sep 17 00:00:00 2001 From: Shin'ichiro Kawasaki Date: Sun, 21 Dec 2025 16:37:14 +0900 Subject: nvmet: do not copy beyond sybsysnqn string length Commit edd17206e363 ("nvmet: remove redundant subsysnqn field from ctrl") replaced ctrl->subsysnqn with ctrl->subsys->subsysnqn. This change works as expected because both point to strings with the same data. However, their memory allocation lengths differ. ctrl->subsysnqn had the fixed size defined as NVMF_NQN_FILED_LEN, while ctrl->subsys->subsysnqn has variable length determined by kstrndup(). Due to this difference, KASAN slab-out-of-bounds occurs at memcpy() in nvmet_passthru_override_id_ctrl() after the commit. The failure can be recreated by running the blktests test case nvme/033. To prevent such failures, replace memcpy() with strscpy(), which copies only the string length and avoids overruns. Fixes: edd17206e363 ("nvmet: remove redundant subsysnqn field from ctrl") Signed-off-by: Shin'ichiro Kawasaki Reviewed-by: Christoph Hellwig Reviewed-by: Sagi Grimberg Reviewed-by: Chaitanya Kulkarni Signed-off-by: Keith Busch --- drivers/nvme/target/passthru.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/nvme/target/passthru.c b/drivers/nvme/target/passthru.c index 96648ec2fadb..67c423a8b052 100644 --- a/drivers/nvme/target/passthru.c +++ b/drivers/nvme/target/passthru.c @@ -150,7 +150,7 @@ static u16 nvmet_passthru_override_id_ctrl(struct nvmet_req *req) * code path with duplicate ctrl subsysnqn. In order to prevent that we * mask the passthru-ctrl subsysnqn with the target ctrl subsysnqn. */ - memcpy(id->subnqn, ctrl->subsys->subsysnqn, sizeof(id->subnqn)); + strscpy(id->subnqn, ctrl->subsys->subsysnqn, sizeof(id->subnqn)); /* use fabric id-ctrl values */ id->ioccsz = cpu_to_le32((sizeof(struct nvme_command) + -- cgit v1.2.3 From 0edb475ac0a7d153318a24d4dca175a270a5cc4f Mon Sep 17 00:00:00 2001 From: Nilay Shroff Date: Wed, 14 Jan 2026 12:54:13 +0530 Subject: nvme: fix PCIe subsystem reset controller state transition MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The commit d2fe192348f9 (“nvme: only allow entering LIVE from CONNECTING state”) disallows controller state transitions directly from RESETTING to LIVE. However, the NVMe PCIe subsystem reset path relies on this transition to recover the controller on PowerPC (PPC) systems. On PPC systems, issuing a subsystem reset causes a temporary loss of communication with the NVMe adapter. A subsequent PCIe MMIO read then triggers EEH recovery, which restores the PCIe link and brings the controller back online. For EEH recovery to proceed correctly, the controller must transition back to the LIVE state. Due to the changes introduced by commit d2fe192348f9 (“nvme: only allow entering LIVE from CONNECTING state”), the controller can no longer transition directly from RESETTING to LIVE. As a result, EEH recovery exits prematurely, leaving the controller stuck in the RESETTING state. Fix this by explicitly transitioning the controller state from RESETTING to CONNECTING and then to LIVE. This satisfies the updated state transition rules and allows the controller to be successfully recovered on PPC systems following a PCIe subsystem reset. Cc: stable@vger.kernel.org Fixes: d2fe192348f9 ("nvme: only allow entering LIVE from CONNECTING state") Reviewed-by: Daniel Wagner Signed-off-by: Nilay Shroff Signed-off-by: Keith Busch --- drivers/nvme/host/pci.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index 29e715d5b8f3..58f3097888a7 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -1532,7 +1532,10 @@ static int nvme_pci_subsystem_reset(struct nvme_ctrl *ctrl) } writel(NVME_SUBSYS_RESET, dev->bar + NVME_REG_NSSR); - nvme_change_ctrl_state(ctrl, NVME_CTRL_LIVE); + + if (!nvme_change_ctrl_state(ctrl, NVME_CTRL_CONNECTING) || + !nvme_change_ctrl_state(ctrl, NVME_CTRL_LIVE)) + goto unlock; /* * Read controller status to flush the previous write and trigger a -- cgit v1.2.3 From ec19ed2b3e2af8ec5380400cdee9cb6560144506 Mon Sep 17 00:00:00 2001 From: Chaitanya Kulkarni Date: Mon, 12 Jan 2026 15:19:28 -0800 Subject: rnbd-clt: fix refcount underflow in device unmap path During device unmapping (triggered by module unload or explicit unmap), a refcount underflow occurs causing a use-after-free warning: [14747.574913] ------------[ cut here ]------------ [14747.574916] refcount_t: underflow; use-after-free. [14747.574917] WARNING: lib/refcount.c:28 at refcount_warn_saturate+0x55/0x90, CPU#9: kworker/9:1/378 [14747.574924] Modules linked in: rnbd_client(-) rtrs_client rnbd_server rtrs_server rtrs_core ... [14747.574998] CPU: 9 UID: 0 PID: 378 Comm: kworker/9:1 Tainted: G O N 6.19.0-rc3lblk-fnext+ #42 PREEMPT(voluntary) [14747.575005] Workqueue: rnbd_clt_wq unmap_device_work [rnbd_client] [14747.575010] RIP: 0010:refcount_warn_saturate+0x55/0x90 [14747.575037] Call Trace: [14747.575038] [14747.575038] rnbd_clt_unmap_device+0x170/0x1d0 [rnbd_client] [14747.575044] process_one_work+0x211/0x600 [14747.575052] worker_thread+0x184/0x330 [14747.575055] ? __pfx_worker_thread+0x10/0x10 [14747.575058] kthread+0x10d/0x250 [14747.575062] ? __pfx_kthread+0x10/0x10 [14747.575066] ret_from_fork+0x319/0x390 [14747.575069] ? __pfx_kthread+0x10/0x10 [14747.575072] ret_from_fork_asm+0x1a/0x30 [14747.575083] [14747.575096] ---[ end trace 0000000000000000 ]--- Befor this patch :- The bug is a double kobject_put() on dev->kobj during device cleanup. Kobject Lifecycle: kobject_init_and_add() sets kobj.kref = 1 (initialization) kobject_put() sets kobj.kref = 0 (should be called once) * Before this patch: rnbd_clt_unmap_device() rnbd_destroy_sysfs() kobject_del(&dev->kobj) [remove from sysfs] kobject_put(&dev->kobj) PUT #1 (WRONG!) kref: 1 to 0 rnbd_dev_release() kfree(dev) [DEVICE FREED!] rnbd_destroy_gen_disk() [use-after-free!] rnbd_clt_put_dev() refcount_dec_and_test(&dev->refcount) kobject_put(&dev->kobj) PUT #2 (UNDERFLOW!) kref: 0 to -1 [WARNING!] The first kobject_put() in rnbd_destroy_sysfs() prematurely frees the device via rnbd_dev_release(), then the second kobject_put() in rnbd_clt_put_dev() causes refcount underflow. * After this patch :- Remove kobject_put() from rnbd_destroy_sysfs(). This function should only remove sysfs visibility (kobject_del), not manage object lifetime. Call Graph (FIXED): rnbd_clt_unmap_device() rnbd_destroy_sysfs() kobject_del(&dev->kobj) [remove from sysfs only] [kref unchanged: 1] rnbd_destroy_gen_disk() [device still valid] rnbd_clt_put_dev() refcount_dec_and_test(&dev->refcount) kobject_put(&dev->kobj) ONLY PUT (CORRECT!) kref: 1 to 0 [BALANCED] rnbd_dev_release() kfree(dev) [CLEAN DESTRUCTION] This follows the kernel pattern where sysfs removal (kobject_del) is separate from object destruction (kobject_put). Fixes: 581cf833cac4 ("block: rnbd: add .release to rnbd_dev_ktype") Signed-off-by: Chaitanya Kulkarni Acked-by: Jack Wang Reviewed-by: Jack Wang Signed-off-by: Jens Axboe --- drivers/block/rnbd/rnbd-clt.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/block/rnbd/rnbd-clt.c b/drivers/block/rnbd/rnbd-clt.c index d1c354636315..8194a970f002 100644 --- a/drivers/block/rnbd/rnbd-clt.c +++ b/drivers/block/rnbd/rnbd-clt.c @@ -1662,7 +1662,6 @@ static void destroy_sysfs(struct rnbd_clt_dev *dev, /* To avoid deadlock firstly remove itself */ sysfs_remove_file_self(&dev->kobj, sysfs_self); kobject_del(&dev->kobj); - kobject_put(&dev->kobj); } } -- cgit v1.2.3