From 09861858a68342f851f71c669ac0f69865c32151 Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Mon, 2 Mar 2026 03:51:49 -0500 Subject: vhost: fix vhost_get_avail_idx for a non empty ring vhost_get_avail_idx is supposed to report whether it has updated vq->avail_idx. Instead, it returns whether all entries have been consumed, which is usually the same. But not always - in drivers/vhost/net.c and when mergeable buffers have been enabled, the driver checks whether the combined entries are big enough to store an incoming packet. If not, the driver re-enables notifications with available entries still in the ring. The incorrect return value from vhost_get_avail_idx propagates through vhost_enable_notify and causes the host to livelock if the guest is not making progress, as vhost will immediately disable notifications and retry using the available entries. This goes back to commit d3bb267bbdcb ("vhost: cache avail index in vhost_enable_notify()") which changed vhost_enable_notify() to compare the freshly read avail index against vq->last_avail_idx instead of the previously cached vq->avail_idx. Commit 7ad472397667 ("vhost: move smp_rmb() into vhost_get_avail_idx()") then carried over the same comparison when refactoring vhost_enable_notify() to call the unified vhost_get_avail_idx(). The obvious fix is to make vhost_get_avail_idx do what the comment says it does and report whether new entries have been added. Reported-by: ShuangYu Fixes: d3bb267bbdcb ("vhost: cache avail index in vhost_enable_notify()") Cc: Stefan Hajnoczi Acked-by: Jason Wang Reviewed-by: Stefano Garzarella Signed-off-by: Michael S. 
Tsirkin Message-Id: <559b04ae6ce52973c535dc47e461638b7f4c3d63.1772441455.git.mst@redhat.com> --- drivers/vhost/vhost.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) (limited to 'drivers') diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c index 2f2c45d20883..db329a6f6145 100644 --- a/drivers/vhost/vhost.c +++ b/drivers/vhost/vhost.c @@ -1522,6 +1522,7 @@ static void vhost_dev_unlock_vqs(struct vhost_dev *d) static inline int vhost_get_avail_idx(struct vhost_virtqueue *vq) { __virtio16 idx; + u16 avail_idx; int r; r = vhost_get_avail(vq, idx, &vq->avail->idx); @@ -1532,17 +1533,19 @@ static inline int vhost_get_avail_idx(struct vhost_virtqueue *vq) } /* Check it isn't doing very strange thing with available indexes */ - vq->avail_idx = vhost16_to_cpu(vq, idx); - if (unlikely((u16)(vq->avail_idx - vq->last_avail_idx) > vq->num)) { + avail_idx = vhost16_to_cpu(vq, idx); + if (unlikely((u16)(avail_idx - vq->last_avail_idx) > vq->num)) { vq_err(vq, "Invalid available index change from %u to %u", - vq->last_avail_idx, vq->avail_idx); + vq->last_avail_idx, avail_idx); return -EINVAL; } /* We're done if there is nothing new */ - if (vq->avail_idx == vq->last_avail_idx) + if (avail_idx == vq->avail_idx) return 0; + vq->avail_idx = avail_idx; + /* * We updated vq->avail_idx so we need a memory barrier between * the index read above and the caller reading avail ring entries. -- cgit v1.2.3 From f7d380fb525c13bdd114369a1979c80c346e6abc Mon Sep 17 00:00:00 2001 From: Ammar Faizi Date: Sun, 15 Mar 2026 21:18:08 +0700 Subject: virtio_pci: fix vq info pointer lookup via wrong index Unbinding a virtio balloon device: echo virtio0 > /sys/bus/virtio/drivers/virtio_balloon/unbind triggers a NULL pointer dereference. The dmesg says: BUG: kernel NULL pointer dereference, address: 0000000000000008 [...] 
RIP: 0010:__list_del_entry_valid_or_report+0x5/0xf0 Call Trace: vp_del_vqs+0x121/0x230 remove_common+0x135/0x150 virtballoon_remove+0xee/0x100 virtio_dev_remove+0x3b/0x80 device_release_driver_internal+0x187/0x2c0 unbind_store+0xb9/0xe0 kernfs_fop_write_iter.llvm.11660790530567441834+0xf6/0x180 vfs_write+0x2a9/0x3b0 ksys_write+0x5c/0xd0 do_syscall_64+0x54/0x230 entry_SYSCALL_64_after_hwframe+0x29/0x31 [...] The virtio_balloon device registers 5 queues (inflate, deflate, stats, free_page, reporting) but only the first two are unconditional. The stats, free_page and reporting queues are each conditional on their respective feature bits. When any of these features are absent, the corresponding vqs_info entry has name == NULL, creating holes in the array. The root cause is an indexing mismatch introduced when vq info storage was changed to be passed as an argument. vp_find_vqs_msix() and vp_find_vqs_intx() store the info pointer at vp_dev->vqs[i], where 'i' is the caller's sparse array index. However, the virtqueue itself gets vq->index assigned from queue_idx, a dense index that skips NULL entries. When holes exist, 'i' and queue_idx diverge. Later, vp_del_vqs() looks up info via vp_dev->vqs[vq->index] using the dense index into the sparsely-populated array, and hits NULL. Fix this by storing info at vp_dev->vqs[queue_idx] instead of vp_dev->vqs[i], so the store index matches the lookup index (vq->index). Apply the fix to both the MSIX and INTX paths. Cc: Yichun Zhang Cc: Jiri Pirko Cc: stable@vger.kernel.org # v6.11+ Tested-by: Yuka Fixes: 89a1c435aec2 ("virtio_pci: pass vq info as an argument to vp_setup_vq()") Signed-off-by: Ammar Faizi Message-Id: <20260315141808.547081-1-ammarfaizi2@openresty.com> Signed-off-by: Michael S. 
Tsirkin --- drivers/virtio/virtio_pci_common.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) (limited to 'drivers') diff --git a/drivers/virtio/virtio_pci_common.c b/drivers/virtio/virtio_pci_common.c index da97b6a988de..164f480b18a6 100644 --- a/drivers/virtio/virtio_pci_common.c +++ b/drivers/virtio/virtio_pci_common.c @@ -423,10 +423,11 @@ static int vp_find_vqs_msix(struct virtio_device *vdev, unsigned int nvqs, vqs[i] = NULL; continue; } - vqs[i] = vp_find_one_vq_msix(vdev, queue_idx++, vqi->callback, + vqs[i] = vp_find_one_vq_msix(vdev, queue_idx, vqi->callback, vqi->name, vqi->ctx, false, &allocated_vectors, vector_policy, - &vp_dev->vqs[i]); + &vp_dev->vqs[queue_idx]); + queue_idx++; if (IS_ERR(vqs[i])) { err = PTR_ERR(vqs[i]); goto error_find; @@ -485,9 +486,10 @@ static int vp_find_vqs_intx(struct virtio_device *vdev, unsigned int nvqs, vqs[i] = NULL; continue; } - vqs[i] = vp_setup_vq(vdev, queue_idx++, vqi->callback, + vqs[i] = vp_setup_vq(vdev, queue_idx, vqi->callback, vqi->name, vqi->ctx, - VIRTIO_MSI_NO_VECTOR, &vp_dev->vqs[i]); + VIRTIO_MSI_NO_VECTOR, &vp_dev->vqs[queue_idx]); + queue_idx++; if (IS_ERR(vqs[i])) { err = PTR_ERR(vqs[i]); goto out_del_vqs; -- cgit v1.2.3 From e3046eeada299f917a8ad883af4434bfb86556b1 Mon Sep 17 00:00:00 2001 From: Michael Bommarito Date: Sun, 31 May 2026 10:22:51 -0400 Subject: hwrng: virtio: clamp device-reported used.len at copy_data() random_recv_done() stores the device-reported used.len directly into vi->data_avail. copy_data() then indexes vi->data[] using vi->data_idx (advanced by previous copy_data() calls) and issues a memcpy() without re-validating either value against the posted buffer size sizeof(vi->data) (SMP_CACHE_BYTES bytes, typically 32 or 64). A malicious or buggy virtio-rng backend can set used.len beyond sizeof(vi->data), steering the memcpy() past the end of the inline array into adjacent kmalloc-1k slab bytes. 
hwrng_fillfn() mixes those bytes into the guest RNG, and guest root can also observe them directly via /dev/hwrng. Concrete impact is inside the guest: - Memory-safety / hardening: any virtio-rng backend that over-reports used.len causes the driver to read past vi->data into unrelated slab contents. hwrng_fillfn() is a kernel thread that runs as soon as the device is probed; no guest userspace interaction is required to first-trigger the OOB. - Cross-boundary leak (confidential-compute threat model): a malicious hypervisor cooperating with a malicious or compromised guest root userspace can use /dev/hwrng as a leak channel for guest-kernel heap data. The host sets a large used.len, guest root reads /dev/hwrng, and the returned bytes contain guest kernel slab contents that were adjacent to vi->data. In practice, confidential-compute guests (SEV-SNP, TDX) usually disable virtio-rng entirely, so this path is narrow, but the fix is still worth carrying because the underlying memory-safety bug contaminates the guest RNG on any host. KASAN confirms the OOB on a 7.1-rc4 guest whose virtio-rng backend has been patched to report used.len = 0x10000: BUG: KASAN: slab-out-of-bounds in virtio_read+0x394/0x5d0 Read of size 64 at addr ffff88800ae0ba20 by task hwrng/52 Call Trace: __asan_memcpy+0x23/0x60 virtio_read+0x394/0x5d0 hwrng_fillfn+0xb2/0x470 kthread+0x2cc/0x3a0 Allocated by task 1: probe_common+0xa5/0x660 virtio_dev_probe+0x549/0xbc0 The buggy address belongs to the object at ffff88800ae0b800 which belongs to the cache kmalloc-1k of size 1024 The buggy address is located 0 bytes to the right of allocated 544-byte region [ffff88800ae0b800, ffff88800ae0ba20) Same class of bug as commit c04db81cd028 ("net/9p: Fix buffer overflow in USB transport layer"), which hardened usb9pfs_rx_complete() against unchecked device-reported length in the USB 9p transport. 
With the clamp at point of use and array_index_nospec() in place, the same harness boots cleanly: copy_data() returns zero for the bogus report, the device-supplied bytes after data_idx are discarded, and the driver issues a fresh request. Fixes: f7f510ec1957 ("virtio: An entropy device, as suggested by hpa.") Cc: stable@vger.kernel.org Suggested-by: Michael S. Tsirkin Signed-off-by: Michael Bommarito Assisted-by: Claude:claude-opus-4-8 Signed-off-by: Michael S. Tsirkin Message-ID: <20260531142251.2792061-1-michael.bommarito@gmail.com> --- drivers/char/hw_random/virtio-rng.c | 23 +++++++++++++++++++++-- 1 file changed, 21 insertions(+), 2 deletions(-) (limited to 'drivers') diff --git a/drivers/char/hw_random/virtio-rng.c b/drivers/char/hw_random/virtio-rng.c index 0ce02d7e5048..5e83ffa105e4 100644 --- a/drivers/char/hw_random/virtio-rng.c +++ b/drivers/char/hw_random/virtio-rng.c @@ -7,6 +7,7 @@ #include #include #include +#include #include #include #include @@ -69,8 +70,26 @@ static void request_entropy(struct virtrng_info *vi) static unsigned int copy_data(struct virtrng_info *vi, void *buf, unsigned int size) { - size = min_t(unsigned int, size, vi->data_avail); - memcpy(buf, vi->data + vi->data_idx, size); + unsigned int idx, avail; + + /* + * vi->data_avail was set from the device-reported used.len and + * vi->data_idx was advanced by previous copy_data() calls. A + * malicious or buggy virtio-rng backend can drive either past + * sizeof(vi->data). Clamp at point of use and harden the index + * with array_index_nospec() so the memcpy() below cannot be + * steered into adjacent slab memory, including under + * speculation. 
+ */ + avail = min_t(unsigned int, vi->data_avail, sizeof(vi->data)); + if (vi->data_idx >= avail) { + vi->data_avail = 0; + request_entropy(vi); + return 0; + } + size = min_t(unsigned int, size, avail - vi->data_idx); + idx = array_index_nospec(vi->data_idx, sizeof(vi->data)); + memcpy(buf, vi->data + idx, size); vi->data_idx += size; vi->data_avail -= size; if (vi->data_avail == 0) -- cgit v1.2.3 From bb26ed5f3a8b233e8389b6f946cb1ec269cf45e9 Mon Sep 17 00:00:00 2001 From: "Denis V. Lunev" Date: Wed, 13 May 2026 16:50:04 +0200 Subject: vhost/vsock: Refuse the connection immediately when guest isn't ready When the host initiates an AF_VSOCK connect() to a guest that has not yet loaded the virtio-vsock transport (i.e. still booting), the caller blocks for VSOCK_DEFAULT_CONNECT_TIMEOUT. A caller that wants to know if the guest is up yet instead of waiting could theoretically tune SO_VM_SOCKETS_CONNECT_TIMEOUT, but it's tricky to find the right timeout, if not impossible: there's no way to distinguish "guest won't reply because it's not up yet" vs "guest is up and tried to reply, but was too slow". Furthermore, this delay is pointless: - If the guest doesn't initialize within this timeout, connect() returns ETIMEDOUT. - If the guest **does** initialize, it'll reply with RST immediately, because there won't be a listener on the port yet; connect() returns ECONNRESET. That's also inconsistent with the behavior at other initialization stages: if a connection is attempted when the guest driver is already loaded, but nothing is listening yet, we return ECONNRESET immediately without waiting. Fix this by checking the RX virtqueue backend in vhost_transport_send_pkt() before queuing. If it's NULL, return -EHOSTUNREACH immediately. Callers that used to get ETIMEDOUT will now usually get EHOSTUNREACH. Signed-off-by: Denis V. Lunev Co-developed-by: Polina Vishneva Signed-off-by: Polina Vishneva Reviewed-by: Stefano Garzarella Signed-off-by: Michael S. 
Tsirkin Message-ID: <20260513145842.809404-1-polina.vishneva@virtuozzo.com> --- drivers/vhost/vsock.c | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) (limited to 'drivers') diff --git a/drivers/vhost/vsock.c b/drivers/vhost/vsock.c index 1d8ec6bed53e..9aaab6bb8061 100644 --- a/drivers/vhost/vsock.c +++ b/drivers/vhost/vsock.c @@ -302,6 +302,22 @@ vhost_transport_send_pkt(struct sk_buff *skb, struct net *net) return -ENODEV; } + /* Fast-fail if the guest hasn't enabled the RX vq yet. Queuing the packet + * and making the caller wait is pointless: even if the guest manages to init + * within the timeout, it'll immediately reply with RST, because there's no + * listener on the port yet. + * + * vhost_vq_get_backend() without vq->mutex is acceptable here: locking + * the mutex would be too expensive in this hot path, and we already have + * all the outcomes covered: if the backend becomes NULL right after the check, + * vhost_transport_do_send_pkt() will check it under the mutex anyway. + */ + if (unlikely(!data_race(vhost_vq_get_backend(&vsock->vqs[VSOCK_VQ_RX])))) { + rcu_read_unlock(); + kfree_skb(skb); + return -EHOSTUNREACH; + } + if (virtio_vsock_skb_reply(skb)) atomic_inc(&vsock->queued_replies); -- cgit v1.2.3 From e440e077748939839d9f76e24383b76b785f80ce Mon Sep 17 00:00:00 2001 From: Qihang Tang Date: Fri, 8 May 2026 17:46:59 +0800 Subject: vduse: hold vduse_lock across IDR lookup in open path vduse_dev_open() looks up struct vduse_dev through the IDR and then acquires dev->lock only after vduse_lock has been dropped. This leaves a window where a concurrent VDUSE_DESTROY_DEV can remove the same object from the IDR and free it before the open path locks the device, leading to a use-after-free. Close this race by keeping vduse_lock held until dev->lock has been acquired in the open path, matching the lock ordering already used by the destroy path. 
Fixes: c8a6153b6c59 ("vduse: Introduce VDUSE - vDPA Device in Userspace") Signed-off-by: Qihang Tang Signed-off-by: Michael S. Tsirkin Message-ID: <20260508094659.94647-1-q.h.hack.winter@gmail.com> --- drivers/vdpa/vdpa_user/vduse_dev.c | 21 +++++++-------------- 1 file changed, 7 insertions(+), 14 deletions(-) (limited to 'drivers') diff --git a/drivers/vdpa/vdpa_user/vduse_dev.c b/drivers/vdpa/vdpa_user/vduse_dev.c index 6202f6902fcd..d5c34260ed68 100644 --- a/drivers/vdpa/vdpa_user/vduse_dev.c +++ b/drivers/vdpa/vdpa_user/vduse_dev.c @@ -1637,26 +1637,18 @@ static int vduse_dev_release(struct inode *inode, struct file *file) return 0; } -static struct vduse_dev *vduse_dev_get_from_minor(int minor) +static int vduse_dev_open(struct inode *inode, struct file *file) { + int ret = -EBUSY; struct vduse_dev *dev; mutex_lock(&vduse_lock); - dev = idr_find(&vduse_idr, minor); - mutex_unlock(&vduse_lock); - - return dev; -} - -static int vduse_dev_open(struct inode *inode, struct file *file) -{ - int ret; - struct vduse_dev *dev = vduse_dev_get_from_minor(iminor(inode)); - - if (!dev) + dev = idr_find(&vduse_idr, iminor(inode)); + if (!dev) { + mutex_unlock(&vduse_lock); return -ENODEV; + } - ret = -EBUSY; mutex_lock(&dev->lock); if (dev->connected) goto unlock; @@ -1666,6 +1658,7 @@ static int vduse_dev_open(struct inode *inode, struct file *file) file->private_data = dev; unlock: mutex_unlock(&dev->lock); + mutex_unlock(&vduse_lock); return ret; } -- cgit v1.2.3 From 929e4f044621c8cc30b612fb74e1410bef09e41b Mon Sep 17 00:00:00 2001 From: Qihang Tang Date: Fri, 8 May 2026 15:58:21 +0800 Subject: vhost/vdpa: validate virtqueue index in mmap and fault paths MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit vhost_vdpa_mmap() and vhost_vdpa_fault() use vma->vm_pgoff as a virtqueue index for get_vq_notification(), but they do not validate that the index is smaller than v->nvqs. 
The ioctl path already performs both a bounds check and array_index_nospec(), but the mmap/fault path only checks that the index fits in u16. This allows an out-of-range queue index to reach driver-specific get_vq_notification() callbacks. Fix this by extracting a unified vhost_vdpa_get_vq_notification() helper that validates the queue index against v->nvqs and applies array_index_nospec() before calling the driver callback. Both the mmap and fault paths use this helper, and the bounds checking is consolidated into a single location. From source inspection, the most defensible impact is out-of-bounds access in the callback path, potentially leading to invalid PFN remaps and crash/DoS. Fixes: ddd89d0a059d ("vhost_vdpa: support doorbell mapping via mmap") Acked-by: Eugenio Pérez Acked-by: Michael S. Tsirkin Signed-off-by: Qihang Tang Signed-off-by: Michael S. Tsirkin Message-ID: <20260508075821.92656-1-q.h.hack.winter@gmail.com> --- drivers/vhost/vdpa.c | 29 ++++++++++++++++++++++------- 1 file changed, 22 insertions(+), 7 deletions(-) (limited to 'drivers') diff --git a/drivers/vhost/vdpa.c b/drivers/vhost/vdpa.c index 692564b1bcbb..ac55275fa0d0 100644 --- a/drivers/vhost/vdpa.c +++ b/drivers/vhost/vdpa.c @@ -1482,16 +1482,32 @@ static int vhost_vdpa_release(struct inode *inode, struct file *filep) } #ifdef CONFIG_MMU -static vm_fault_t vhost_vdpa_fault(struct vm_fault *vmf) +static int +vhost_vdpa_get_vq_notification(struct vhost_vdpa *v, unsigned long index, + struct vdpa_notification_area *notify) { - struct vhost_vdpa *v = vmf->vma->vm_file->private_data; struct vdpa_device *vdpa = v->vdpa; const struct vdpa_config_ops *ops = vdpa->config; + + if (index > 65535 || index >= v->nvqs) + return -EINVAL; + + index = array_index_nospec(index, v->nvqs); + + *notify = ops->get_vq_notification(vdpa, index); + + return 0; +} + +static vm_fault_t vhost_vdpa_fault(struct vm_fault *vmf) +{ + struct vhost_vdpa *v = vmf->vma->vm_file->private_data; struct 
vdpa_notification_area notify; struct vm_area_struct *vma = vmf->vma; - u16 index = vma->vm_pgoff; + unsigned long index = vma->vm_pgoff; - notify = ops->get_vq_notification(vdpa, index); + if (vhost_vdpa_get_vq_notification(v, index, &notify)) + return VM_FAULT_SIGBUS; return vmf_insert_pfn(vma, vmf->address & PAGE_MASK, PFN_DOWN(notify.addr)); } @@ -1514,8 +1530,6 @@ static int vhost_vdpa_mmap(struct file *file, struct vm_area_struct *vma) return -EINVAL; if (vma->vm_flags & VM_READ) return -EINVAL; - if (index > 65535) - return -EINVAL; if (!ops->get_vq_notification) return -ENOTSUPP; @@ -1523,7 +1537,8 @@ static int vhost_vdpa_mmap(struct file *file, struct vm_area_struct *vma) * support the doorbell which sits on the page boundary and * does not share the page with other registers. */ - notify = ops->get_vq_notification(vdpa, index); + if (vhost_vdpa_get_vq_notification(v, index, &notify)) + return -EINVAL; if (notify.addr & (PAGE_SIZE - 1)) return -EINVAL; if (vma->vm_end - vma->vm_start != notify.size) -- cgit v1.2.3 From c687bc35694698ec4c7f92bf929c3d659f0cecb8 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Mon, 27 Apr 2026 16:37:10 +0200 Subject: virtio-mmio: fix device release warning on module unload Driver core expects devices to be allocated dynamically and complains loudly when a device that lacks a release function is freed. Use __root_device_register() to allocate and register the root device instead of open coding using a static device. Note that root_device_register(), which also creates a link to the module, cannot be used as the device is registered when parsing the module parameters which happens before the module kobject has been set up. Fixes: 81a054ce0b46 ("virtio-mmio: Devices parameter parsing") Cc: stable@vger.kernel.org # 3.5 Cc: Pawel Moll Signed-off-by: Johan Hovold Signed-off-by: Michael S.
Tsirkin Message-ID: <20260427143710.14702-1-johan@kernel.org> --- drivers/virtio/virtio_mmio.c | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) (limited to 'drivers') diff --git a/drivers/virtio/virtio_mmio.c b/drivers/virtio/virtio_mmio.c index 595c2274fbb5..510b7c4efdff 100644 --- a/drivers/virtio/virtio_mmio.c +++ b/drivers/virtio/virtio_mmio.c @@ -662,9 +662,7 @@ static void virtio_mmio_remove(struct platform_device *pdev) #if defined(CONFIG_VIRTIO_MMIO_CMDLINE_DEVICES) -static struct device vm_cmdline_parent = { - .init_name = "virtio-mmio-cmdline", -}; +static struct device *vm_cmdline_parent; static int vm_cmdline_parent_registered; static int vm_cmdline_id; @@ -672,7 +670,6 @@ static int vm_cmdline_id; static int vm_cmdline_set(const char *device, const struct kernel_param *kp) { - int err; struct resource resources[2] = {}; char *str; long long base, size; @@ -704,11 +701,10 @@ static int vm_cmdline_set(const char *device, resources[1].start = resources[1].end = irq; if (!vm_cmdline_parent_registered) { - err = device_register(&vm_cmdline_parent); - if (err) { - put_device(&vm_cmdline_parent); + vm_cmdline_parent = __root_device_register("virtio-mmio-cmdline", NULL); + if (IS_ERR(vm_cmdline_parent)) { pr_err("Failed to register parent device!\n"); - return err; + return PTR_ERR(vm_cmdline_parent); } vm_cmdline_parent_registered = 1; } @@ -719,7 +715,7 @@ static int vm_cmdline_set(const char *device, (unsigned long long)resources[0].end, (int)resources[1].start); - pdev = platform_device_register_resndata(&vm_cmdline_parent, + pdev = platform_device_register_resndata(vm_cmdline_parent, "virtio-mmio", vm_cmdline_id++, resources, ARRAY_SIZE(resources), NULL, 0); @@ -743,8 +739,12 @@ static int vm_cmdline_get_device(struct device *dev, void *data) static int vm_cmdline_get(char *buffer, const struct kernel_param *kp) { buffer[0] = '\0'; - device_for_each_child(&vm_cmdline_parent, buffer, - vm_cmdline_get_device); + + if 
(vm_cmdline_parent_registered) { + device_for_each_child(vm_cmdline_parent, buffer, + vm_cmdline_get_device); + } + return strlen(buffer) + 1; } @@ -766,9 +766,9 @@ static int vm_unregister_cmdline_device(struct device *dev, static void vm_unregister_cmdline_devices(void) { if (vm_cmdline_parent_registered) { - device_for_each_child(&vm_cmdline_parent, NULL, + device_for_each_child(vm_cmdline_parent, NULL, vm_unregister_cmdline_device); - device_unregister(&vm_cmdline_parent); + root_device_unregister(vm_cmdline_parent); vm_cmdline_parent_registered = 0; } } -- cgit v1.2.3 From 548d2208455f14e6121404c6e30e997bfe0cd264 Mon Sep 17 00:00:00 2001 From: Jia Jia Date: Thu, 7 May 2026 20:08:01 +0800 Subject: virtio: rtc: tear down old virtqueues before restore virtio_device_restore() resets the device and restores the negotiated features before calling ->restore(). viortc_freeze() intentionally leaves the existing virtqueues in place so the alarm queue can still wake the system, but viortc_restore() immediately calls viortc_init_vqs() without first deleting those old queues. If virtqueue reinitialization fails on virtio-pci, the transport error path can run vp_del_vqs() against a newly allocated vp_dev->vqs array while vdev->vqs still contains the old virtqueues. vp_del_vqs() then looks up queue state through the new array and can dereference a NULL info pointer in vp_del_vq(), crashing the guest kernel during restore. This can also happen during a non-faulty reinitialization, when one of the vp_find_vqs_msix() attempts is unsuccessful before a later attempt would succeed. Delete the stale virtqueues before rebuilding them. If restore fails before virtio_device_ready(), reuse the remove path to stop the device. Once the device is ready, return errors directly instead of deleting the virtqueues again. Fixes: 0623c7592768 ("virtio_rtc: Add module and driver core") Signed-off-by: Jia Jia Reviewed-by: Peter Hilber Signed-off-by: Michael S. 
Tsirkin Message-ID: <20260507120801.3677552-1-physicalmtea@gmail.com> --- drivers/virtio/virtio_rtc_driver.c | 28 ++++++++++++++++++++-------- 1 file changed, 20 insertions(+), 8 deletions(-) (limited to 'drivers') diff --git a/drivers/virtio/virtio_rtc_driver.c b/drivers/virtio/virtio_rtc_driver.c index a57d5e06e19d..4419735b0f0d 100644 --- a/drivers/virtio/virtio_rtc_driver.c +++ b/drivers/virtio/virtio_rtc_driver.c @@ -1257,6 +1257,15 @@ static int viortc_init_vqs(struct viortc_dev *viortc) return 0; } +static void __viortc_remove(struct viortc_dev *viortc) +{ + struct virtio_device *vdev = viortc->vdev; + + viortc_clocks_deinit(viortc); + virtio_reset_device(vdev); + vdev->config->del_vqs(vdev); +} + /** * viortc_probe() - probe a virtio_rtc virtio device * @vdev: virtio device @@ -1282,7 +1291,7 @@ static int viortc_probe(struct virtio_device *vdev) ret = viortc_init_vqs(viortc); if (ret) - return ret; + goto err_reset_vdev; virtio_device_ready(vdev); @@ -1329,10 +1338,7 @@ static void viortc_remove(struct virtio_device *vdev) { struct viortc_dev *viortc = vdev->priv; - viortc_clocks_deinit(viortc); - - virtio_reset_device(vdev); - vdev->config->del_vqs(vdev); + __viortc_remove(viortc); } static int viortc_freeze(struct virtio_device *dev) @@ -1353,9 +1359,11 @@ static int viortc_restore(struct virtio_device *dev) bool notify = false; int ret; + dev->config->del_vqs(dev); + ret = viortc_init_vqs(viortc); if (ret) - return ret; + goto err_remove; alarm_viortc_vq = &viortc->vqs[VIORTC_ALARMQ]; alarm_vq = alarm_viortc_vq->vq; @@ -1364,7 +1372,7 @@ static int viortc_restore(struct virtio_device *dev) ret = viortc_populate_vq(viortc, alarm_viortc_vq, VIORTC_ALARMQ_BUF_CAP, false); if (ret) - return ret; + goto err_remove; notify = virtqueue_kick_prepare(alarm_vq); } @@ -1372,8 +1380,12 @@ static int viortc_restore(struct virtio_device *dev) virtio_device_ready(dev); if (notify && !virtqueue_notify(alarm_vq)) - ret = -EIO; + return -EIO; + + return 0; +err_remove: + 
__viortc_remove(viortc); return ret; } -- cgit v1.2.3 From b3592a32b34f37874dc94aa1a0d15c4334ed86ca Mon Sep 17 00:00:00 2001 From: Filip Hejsek Date: Mon, 23 Feb 2026 18:37:02 +0100 Subject: virtio_console: read size from config space during device init Previously, the size was only read upon receiving the config interrupt. This interrupt is sent when the size changes. However, we also need to read the initial size. Also make sure to only read the size from config if F_SIZE is enabled. Fixes: 9778829cffd4 ("virtio: console: Store each console's size in the console structure") Signed-off-by: Filip Hejsek Signed-off-by: Michael S. Tsirkin Message-ID: <20260223-virtio-console-fix-v1-1-0cf08303b428@gmail.com> --- drivers/char/virtio_console.c | 52 ++++++++++++++++++++++++++----------------- 1 file changed, 31 insertions(+), 21 deletions(-) (limited to 'drivers') diff --git a/drivers/char/virtio_console.c b/drivers/char/virtio_console.c index 9a33217c68d9..198b97314168 100644 --- a/drivers/char/virtio_console.c +++ b/drivers/char/virtio_console.c @@ -1771,32 +1771,40 @@ static void config_intr(struct virtio_device *vdev) schedule_work(&portdev->config_work); } -static void config_work_handler(struct work_struct *work) +static void update_size_from_config(struct ports_device *portdev) { - struct ports_device *portdev; + struct virtio_device *vdev; + struct port *port; + u16 rows, cols; - portdev = container_of(work, struct ports_device, config_work); - if (!use_multiport(portdev)) { - struct virtio_device *vdev; - struct port *port; - u16 rows, cols; + vdev = portdev->vdev; - vdev = portdev->vdev; - virtio_cread(vdev, struct virtio_console_config, cols, &cols); - virtio_cread(vdev, struct virtio_console_config, rows, &rows); + /* + * We'll use this way of resizing only for legacy support. + * For multiport devices, use control messages to indicate + * console size changes so that it can be done per-port. 
+ * + * Don't test F_SIZE at all if we're rproc: not a valid feature. + */ + if (is_rproc_serial(vdev) || + use_multiport(portdev) || + !virtio_has_feature(vdev, VIRTIO_CONSOLE_F_SIZE)) + return; - port = find_port_by_id(portdev, 0); - set_console_size(port, rows, cols); + virtio_cread(vdev, struct virtio_console_config, cols, &cols); + virtio_cread(vdev, struct virtio_console_config, rows, &rows); - /* - * We'll use this way of resizing only for legacy - * support. For newer userspace - * (VIRTIO_CONSOLE_F_MULTPORT+), use control messages - * to indicate console size changes so that it can be - * done per-port. - */ - resize_console(port); - } + port = find_port_by_id(portdev, 0); + set_console_size(port, rows, cols); + resize_console(port); +} + +static void config_work_handler(struct work_struct *work) +{ + struct ports_device *portdev; + + portdev = container_of(work, struct ports_device, config_work); + update_size_from_config(portdev); } static int init_vqs(struct ports_device *portdev) @@ -2052,6 +2060,8 @@ static int virtcons_probe(struct virtio_device *vdev) __send_control_msg(portdev, VIRTIO_CONSOLE_BAD_ID, VIRTIO_CONSOLE_DEVICE_READY, 1); + update_size_from_config(portdev); + return 0; free_chrdev: -- cgit v1.2.3 From 4c653e85857b41a7148917f2628fae1d04a9c251 Mon Sep 17 00:00:00 2001 From: Evgenii Burenchev Date: Thu, 26 Feb 2026 18:29:23 +0300 Subject: vdpa/ifcvf: handle dev_set_name() failure in ifcvf_vdpa_dev_add() dev_set_name() may fail and return an error, but its return value is currently ignored and overwritten by _vdpa_register_device(). Abort device creation if dev_set_name() fails and release the device reference to avoid continuing with an improperly initialized struct device. Found by Linux Verification Center (linuxtesting.org) with SVACE. Signed-off-by: Evgenii Burenchev Acked-by: Jason Wang Acked-by: Zhu Lingshan Signed-off-by: Michael S. 
Tsirkin Message-ID: <20260226152924.38790-1-evg28bur@yandex.ru> --- drivers/vdpa/ifcvf/ifcvf_main.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) (limited to 'drivers') diff --git a/drivers/vdpa/ifcvf/ifcvf_main.c b/drivers/vdpa/ifcvf/ifcvf_main.c index d46c1606c97a..ab6d6ab3b3d8 100644 --- a/drivers/vdpa/ifcvf/ifcvf_main.c +++ b/drivers/vdpa/ifcvf/ifcvf_main.c @@ -734,15 +734,22 @@ static int ifcvf_vdpa_dev_add(struct vdpa_mgmt_dev *mdev, const char *name, ret = dev_set_name(&vdpa_dev->dev, "%s", name); else ret = dev_set_name(&vdpa_dev->dev, "vdpa%u", vdpa_dev->index); + if (ret) { + IFCVF_ERR(pdev, "Failed to set device name"); + goto err; + } ret = _vdpa_register_device(&adapter->vdpa, vf->nr_vring); if (ret) { - put_device(&adapter->vdpa.dev); IFCVF_ERR(pdev, "Failed to register to vDPA bus"); - return ret; + goto err; } return 0; + +err: + put_device(&adapter->vdpa.dev); + return ret; } static void ifcvf_vdpa_dev_del(struct vdpa_mgmt_dev *mdev, struct vdpa_device *dev) -- cgit v1.2.3 From 8da308e2444d92d8ddffaee0279c96c3af84b4be Mon Sep 17 00:00:00 2001 From: Cindy Lu Date: Mon, 26 Jan 2026 17:45:36 +0800 Subject: vdpa/mlx5: update mlx_features with driver state check Add logic in mlx5_vdpa_set_attr() to ensure the VIRTIO_NET_F_MAC feature bit is properly set only when the device is not yet in the DRIVER_OK (running) state. This makes the MAC address visible in the output of: vdpa dev config show -jp when the device is created without an initial MAC address. Signed-off-by: Cindy Lu Reviewed-by: Dragos Tatulea Signed-off-by: Michael S. 
Tsirkin Message-ID: <20260126094848.9601-2-lulu@redhat.com> --- drivers/vdpa/mlx5/net/mlx5_vnet.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers') diff --git a/drivers/vdpa/mlx5/net/mlx5_vnet.c b/drivers/vdpa/mlx5/net/mlx5_vnet.c index ad0d5fbbbca8..14d3fff7bcb7 100644 --- a/drivers/vdpa/mlx5/net/mlx5_vnet.c +++ b/drivers/vdpa/mlx5/net/mlx5_vnet.c @@ -4057,7 +4057,7 @@ static int mlx5_vdpa_set_attr(struct vdpa_mgmt_dev *v_mdev, struct vdpa_device * struct mlx5_vdpa_dev *mvdev; struct mlx5_vdpa_net *ndev; struct mlx5_core_dev *mdev; - int err = -EOPNOTSUPP; + int err = 0; mvdev = to_mvdev(dev); ndev = to_mlx5_vdpa_ndev(mvdev); -- cgit v1.2.3 From c3c33e002b58ebcbd6c5f6e00643f7437546e4f7 Mon Sep 17 00:00:00 2001 From: Cindy Lu Date: Mon, 26 Jan 2026 17:45:38 +0800 Subject: vdpa/mlx5: update MAC address handling in mlx5_vdpa_set_attr() Improve MAC address handling in mlx5_vdpa_set_attr() to ensure that old MAC entries are properly removed from the MPFS table before adding a new one. The new MAC address is then added to both the MPFS and VLAN tables. This change fixes an issue where the updated MAC address would not take effect until QEMU was rebooted. Signed-off-by: Cindy Lu Reviewed-by: Dragos Tatulea Signed-off-by: Michael S. 
Tsirkin Message-ID: <20260126094848.9601-4-lulu@redhat.com> --- drivers/vdpa/mlx5/net/mlx5_vnet.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers') diff --git a/drivers/vdpa/mlx5/net/mlx5_vnet.c b/drivers/vdpa/mlx5/net/mlx5_vnet.c index 14d3fff7bcb7..ad0d5fbbbca8 100644 --- a/drivers/vdpa/mlx5/net/mlx5_vnet.c +++ b/drivers/vdpa/mlx5/net/mlx5_vnet.c @@ -4057,7 +4057,7 @@ static int mlx5_vdpa_set_attr(struct vdpa_mgmt_dev *v_mdev, struct vdpa_device * struct mlx5_vdpa_dev *mvdev; struct mlx5_vdpa_net *ndev; struct mlx5_core_dev *mdev; - int err = 0; + int err = -EOPNOTSUPP; mvdev = to_mvdev(dev); ndev = to_mlx5_vdpa_ndev(mvdev); -- cgit v1.2.3 From 373ec43ded742b2f3aecf14731ffe1a57f438f38 Mon Sep 17 00:00:00 2001 From: Zhang Tianci Date: Thu, 26 Feb 2026 19:55:49 +0800 Subject: vduse: Requeue failed read to send_list head MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When copy_to_iter() fails in vduse_dev_read_iter(), put the message back at the head of send_list to preserve FIFO ordering and retry the oldest pending request first. Fixes: c8a6153b6c59 ("vduse: Introduce VDUSE - vDPA Device in Userspace") Reported-by: Michael S. Tsirkin Suggested-by: Xie Yongji Signed-off-by: Zhang Tianci Reviewed-by: Xie Yongji Acked-by: Jason Wang Acked-by: Eugenio Pérez Acked-by: Michael S. Tsirkin Signed-off-by: Michael S. 
Tsirkin Message-ID: <20260226115550.1814-2-zhangtianci.1997@bytedance.com> --- drivers/vdpa/vdpa_user/vduse_dev.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) (limited to 'drivers') diff --git a/drivers/vdpa/vdpa_user/vduse_dev.c b/drivers/vdpa/vdpa_user/vduse_dev.c index d5c34260ed68..a479fef535ac 100644 --- a/drivers/vdpa/vdpa_user/vduse_dev.c +++ b/drivers/vdpa/vdpa_user/vduse_dev.c @@ -221,6 +221,12 @@ static void vduse_enqueue_msg(struct list_head *head, list_add_tail(&msg->list, head); } +static void vduse_enqueue_msg_head(struct list_head *head, + struct vduse_dev_msg *msg) +{ + list_add(&msg->list, head); +} + static void vduse_dev_broken(struct vduse_dev *dev) { struct vduse_dev_msg *msg, *tmp; @@ -387,7 +393,7 @@ static ssize_t vduse_dev_read_iter(struct kiocb *iocb, struct iov_iter *to) spin_lock(&dev->msg_lock); if (ret != size) { ret = -EFAULT; - vduse_enqueue_msg(&dev->send_list, msg); + vduse_enqueue_msg_head(&dev->send_list, msg); goto unlock; } vduse_enqueue_msg(&dev->recv_list, msg); -- cgit v1.2.3 From ae9c13b6fd79087cc5a216ee1649b6f012c2a238 Mon Sep 17 00:00:00 2001 From: Zhang Tianci Date: Thu, 26 Feb 2026 19:55:50 +0800 Subject: vduse: Fix race in vduse_dev_msg_sync and vduse_dev_read_iter MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit There is one race case in vduse_dev_msg_sync and vduse_dev_read_iter: vduse_dev_read_iter(): lock(msg_lock); dequeue_msg(send_list); unlock(msg_lock); vduse_dev_msg_sync(): wait_timeout() finish lock(msg_lock); check msg->complete is false list_del(msg); <- double list_del() crash! To fix this case, we shall ensure vduse_msg is on send_list or recv_list outside the msg_lock critical section. Fixes: c8a6153b6c59 ("vduse: Introduce VDUSE - vDPA Device in Userspace") Cc: stable@vger.kernel.org Signed-off-by: Zhang Tianci Reviewed-by: Xie Yongji Acked-by: Jason Wang Acked-by: Eugenio Pérez Acked-by: Michael S. Tsirkin Signed-off-by: Michael S. 
Tsirkin Message-ID: <20260226115550.1814-3-zhangtianci.1997@bytedance.com> --- drivers/vdpa/vdpa_user/vduse_dev.c | 37 +++++++++++++++++++++++++++---------- 1 file changed, 27 insertions(+), 10 deletions(-) (limited to 'drivers') diff --git a/drivers/vdpa/vdpa_user/vduse_dev.c b/drivers/vdpa/vdpa_user/vduse_dev.c index a479fef535ac..81409a5bb09b 100644 --- a/drivers/vdpa/vdpa_user/vduse_dev.c +++ b/drivers/vdpa/vdpa_user/vduse_dev.c @@ -364,6 +364,7 @@ static ssize_t vduse_dev_read_iter(struct kiocb *iocb, struct iov_iter *to) struct file *file = iocb->ki_filp; struct vduse_dev *dev = file->private_data; struct vduse_dev_msg *msg; + struct vduse_dev_request req; int size = sizeof(struct vduse_dev_request); ssize_t ret; @@ -375,12 +376,11 @@ static ssize_t vduse_dev_read_iter(struct kiocb *iocb, struct iov_iter *to) msg = vduse_dequeue_msg(&dev->send_list); if (msg) break; + spin_unlock(&dev->msg_lock); - ret = -EAGAIN; if (file->f_flags & O_NONBLOCK) - goto unlock; + return -EAGAIN; - spin_unlock(&dev->msg_lock); ret = wait_event_interruptible_exclusive(dev->waitq, !list_empty(&dev->send_list)); if (ret) @@ -388,17 +388,34 @@ static ssize_t vduse_dev_read_iter(struct kiocb *iocb, struct iov_iter *to) spin_lock(&dev->msg_lock); } + + memcpy(&req, &msg->req, sizeof(req)); + /* + * We must ensure vduse_msg is on send_list or recv_list before unlock + * dev->msg_lock. Because vduse_dev_msg_sync() may be timeout when we + * copy data to userspace, and will call list_del() for this msg. + */ + vduse_enqueue_msg(&dev->recv_list, msg); spin_unlock(&dev->msg_lock); - ret = copy_to_iter(&msg->req, size, to); - spin_lock(&dev->msg_lock); + + ret = copy_to_iter(&req, size, to); if (ret != size) { + /* + * Roll back: move msg back to send_list if still pending. + * + * NOTE: + * vduse_find_msg() must use req.request_id instead of `msg`. + * A malicious userspace may reply to this request, and wake up + * the caller, after which `msg` will have already been freed. 
+ * And here vduse_find_msg() will return NULL then do nothing. + */ + spin_lock(&dev->msg_lock); + msg = vduse_find_msg(&dev->recv_list, req.request_id); + if (msg) + vduse_enqueue_msg_head(&dev->send_list, msg); + spin_unlock(&dev->msg_lock); ret = -EFAULT; - vduse_enqueue_msg_head(&dev->send_list, msg); - goto unlock; } - vduse_enqueue_msg(&dev->recv_list, msg); -unlock: - spin_unlock(&dev->msg_lock); return ret; } -- cgit v1.2.3 From 9c1523803445ee0348f62b77793266dd981596e0 Mon Sep 17 00:00:00 2001 From: Jason Wang Date: Fri, 30 Jan 2026 13:07:50 +0800 Subject: VDUSE: avoid leaking information to userspace MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The bouncing is not necessarily page aligned, so current VDUSE can leak kernel information through mapping bounce pages to userspace. Allocate bounce pages with __GFP_ZERO to avoid leaking information to userspace. Fixes: 8c773d53fb7b ("vduse: Implement an MMU-based software IOTLB") Cc: stable@vger.kernel.org Signed-off-by: Jason Wang Reviewed-by: Xie Yongji Reviewed-by: Eugenio Pérez Signed-off-by: Michael S.
Tsirkin Message-ID: <20260130050750.4050-1-jasowang@redhat.com> --- drivers/vdpa/vdpa_user/iova_domain.c | 2 +- drivers/vdpa/vdpa_user/vduse_dev.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'drivers') diff --git a/drivers/vdpa/vdpa_user/iova_domain.c b/drivers/vdpa/vdpa_user/iova_domain.c index 806cec32c4bc..4dc76c0d0d13 100644 --- a/drivers/vdpa/vdpa_user/iova_domain.c +++ b/drivers/vdpa/vdpa_user/iova_domain.c @@ -124,7 +124,7 @@ static int vduse_domain_map_bounce_page(struct vduse_iova_domain *domain, if (!map->bounce_page) { head_map = &domain->bounce_maps[(iova & PAGE_MASK) >> BOUNCE_MAP_SHIFT]; if (!head_map->bounce_page) { - tmp_page = alloc_page(GFP_ATOMIC); + tmp_page = alloc_page(GFP_ATOMIC | __GFP_ZERO); if (!tmp_page) return -ENOMEM; if (cmpxchg(&head_map->bounce_page, NULL, tmp_page)) diff --git a/drivers/vdpa/vdpa_user/vduse_dev.c b/drivers/vdpa/vdpa_user/vduse_dev.c index 81409a5bb09b..9ebbe6541157 100644 --- a/drivers/vdpa/vdpa_user/vduse_dev.c +++ b/drivers/vdpa/vdpa_user/vduse_dev.c @@ -999,7 +999,7 @@ static void *vduse_dev_alloc_coherent(union virtio_map token, size_t size, if (!token.group) return NULL; - addr = alloc_pages_exact(size, flag); + addr = alloc_pages_exact(size, flag | __GFP_ZERO); if (!addr) return NULL; -- cgit v1.2.3 From 8f6898fe80794f2d7c3d38c1158c806e4074a1c4 Mon Sep 17 00:00:00 2001 From: Qing Ming Date: Mon, 1 Jun 2026 18:43:00 +0800 Subject: vhost/net: complete zerocopy ubufs only once vhost-net initializes one ubuf_info per outstanding zerocopy TX descriptor and hands it to the backend socket. The networking stack may then clone a zerocopy skb before all skb references are released. For example, batman-adv fragmentation reaches skb_split(), which calls skb_zerocopy_clone() and increments the same ubuf_info refcount. vhost_zerocopy_complete() currently treats every ubuf callback as a completed vhost descriptor. 
It dereferences ubuf->ctx, writes the descriptor completion state, and drops the vhost_net_ubuf_ref even when the callback only releases a cloned skb reference. A backend reset can therefore wait for and free the vhost_net_ubuf_ref while another cloned skb still carries the same ubuf_info. A later completion then dereferences the freed ubufs pointer. KASAN reports the stale completion as: BUG: KASAN: slab-use-after-free in vhost_zerocopy_complete+0x1d7/0x1f0 BUG: KASAN: slab-use-after-free in vhost_zerocopy_complete+0x101/0x1f0 vhost_zerocopy_complete skb_copy_ubufs __dev_forward_skb2 veth_xmit The freed object was allocated from vhost_net_ioctl() while setting the backend and freed through kfree_rcu()/kvfree_rcu_bulk after backend removal, while delayed skb completion still reached vhost_zerocopy_complete(). Honor the generic ubuf_info refcount before touching vhost state, and run the vhost descriptor completion only for the final ubuf reference. This matches the msg_zerocopy_complete() ownership rule for cloned zerocopy skbs. Fixes: bab632d69ee4 ("vhost: vhost TX zero-copy support") Signed-off-by: Qing Ming Signed-off-by: Michael S. 
Tsirkin Message-ID: <20260601104300.197210-1-a0yami@mailbox.org> --- drivers/vhost/net.c | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) (limited to 'drivers') diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c index c6536cad9c4f..b9af63fb6306 100644 --- a/drivers/vhost/net.c +++ b/drivers/vhost/net.c @@ -390,13 +390,20 @@ static void vhost_zerocopy_signal_used(struct vhost_net *net, static void vhost_zerocopy_complete(struct sk_buff *skb, struct ubuf_info *ubuf_base, bool success) { - struct ubuf_info_msgzc *ubuf = uarg_to_msgzc(ubuf_base); - struct vhost_net_ubuf_ref *ubufs = ubuf->ctx; - struct vhost_virtqueue *vq = ubufs->vq; + struct ubuf_info_msgzc *ubuf; + struct vhost_net_ubuf_ref *ubufs; + struct vhost_virtqueue *vq; int cnt; - rcu_read_lock_bh(); + /* Only the final cloned skb reference completes the vhost descriptor. */ + if (!refcount_dec_and_test(&ubuf_base->refcnt)) + return; + + ubuf = uarg_to_msgzc(ubuf_base); + ubufs = ubuf->ctx; + vq = ubufs->vq; + rcu_read_lock_bh(); /* set len to mark this desc buffers done DMA */ vq->heads[ubuf->desc].len = success ? VHOST_DMA_DONE_LEN : VHOST_DMA_FAILED_LEN; -- cgit v1.2.3 From 455a2a1af92651764e9eb42cec0d95ac142afc28 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Fri, 13 Feb 2026 16:40:46 +0100 Subject: vduse: fix compat handling for VDUSE_IOTLB_GET_FD/VDUSE_VQ_GET_INFO MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit These two ioctls are incompatible on 32-bit x86 userspace, because the data structures are shorter than they are on 64-bit. Add a proper .compat_ioctl handler for x86 that reads the structures with the smaller padding before calling the internal handlers. On all other architectures, CONFIG_COMPAT_FOR_U64_ALIGNMENT is disabled and no special handling is required. 
Fixes: ad146355bfad ("vduse: Support querying information of IOVA regions") Fixes: c8a6153b6c59 ("vduse: Introduce VDUSE - vDPA Device in Userspace") Acked-by: Eugenio Pérez Signed-off-by: Arnd Bergmann Signed-off-by: Michael S. Tsirkin Message-ID: <20260213154051.4172275-1-arnd@kernel.org> --- drivers/vdpa/vdpa_user/vduse_dev.c | 123 ++++++++++++++++++++++++++++++++++++- 1 file changed, 122 insertions(+), 1 deletion(-) (limited to 'drivers') diff --git a/drivers/vdpa/vdpa_user/vduse_dev.c b/drivers/vdpa/vdpa_user/vduse_dev.c index 9ebbe6541157..f15ad425e01f 100644 --- a/drivers/vdpa/vdpa_user/vduse_dev.c +++ b/drivers/vdpa/vdpa_user/vduse_dev.c @@ -1641,6 +1641,127 @@ static long vduse_dev_ioctl(struct file *file, unsigned int cmd, return ret; } +#ifdef CONFIG_COMPAT_FOR_U64_ALIGNMENT +/* + * i386 has different alignment constraints than x86_64, + * so there are only 3 bytes of padding instead of 7. + */ +struct compat_vduse_iotlb_entry { + compat_u64 offset; + compat_u64 start; + compat_u64 last; + __u8 perm; + __u8 padding[3]; +}; +#define COMPAT_VDUSE_IOTLB_GET_FD _IOWR(VDUSE_BASE, 0x10, struct compat_vduse_iotlb_entry) + +struct compat_vduse_vq_info { + __u32 index; + __u32 num; + compat_u64 desc_addr; + compat_u64 driver_addr; + compat_u64 device_addr; + union { + struct vduse_vq_state_split split; + struct vduse_vq_state_packed packed; + }; + __u8 ready; + __u8 padding[3]; +}; +#define COMPAT_VDUSE_VQ_GET_INFO _IOWR(VDUSE_BASE, 0x15, struct compat_vduse_vq_info) + +static long vduse_dev_compat_ioctl(struct file *file, unsigned int cmd, + unsigned long arg) +{ + struct vduse_dev *dev = file->private_data; + void __user *argp = (void __user *)arg; + int ret; + + if (unlikely(dev->broken)) + return -EPERM; + + switch (cmd) { + case COMPAT_VDUSE_IOTLB_GET_FD: { + struct vduse_iotlb_entry_v2 entry = {0}; + struct file *f = NULL; + + ret = -EFAULT; + if (copy_from_user(&entry, argp, _IOC_SIZE(cmd))) + break; + + ret = vduse_dev_iotlb_entry(dev, &entry, &f, NULL); 
+ if (ret) + break; + + ret = -EINVAL; + if (!f) + break; + + ret = copy_to_user(argp, &entry, _IOC_SIZE(cmd)); + if (ret) { + ret = -EFAULT; + fput(f); + break; + } + ret = receive_fd(f, NULL, perm_to_file_flags(entry.perm)); + fput(f); + break; + } + case COMPAT_VDUSE_VQ_GET_INFO: { + struct vduse_vq_info vq_info = {}; + struct vduse_virtqueue *vq; + u32 index; + + ret = -EFAULT; + if (copy_from_user(&vq_info, argp, + sizeof(struct compat_vduse_vq_info))) + break; + + ret = -EINVAL; + if (vq_info.index >= dev->vq_num) + break; + + index = array_index_nospec(vq_info.index, dev->vq_num); + vq = dev->vqs[index]; + vq_info.desc_addr = vq->desc_addr; + vq_info.driver_addr = vq->driver_addr; + vq_info.device_addr = vq->device_addr; + vq_info.num = vq->num; + + if (dev->driver_features & BIT_ULL(VIRTIO_F_RING_PACKED)) { + vq_info.packed.last_avail_counter = + vq->state.packed.last_avail_counter; + vq_info.packed.last_avail_idx = + vq->state.packed.last_avail_idx; + vq_info.packed.last_used_counter = + vq->state.packed.last_used_counter; + vq_info.packed.last_used_idx = + vq->state.packed.last_used_idx; + } else + vq_info.split.avail_index = + vq->state.split.avail_index; + + vq_info.ready = vq->ready; + + ret = -EFAULT; + if (copy_to_user(argp, &vq_info, + sizeof(struct compat_vduse_vq_info))) + break; + + ret = 0; + break; + } + default: /* not a compat-only command: use the native handler */ + ret = vduse_dev_ioctl(file, cmd, (unsigned long)compat_ptr(arg)); + break; + } + + return ret; +} +#else +#define vduse_dev_compat_ioctl compat_ptr_ioctl +#endif + static int vduse_dev_release(struct inode *inode, struct file *file) { struct vduse_dev *dev = file->private_data; @@ -1694,7 +1815,7 @@ static const struct file_operations vduse_dev_fops = { .write_iter = vduse_dev_write_iter, .poll = vduse_dev_poll, .unlocked_ioctl = vduse_dev_ioctl, - .compat_ioctl = compat_ptr_ioctl, + .compat_ioctl = vduse_dev_compat_ioctl, .llseek = noop_llseek, }; -- cgit v1.2.3 From 32fe1de5c12471b8c2d613003bd93d111586a10d Mon Sep 17
00:00:00 2001 From: Alexander Graf Date: Sat, 31 Jan 2026 11:28:09 +0100 Subject: virtio_ring: Add READ_ONCE annotations for device-writable fields KCSAN reports data races when accessing virtio ring fields that are concurrently written by the device (host). These are legitimate concurrent accesses where the CPU reads fields that the device updates via DMA-like mechanisms. Add accessor functions that use READ_ONCE() to properly annotate these device-writable fields and prevent compiler optimizations that could in theory break the code. This also serves as documentation showing which fields are shared with the device. The affected fields are: - Split ring: used->idx, used->ring[].id, used->ring[].len - Packed ring: desc[].flags, desc[].id, desc[].len This patch was partially written using the help of Kiro, an AI coding assistant, to automate the mechanical work of generating the inline function definition. Signed-off-by: Alexander Graf [jth: Add READ_ONCE in virtqueue_kick_prepare_split ] Co-developed-by: Johannes Thumshirn Signed-off-by: Johannes Thumshirn Reviewed-by: Alexander Graf Signed-off-by: Michael S. Tsirkin Message-ID: <20260131102810.1254845-1-johannes.thumshirn@wdc.com> --- drivers/virtio/virtio_ring.c | 77 ++++++++++++++++++++++++++++++++++---------- 1 file changed, 60 insertions(+), 17 deletions(-) (limited to 'drivers') diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c index fbca7ce1c6bf..b438dc2ce1b8 100644 --- a/drivers/virtio/virtio_ring.c +++ b/drivers/virtio/virtio_ring.c @@ -272,6 +272,55 @@ struct vring_virtqueue { #endif }; +/* + * Accessors for device-writable fields in virtio rings. + * These fields are concurrently written by the device and read by the driver. + * Use READ_ONCE() to prevent compiler optimizations, document the + * intentional data race and prevent KCSAN warnings. 
+ */ +static inline u16 vring_read_split_used_idx(const struct vring_virtqueue *vq) +{ + return virtio16_to_cpu(vq->vq.vdev, + READ_ONCE(vq->split.vring.used->idx)); +} + +static inline u32 vring_read_split_used_id(const struct vring_virtqueue *vq, + u16 idx) +{ + return virtio32_to_cpu(vq->vq.vdev, + READ_ONCE(vq->split.vring.used->ring[idx].id)); +} + +static inline u32 vring_read_split_used_len(const struct vring_virtqueue *vq, u16 idx) +{ + return virtio32_to_cpu(vq->vq.vdev, + READ_ONCE(vq->split.vring.used->ring[idx].len)); +} + +static inline u16 vring_read_split_avail_event(const struct vring_virtqueue *vq) +{ + return virtio16_to_cpu(vq->vq.vdev, + READ_ONCE(vring_avail_event(&vq->split.vring))); +} + +static inline u16 vring_read_packed_desc_flags(const struct vring_virtqueue *vq, + u16 idx) +{ + return le16_to_cpu(READ_ONCE(vq->packed.vring.desc[idx].flags)); +} + +static inline u16 vring_read_packed_desc_id(const struct vring_virtqueue *vq, + u16 idx) +{ + return le16_to_cpu(READ_ONCE(vq->packed.vring.desc[idx].id)); +} + +static inline u32 vring_read_packed_desc_len(const struct vring_virtqueue *vq, + u16 idx) +{ + return le32_to_cpu(READ_ONCE(vq->packed.vring.desc[idx].len)); +} + static struct vring_desc_extra *vring_alloc_desc_extra(unsigned int num); static void vring_free(struct virtqueue *_vq); @@ -809,8 +858,7 @@ static bool virtqueue_kick_prepare_split(struct vring_virtqueue *vq) LAST_ADD_TIME_INVALID(vq); if (vq->event) { - needs_kick = vring_need_event(virtio16_to_cpu(vq->vq.vdev, - vring_avail_event(&vq->split.vring)), + needs_kick = vring_need_event(vring_read_split_avail_event(vq), new, old); } else { needs_kick = !(vq->split.vring.used->flags & @@ -897,8 +945,7 @@ static void detach_buf_split(struct vring_virtqueue *vq, unsigned int head, static bool virtqueue_poll_split(const struct vring_virtqueue *vq, unsigned int last_used_idx) { - return (u16)last_used_idx != virtio16_to_cpu(vq->vq.vdev, - vq->split.vring.used->idx); + return 
(u16)last_used_idx != vring_read_split_used_idx(vq); } static bool more_used_split(const struct vring_virtqueue *vq) @@ -939,10 +986,8 @@ static void *virtqueue_get_buf_ctx_split(struct vring_virtqueue *vq, virtio_rmb(vq->weak_barriers); last_used = (vq->last_used_idx & (vq->split.vring.num - 1)); - i = virtio32_to_cpu(vq->vq.vdev, - vq->split.vring.used->ring[last_used].id); - *len = virtio32_to_cpu(vq->vq.vdev, - vq->split.vring.used->ring[last_used].len); + i = vring_read_split_used_id(vq, last_used); + *len = vring_read_split_used_len(vq, last_used); if (unlikely(i >= vq->split.vring.num)) { BAD_RING(vq, "id %u out of range\n", i); @@ -1003,10 +1048,8 @@ static void *virtqueue_get_buf_ctx_split_in_order(struct vring_virtqueue *vq, */ virtio_rmb(vq->weak_barriers); - vq->batch_last.id = virtio32_to_cpu(vq->vq.vdev, - vq->split.vring.used->ring[last_used_idx].id); - vq->batch_last.len = virtio32_to_cpu(vq->vq.vdev, - vq->split.vring.used->ring[last_used_idx].len); + vq->batch_last.id = vring_read_split_used_id(vq, last_used_idx); + vq->batch_last.len = vring_read_split_used_len(vq, last_used_idx); } if (vq->batch_last.id == last_used) { @@ -1112,7 +1155,7 @@ static bool virtqueue_enable_cb_delayed_split(struct vring_virtqueue *vq) &vring_used_event(&vq->split.vring), cpu_to_virtio16(vq->vq.vdev, vq->last_used_idx + bufs)); - if (unlikely((u16)(virtio16_to_cpu(vq->vq.vdev, vq->split.vring.used->idx) + if (unlikely((u16)(vring_read_split_used_idx(vq) - vq->last_used_idx) > bufs)) { END_USE(vq); return false; @@ -2036,10 +2079,10 @@ static void detach_buf_packed(struct vring_virtqueue *vq, static inline bool is_used_desc_packed(const struct vring_virtqueue *vq, u16 idx, bool used_wrap_counter) { - bool avail, used; u16 flags; + bool avail, used; - flags = le16_to_cpu(vq->packed.vring.desc[idx].flags); + flags = vring_read_packed_desc_flags(vq, idx); avail = !!(flags & (1 << VRING_PACKED_DESC_F_AVAIL)); used = !!(flags & (1 << VRING_PACKED_DESC_F_USED)); @@ -2186,8 
+2229,8 @@ static void *virtqueue_get_buf_ctx_packed(struct vring_virtqueue *vq, last_used_idx = READ_ONCE(vq->last_used_idx); used_wrap_counter = packed_used_wrap_counter(last_used_idx); last_used = packed_last_used(last_used_idx); - id = le16_to_cpu(vq->packed.vring.desc[last_used].id); - *len = le32_to_cpu(vq->packed.vring.desc[last_used].len); + id = vring_read_packed_desc_id(vq, last_used); + *len = vring_read_packed_desc_len(vq, last_used); if (unlikely(id >= num)) { BAD_RING(vq, "id %u out of range\n", id); -- cgit v1.2.3 From 7c59cc9cf729092512b88ccd3290b2fd0c7e53c4 Mon Sep 17 00:00:00 2001 From: Maurice Hieronymus Date: Sun, 23 Nov 2025 18:57:48 +0100 Subject: virtio-balloon: Destroy mutex before freeing virtio_balloon Add a call to mutex_destroy in the error code path as well as in the virtballoon_remove code path. Signed-off-by: Maurice Hieronymus Acked-by: David Hildenbrand (Red Hat) Signed-off-by: Michael S. Tsirkin Message-ID: <20251123175750.445461-2-mhi@mailbox.org> --- drivers/virtio/virtio_balloon.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'drivers') diff --git a/drivers/virtio/virtio_balloon.c b/drivers/virtio/virtio_balloon.c index f6c2dff33f8a..088b3a0e6ce6 100644 --- a/drivers/virtio/virtio_balloon.c +++ b/drivers/virtio/virtio_balloon.c @@ -1075,6 +1075,7 @@ out_del_balloon_wq: out_del_vqs: vdev->config->del_vqs(vdev); out_free_vb: + mutex_destroy(&vb->balloon_lock); kfree(vb); out: return err; @@ -1119,6 +1120,7 @@ static void virtballoon_remove(struct virtio_device *vdev) } remove_common(vb); + mutex_destroy(&vb->balloon_lock); kfree(vb); } -- cgit v1.2.3 From 7cdaeef19bc87bb93f3083b11dda92e3e9ff855c Mon Sep 17 00:00:00 2001 From: Maurice Hieronymus Date: Sun, 23 Nov 2025 18:57:49 +0100 Subject: virtio-mem: Destroy mutex before freeing virtio_mem Add a call to mutex_destroy in the error code path as well as in the virtio_mem_remove code path. 
Signed-off-by: Maurice Hieronymus Acked-by: David Hildenbrand (Red Hat) Signed-off-by: Michael S. Tsirkin Message-ID: <20251123175750.445461-3-mhi@mailbox.org> --- drivers/virtio/virtio_mem.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'drivers') diff --git a/drivers/virtio/virtio_mem.c b/drivers/virtio/virtio_mem.c index 48051e9e98ab..11c441501582 100644 --- a/drivers/virtio/virtio_mem.c +++ b/drivers/virtio/virtio_mem.c @@ -2975,6 +2975,7 @@ static int virtio_mem_probe(struct virtio_device *vdev) out_del_vq: vdev->config->del_vqs(vdev); out_free_vm: + mutex_destroy(&vm->hotplug_mutex); kfree(vm); vdev->priv = NULL; @@ -3067,6 +3068,7 @@ static void virtio_mem_remove(struct virtio_device *vdev) virtio_reset_device(vdev); vdev->config->del_vqs(vdev); + mutex_destroy(&vm->hotplug_mutex); kfree(vm); vdev->priv = NULL; } -- cgit v1.2.3 From 4f3da991b55c940cedb563836fe11ddfb3ff9248 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Fri, 24 Apr 2026 12:47:02 +0200 Subject: vdpa_sim_blk: switch to dynamic root device Driver core expects devices to be dynamically allocated and will, for example, complain loudly when no release function has been provided. Use root_device_register() to allocate and register the root device instead of open coding using a static device. Signed-off-by: Johan Hovold Acked-by: Jason Wang Signed-off-by: Michael S. 
Tsirkin Message-ID: <20260424104703.2619093-2-johan@kernel.org> --- drivers/vdpa/vdpa_sim/vdpa_sim_blk.c | 24 ++++++++---------------- 1 file changed, 8 insertions(+), 16 deletions(-) (limited to 'drivers') diff --git a/drivers/vdpa/vdpa_sim/vdpa_sim_blk.c b/drivers/vdpa/vdpa_sim/vdpa_sim_blk.c index b137f3679343..f70f454dde8e 100644 --- a/drivers/vdpa/vdpa_sim/vdpa_sim_blk.c +++ b/drivers/vdpa/vdpa_sim/vdpa_sim_blk.c @@ -397,14 +397,7 @@ static void vdpasim_blk_free(struct vdpasim *vdpasim) kvfree(blk->buffer); } -static void vdpasim_blk_mgmtdev_release(struct device *dev) -{ -} - -static struct device vdpasim_blk_mgmtdev = { - .init_name = "vdpasim_blk", - .release = vdpasim_blk_mgmtdev_release, -}; +static struct device *vdpasim_blk_mgmtdev; static int vdpasim_blk_dev_add(struct vdpa_mgmt_dev *mdev, const char *name, const struct vdpa_dev_set_config *config) @@ -475,7 +468,6 @@ static struct virtio_device_id id_table[] = { }; static struct vdpa_mgmt_dev mgmt_dev = { - .device = &vdpasim_blk_mgmtdev, .id_table = id_table, .ops = &vdpasim_blk_mgmtdev_ops, }; @@ -484,12 +476,11 @@ static int __init vdpasim_blk_init(void) { int ret; - ret = device_register(&vdpasim_blk_mgmtdev); - if (ret) { - put_device(&vdpasim_blk_mgmtdev); - return ret; - } + vdpasim_blk_mgmtdev = root_device_register("vdpasim_blk"); + if (IS_ERR(vdpasim_blk_mgmtdev)) + return PTR_ERR(vdpasim_blk_mgmtdev); + mgmt_dev.device = vdpasim_blk_mgmtdev; ret = vdpa_mgmtdev_register(&mgmt_dev); if (ret) goto parent_err; @@ -507,7 +498,8 @@ static int __init vdpasim_blk_init(void) mgmt_dev_err: vdpa_mgmtdev_unregister(&mgmt_dev); parent_err: - device_unregister(&vdpasim_blk_mgmtdev); + root_device_unregister(vdpasim_blk_mgmtdev); + return ret; } @@ -515,7 +507,7 @@ static void __exit vdpasim_blk_exit(void) { kvfree(shared_buffer); vdpa_mgmtdev_unregister(&mgmt_dev); - device_unregister(&vdpasim_blk_mgmtdev); + root_device_unregister(vdpasim_blk_mgmtdev); } module_init(vdpasim_blk_init) -- cgit v1.2.3 From 
e13fc46b4dd1e8f8bb913229dc307bf1ce08ea77 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Fri, 24 Apr 2026 12:47:03 +0200 Subject: vdpa_sim_net: switch to dynamic root device Driver core expects devices to be dynamically allocated and will, for example, complain loudly when no release function has been provided. Use root_device_register() to allocate and register the root device instead of open coding using a static device. Signed-off-by: Johan Hovold Acked-by: Jason Wang Signed-off-by: Michael S. Tsirkin Message-ID: <20260424104703.2619093-3-johan@kernel.org> --- drivers/vdpa/vdpa_sim/vdpa_sim_net.c | 23 +++++++---------------- 1 file changed, 7 insertions(+), 16 deletions(-) (limited to 'drivers') diff --git a/drivers/vdpa/vdpa_sim/vdpa_sim_net.c b/drivers/vdpa/vdpa_sim/vdpa_sim_net.c index 6caf09a1907b..29fd14ce5860 100644 --- a/drivers/vdpa/vdpa_sim/vdpa_sim_net.c +++ b/drivers/vdpa/vdpa_sim/vdpa_sim_net.c @@ -453,14 +453,7 @@ static void vdpasim_net_free(struct vdpasim *vdpasim) kvfree(net->buffer); } -static void vdpasim_net_mgmtdev_release(struct device *dev) -{ -} - -static struct device vdpasim_net_mgmtdev = { - .init_name = "vdpasim_net", - .release = vdpasim_net_mgmtdev_release, -}; +static struct device *vdpasim_net_mgmtdev; static int vdpasim_net_dev_add(struct vdpa_mgmt_dev *mdev, const char *name, const struct vdpa_dev_set_config *config) @@ -538,7 +531,6 @@ static struct virtio_device_id id_table[] = { }; static struct vdpa_mgmt_dev mgmt_dev = { - .device = &vdpasim_net_mgmtdev, .id_table = id_table, .ops = &vdpasim_net_mgmtdev_ops, .config_attr_mask = (1 << VDPA_ATTR_DEV_NET_CFG_MACADDR | @@ -552,26 +544,25 @@ static int __init vdpasim_net_init(void) { int ret; - ret = device_register(&vdpasim_net_mgmtdev); - if (ret) { - put_device(&vdpasim_net_mgmtdev); - return ret; - } + vdpasim_net_mgmtdev = root_device_register("vdpasim_net"); + if (IS_ERR(vdpasim_net_mgmtdev)) + return PTR_ERR(vdpasim_net_mgmtdev); + mgmt_dev.device = vdpasim_net_mgmtdev; 
ret = vdpa_mgmtdev_register(&mgmt_dev); if (ret) goto parent_err; return 0; parent_err: - device_unregister(&vdpasim_net_mgmtdev); + root_device_unregister(vdpasim_net_mgmtdev); return ret; } static void __exit vdpasim_net_exit(void) { vdpa_mgmtdev_unregister(&mgmt_dev); - device_unregister(&vdpasim_net_mgmtdev); + root_device_unregister(vdpasim_net_mgmtdev); } module_init(vdpasim_net_init); -- cgit v1.2.3 From 4d130b63bd08b789f70a9f4a7af33104cc8cebce Mon Sep 17 00:00:00 2001 From: Rosen Penev Date: Thu, 7 May 2026 22:18:37 -0700 Subject: vdpa/mlx5: Use kvzalloc_flex() for MTT command memory The create mkey command memory embeds the MTT array as a flexible array member. Use kvzalloc_flex() to allocate it directly instead of open-coding the struct_size() calculation with kvcalloc(). The MTT allocation still needs to be aligned to MLX5_VDPA_MTT_ALIGN bytes. Since each MTT entry is __be64, align the entry count directly and avoid carrying a separate byte length variable. Assisted-by: Codex:GPT-5.5 Signed-off-by: Rosen Penev Reviewed-by: Dragos Tatulea Signed-off-by: Michael S. 
Tsirkin Message-ID: <20260508051837.1744409-1-rosenp@gmail.com> --- drivers/vdpa/mlx5/core/mr.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) (limited to 'drivers') diff --git a/drivers/vdpa/mlx5/core/mr.c b/drivers/vdpa/mlx5/core/mr.c index 42c2705077a6..6d02ccf9eb91 100644 --- a/drivers/vdpa/mlx5/core/mr.c +++ b/drivers/vdpa/mlx5/core/mr.c @@ -221,11 +221,10 @@ static int create_direct_keys(struct mlx5_vdpa_dev *mvdev, struct mlx5_vdpa_mr * list_for_each_entry(dmr, &mr->head, list) { struct mlx5_create_mkey_mem *cmd_mem; - int mttlen, mttcount; + int mttcount; - mttlen = roundup(MLX5_ST_SZ_BYTES(mtt) * dmr->nsg, MLX5_VDPA_MTT_ALIGN); - mttcount = mttlen / sizeof(cmd_mem->mtt[0]); - cmd_mem = kvcalloc(1, struct_size(cmd_mem, mtt, mttcount), GFP_KERNEL); + mttcount = ALIGN(dmr->nsg, MLX5_VDPA_MTT_ALIGN / sizeof(cmd_mem->mtt[0])); + cmd_mem = kvzalloc_flex(*cmd_mem, mtt, mttcount); if (!cmd_mem) { err = -ENOMEM; goto done; -- cgit v1.2.3 From b20b0867f2b36f382d6e77b8f2a489cc37a94366 Mon Sep 17 00:00:00 2001 From: Ethan Nelson-Moore Date: Fri, 30 Jan 2026 18:00:09 -0800 Subject: vhost: remove unnecessary module_init/exit functions The vhost driver has unnecessary empty module_init and module_exit functions. Remove them. Note that if a module_init function exists, a module_exit function must also exist; otherwise, the module cannot be unloaded. Signed-off-by: Ethan Nelson-Moore Signed-off-by: Michael S. 
Tsirkin Message-ID: <20260131020010.45647-1-enelsonmoore@gmail.com> --- drivers/vhost/vhost.c | 12 ------------ 1 file changed, 12 deletions(-) (limited to 'drivers') diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c index db329a6f6145..4c525b3e16ea 100644 --- a/drivers/vhost/vhost.c +++ b/drivers/vhost/vhost.c @@ -3324,18 +3324,6 @@ void vhost_set_backend_features(struct vhost_dev *dev, u64 features) } EXPORT_SYMBOL_GPL(vhost_set_backend_features); -static int __init vhost_init(void) -{ - return 0; -} - -static void __exit vhost_exit(void) -{ -} - -module_init(vhost_init); -module_exit(vhost_exit); - MODULE_VERSION("0.0.1"); MODULE_LICENSE("GPL v2"); MODULE_AUTHOR("Michael S. Tsirkin"); -- cgit v1.2.3 From 74dc530f4c505d61f0f3620e59fe56c325ae3437 Mon Sep 17 00:00:00 2001 From: Srujana Challa Date: Tue, 24 Feb 2026 15:22:23 +0530 Subject: vdpa/octeon_ep: Fix PF->VF mailbox data address calculation The mailbox address was computed assuming 1 ring per VF. Read the actual rings-per-VF from OCTEP_EPF_RINFO and use it when calculating OCTEP_PF_MBOX_DATA offsets, fixing VF initialization when rings per VF > 1. Fixes: 8b6c724cdab8 ("virtio: vdpa: vDPA driver for Marvell OCTEON DPU devices") Signed-off-by: Srujana Challa Signed-off-by: Michael S. Tsirkin Message-ID: <20260224095226.1001151-2-schalla@marvell.com> --- drivers/vdpa/octeon_ep/octep_vdpa_main.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'drivers') diff --git a/drivers/vdpa/octeon_ep/octep_vdpa_main.c b/drivers/vdpa/octeon_ep/octep_vdpa_main.c index 31a02e7fd7f2..9946480ee704 100644 --- a/drivers/vdpa/octeon_ep/octep_vdpa_main.c +++ b/drivers/vdpa/octeon_ep/octep_vdpa_main.c @@ -1,6 +1,7 @@ // SPDX-License-Identifier: GPL-2.0-only /* Copyright (C) 2024 Marvell. 
*/ +#include #include #include #include @@ -722,6 +723,8 @@ static int octep_sriov_enable(struct pci_dev *pdev, int num_vfs) bool done = false; int index = 0; int ret, i; + u8 rpvf; + u64 val; ret = pci_enable_sriov(pdev, num_vfs); if (ret) @@ -741,9 +744,11 @@ static int octep_sriov_enable(struct pci_dev *pdev, int num_vfs) } } + val = readq(addr + OCTEP_EPF_RINFO(0)); + rpvf = FIELD_GET(GENMASK_ULL(35, 32), val); if (done) { for (i = 0; i < pf->enabled_vfs; i++) - writeq(OCTEP_DEV_READY_SIGNATURE, addr + OCTEP_PF_MBOX_DATA(i)); + writeq(OCTEP_DEV_READY_SIGNATURE, addr + OCTEP_PF_MBOX_DATA(i * rpvf)); } return num_vfs; -- cgit v1.2.3 From d42eadf7969e2217fceb84317d3fd28a7bdbd96e Mon Sep 17 00:00:00 2001 From: Vamsi Attunuru Date: Tue, 24 Feb 2026 15:22:24 +0530 Subject: vdpa/octeon_ep: Use 4 bytes for mailbox signature The upper 4 bytes are reserved by the firmware for storing meta data. Use only lower 4 bytes to update the signature details. Signed-off-by: Vamsi Attunuru Signed-off-by: Michael S. 
Tsirkin Message-ID: <20260224095226.1001151-3-schalla@marvell.com> --- drivers/vdpa/octeon_ep/octep_vdpa_main.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'drivers') diff --git a/drivers/vdpa/octeon_ep/octep_vdpa_main.c b/drivers/vdpa/octeon_ep/octep_vdpa_main.c index 9946480ee704..deaa8dc7813e 100644 --- a/drivers/vdpa/octeon_ep/octep_vdpa_main.c +++ b/drivers/vdpa/octeon_ep/octep_vdpa_main.c @@ -573,10 +573,10 @@ static const struct vdpa_mgmtdev_ops octep_vdpa_mgmt_dev_ops = { static bool get_device_ready_status(u8 __iomem *addr) { - u64 signature = readq(addr + OCTEP_VF_MBOX_DATA(0)); + u32 signature = readl(addr + OCTEP_VF_MBOX_DATA(0)); if (signature == OCTEP_DEV_READY_SIGNATURE) { - writeq(0, addr + OCTEP_VF_MBOX_DATA(0)); + writel(0, addr + OCTEP_VF_MBOX_DATA(0)); return true; } @@ -748,7 +748,7 @@ static int octep_sriov_enable(struct pci_dev *pdev, int num_vfs) rpvf = FIELD_GET(GENMASK_ULL(35, 32), val); if (done) { for (i = 0; i < pf->enabled_vfs; i++) - writeq(OCTEP_DEV_READY_SIGNATURE, addr + OCTEP_PF_MBOX_DATA(i * rpvf)); + writel(OCTEP_DEV_READY_SIGNATURE, addr + OCTEP_PF_MBOX_DATA(i * rpvf)); } return num_vfs; -- cgit v1.2.3 From a5786561649a52402090756308d1c57a914e09b3 Mon Sep 17 00:00:00 2001 From: Vamsi Attunuru Date: Tue, 24 Feb 2026 15:22:25 +0530 Subject: vdpa/octeon_ep: Add vDPA device event handling for firmware notifications Handle vDPA device add and remove events from Octeon firmware. Use irq 0 for event delivery as device interrupts are multiplexed. Signed-off-by: Vamsi Attunuru Signed-off-by: Michael S. 
Tsirkin Message-ID: <20260224095226.1001151-4-schalla@marvell.com> --- drivers/vdpa/octeon_ep/octep_vdpa.h | 22 ++++++- drivers/vdpa/octeon_ep/octep_vdpa_main.c | 107 +++++++++++++++++++++++++++---- 2 files changed, 115 insertions(+), 14 deletions(-) (limited to 'drivers') diff --git a/drivers/vdpa/octeon_ep/octep_vdpa.h b/drivers/vdpa/octeon_ep/octep_vdpa.h index 53b020b019f7..a67bf50e4075 100644 --- a/drivers/vdpa/octeon_ep/octep_vdpa.h +++ b/drivers/vdpa/octeon_ep/octep_vdpa.h @@ -30,8 +30,10 @@ #define OCTEP_EPF_RINFO(x) (0x000209f0 | ((x) << 25)) #define OCTEP_VF_MBOX_DATA(x) (0x00010210 | ((x) << 17)) #define OCTEP_PF_MBOX_DATA(x) (0x00022000 | ((x) << 4)) +#define OCTEP_VF_EVENT_STATE(x) (0x00010030 | ((x) << 17)) +#define OCTEP_VF_EVENT_REG(x) (0x00010060 | ((x) << 17)) #define OCTEP_VF_IN_CTRL(x) (0x00010000 | ((x) << 17)) -#define OCTEP_VF_IN_CTRL_RPVF(val) (((val) >> 48) & 0xF) +#define OCTEP_VF_IN_CTRL_RPVF(val) (FIELD_GET(GENMASK_ULL(51, 48), val)) #define OCTEP_FW_READY_SIGNATURE0 0xFEEDFEED #define OCTEP_FW_READY_SIGNATURE1 0x3355ffaa @@ -43,9 +45,26 @@ enum octep_vdpa_dev_status { OCTEP_VDPA_DEV_STATUS_WAIT_FOR_BAR_INIT, OCTEP_VDPA_DEV_STATUS_INIT, OCTEP_VDPA_DEV_STATUS_READY, + OCTEP_VDPA_DEV_STATUS_ADDED, + OCTEP_VDPA_DEV_STATUS_REMOVED, OCTEP_VDPA_DEV_STATUS_UNINIT }; +enum octep_vdpa_dev_event_state { + OCTEP_VDPA_DEV_NO_EVENT, + OCTEP_VDPA_DEV_NEW_EVENT, + OCTEP_VDPA_DEV_EVENT_ACTIVE, + OCTEP_VDPA_DEV_EVENT_DONE, +}; + +enum octep_vdpa_dev_event { + OCTEP_VDPA_DEV_EVENT_NONE, + OCTEP_VDPA_DEV_EVENT_ACK, + OCTEP_VDPA_DEV_EVENT_NACK, + OCTEP_VDPA_DEV_ADD_EVENT, + OCTEP_VDPA_DEV_DEL_EVENT, +}; + struct octep_vring_info { struct vdpa_callback cb; void __iomem *notify_addr; @@ -86,6 +105,7 @@ struct octep_hw { u64 features; u16 nr_vring; u32 config_size; + int requested_irqs; int nb_irqs; int *irqs; u8 dev_id; diff --git a/drivers/vdpa/octeon_ep/octep_vdpa_main.c b/drivers/vdpa/octeon_ep/octep_vdpa_main.c index deaa8dc7813e..5b9d76632376 100644 --- 
a/drivers/vdpa/octeon_ep/octep_vdpa_main.c +++ b/drivers/vdpa/octeon_ep/octep_vdpa_main.c @@ -9,6 +9,7 @@ #include "octep_vdpa.h" #define OCTEP_VDPA_DRIVER_NAME "octep_vdpa" +#define OCTEP_VDPA_NAME_BUFSIZE 16 struct octep_pf { u8 __iomem *base[PCI_STD_NUM_BARS]; @@ -20,6 +21,11 @@ struct octep_pf { u16 vf_devid; }; +struct octep_vdpa_event_wk { + struct work_struct work; + void *ctxptr; +}; + struct octep_vdpa { struct vdpa_device vdpa; struct octep_hw *oct_hw; @@ -34,6 +40,8 @@ struct octep_vdpa_mgmt_dev { struct work_struct setup_task; /* Device status */ atomic_t status; + struct octep_vdpa *oct_vdpa; + struct octep_vdpa_event_wk event_wk; }; static struct octep_hw *vdpa_to_octep_hw(struct vdpa_device *vdpa_dev) @@ -45,6 +53,27 @@ static struct octep_hw *vdpa_to_octep_hw(struct vdpa_device *vdpa_dev) return oct_vdpa->oct_hw; } +static inline void octep_vdpa_dev_event_schedule(struct octep_hw *oct_hw) +{ + u8 __iomem *addr = oct_hw->base[OCTEP_HW_MBOX_BAR]; + struct octep_vdpa_mgmt_dev *mgmt_dev; + + mgmt_dev = container_of(oct_hw, struct octep_vdpa_mgmt_dev, oct_hw); + writeb(OCTEP_VDPA_DEV_EVENT_ACTIVE, addr + OCTEP_VF_EVENT_STATE(0)); + schedule_work(&mgmt_dev->event_wk.work); +} + +static irqreturn_t octep_vdpa_dev_event_handler(int irq, void *data) +{ + struct octep_hw *oct_hw = data; + + if (readb(oct_hw->base[OCTEP_HW_MBOX_BAR] + OCTEP_VF_EVENT_STATE(0)) == + OCTEP_VDPA_DEV_NEW_EVENT) + octep_vdpa_dev_event_schedule(oct_hw); + + return IRQ_HANDLED; +} + static irqreturn_t octep_vdpa_intr_handler(int irq, void *data) { struct octep_hw *oct_hw = data; @@ -73,11 +102,14 @@ static irqreturn_t octep_vdpa_intr_handler(int irq, void *data) } /* Check for config interrupt. 
Config uses the first interrupt */ - if (unlikely(irq == oct_hw->irqs[0] && ioread8(oct_hw->isr))) { - iowrite8(0, oct_hw->isr); + if (unlikely(irq == oct_hw->irqs[0])) { + if (ioread8(oct_hw->isr)) { + iowrite8(0, oct_hw->isr); - if (oct_hw->config_cb.callback) - oct_hw->config_cb.callback(oct_hw->config_cb.private); + if (oct_hw->config_cb.callback) + oct_hw->config_cb.callback(oct_hw->config_cb.private); + } + octep_vdpa_dev_event_handler(irq, data); } return IRQ_HANDLED; @@ -101,33 +133,41 @@ static void octep_free_irqs(struct octep_hw *oct_hw) pci_free_irq_vectors(pdev); devm_kfree(&pdev->dev, oct_hw->irqs); oct_hw->irqs = NULL; + oct_hw->requested_irqs = 0; } -static int octep_request_irqs(struct octep_hw *oct_hw) +static int octep_request_irqs(struct octep_hw *oct_hw, irqreturn_t (*irq_handler)(int, void *), + int nb_irqs) { struct pci_dev *pdev = oct_hw->pdev; int ret, irq, idx; - oct_hw->irqs = devm_kcalloc(&pdev->dev, oct_hw->nb_irqs, sizeof(int), GFP_KERNEL); + if ((oct_hw->requested_irqs != nb_irqs) || (nb_irqs == 1)) + octep_free_irqs(oct_hw); + else + return 0; + + oct_hw->irqs = devm_kcalloc(&pdev->dev, nb_irqs, sizeof(int), GFP_KERNEL); if (!oct_hw->irqs) return -ENOMEM; - ret = pci_alloc_irq_vectors(pdev, 1, oct_hw->nb_irqs, PCI_IRQ_MSIX); + ret = pci_alloc_irq_vectors(pdev, 1, nb_irqs, PCI_IRQ_MSIX); if (ret < 0) { dev_err(&pdev->dev, "Failed to alloc msix vector"); return ret; } - for (idx = 0; idx < oct_hw->nb_irqs; idx++) { + for (idx = 0; idx < nb_irqs; idx++) { irq = pci_irq_vector(pdev, idx); - ret = devm_request_irq(&pdev->dev, irq, octep_vdpa_intr_handler, 0, - dev_name(&pdev->dev), oct_hw); + ret = devm_request_irq(&pdev->dev, irq, irq_handler, 0, dev_name(&pdev->dev), + oct_hw); if (ret) { dev_err(&pdev->dev, "Failed to register interrupt handler\n"); goto free_irqs; } oct_hw->irqs[idx] = irq; } + oct_hw->requested_irqs = nb_irqs; return 0; @@ -189,7 +229,7 @@ static void octep_vdpa_set_status(struct vdpa_device *vdpa_dev, u8 status) if 
((status & VIRTIO_CONFIG_S_DRIVER_OK) && !(status_old & VIRTIO_CONFIG_S_DRIVER_OK)) { - if (octep_request_irqs(oct_hw)) + if (octep_request_irqs(oct_hw, octep_vdpa_intr_handler, oct_hw->nb_irqs)) status = status_old | VIRTIO_CONFIG_S_FAILED; } octep_hw_set_status(oct_hw, status); @@ -212,8 +252,10 @@ static int octep_vdpa_reset(struct vdpa_device *vdpa_dev) } octep_hw_reset(oct_hw); - if (status & VIRTIO_CONFIG_S_DRIVER_OK) + if (status & VIRTIO_CONFIG_S_DRIVER_OK) { octep_free_irqs(oct_hw); + octep_request_irqs(oct_hw, octep_vdpa_dev_event_handler, 1); + } return 0; } @@ -478,7 +520,8 @@ static void octep_vdpa_remove_vf(struct pci_dev *pdev) atomic_set(&mgmt_dev->status, OCTEP_VDPA_DEV_STATUS_UNINIT); cancel_work_sync(&mgmt_dev->setup_task); - if (status == OCTEP_VDPA_DEV_STATUS_READY) + if ((status == OCTEP_VDPA_DEV_STATUS_READY) || (status == OCTEP_VDPA_DEV_STATUS_ADDED) || + (status == OCTEP_VDPA_DEV_STATUS_REMOVED)) vdpa_mgmtdev_unregister(&mgmt_dev->mdev); if (oct_hw->base[OCTEP_HW_CAPS_BAR]) @@ -488,6 +531,7 @@ static void octep_vdpa_remove_vf(struct pci_dev *pdev) octep_iounmap_region(pdev, oct_hw->base, OCTEP_HW_MBOX_BAR); octep_vdpa_vf_bar_shrink(pdev); + octep_free_irqs(oct_hw); } static void octep_vdpa_remove(struct pci_dev *pdev) @@ -521,6 +565,7 @@ static int octep_vdpa_dev_add(struct vdpa_mgmt_dev *mdev, const char *name, oct_vdpa->vdpa.mdev = mdev; oct_vdpa->oct_hw = oct_hw; vdpa_dev = &oct_vdpa->vdpa; + mgmt_dev->oct_vdpa = oct_vdpa; device_features = oct_hw->features; if (config->mask & BIT_ULL(VDPA_ATTR_DEV_FEATURES)) { @@ -554,6 +599,7 @@ static int octep_vdpa_dev_add(struct vdpa_mgmt_dev *mdev, const char *name, dev_err(&pdev->dev, "Failed to register to vDPA bus"); goto vdpa_dev_put; } + atomic_set(&mgmt_dev->status, OCTEP_VDPA_DEV_STATUS_ADDED); return 0; vdpa_dev_put: @@ -563,7 +609,9 @@ vdpa_dev_put: static void octep_vdpa_dev_del(struct vdpa_mgmt_dev *mdev, struct vdpa_device *vdpa_dev) { + struct octep_vdpa_mgmt_dev *mgmt_dev = 
container_of(mdev, struct octep_vdpa_mgmt_dev, mdev); _vdpa_unregister_device(vdpa_dev); + atomic_set(&mgmt_dev->status, OCTEP_VDPA_DEV_STATUS_REMOVED); } static const struct vdpa_mgmtdev_ops octep_vdpa_mgmt_dev_ops = { @@ -588,6 +636,36 @@ static struct virtio_device_id id_table[] = { { 0 }, }; +static void octep_event_work(struct work_struct *work) +{ + struct octep_vdpa_event_wk *wk = container_of(work, struct octep_vdpa_event_wk, work); + struct octep_vdpa_mgmt_dev *mgmt_dev = (struct octep_vdpa_mgmt_dev *)wk->ctxptr; + u8 __iomem *addr = mgmt_dev->oct_hw.base[OCTEP_HW_MBOX_BAR]; + u8 event = readb(addr + OCTEP_VF_EVENT_REG(0)); + struct vdpa_dev_set_config config = {0}; + char name[OCTEP_VDPA_NAME_BUFSIZE]; + int ret = 0; + + switch (event) { + case OCTEP_VDPA_DEV_ADD_EVENT: + if (atomic_read(&mgmt_dev->status) != OCTEP_VDPA_DEV_STATUS_ADDED) { + snprintf(name, sizeof(name), "%s-%x", "vdpa", mgmt_dev->pdev->devfn); + ret = octep_vdpa_dev_add(&mgmt_dev->mdev, name, &config); + } + break; + case OCTEP_VDPA_DEV_DEL_EVENT: + if (atomic_read(&mgmt_dev->status) == OCTEP_VDPA_DEV_STATUS_ADDED) + octep_vdpa_dev_del(&mgmt_dev->mdev, &mgmt_dev->oct_vdpa->vdpa); + break; + default: + break; + } + + event = ret ? 
OCTEP_VDPA_DEV_EVENT_NACK : OCTEP_VDPA_DEV_EVENT_ACK; + writeb(event, addr + OCTEP_VF_EVENT_REG(0)); + writeb(OCTEP_VDPA_DEV_EVENT_DONE, addr + OCTEP_VF_EVENT_STATE(0)); +} + static void octep_vdpa_setup_task(struct work_struct *work) { struct octep_vdpa_mgmt_dev *mgmt_dev = container_of(work, struct octep_vdpa_mgmt_dev, @@ -653,6 +731,9 @@ static void octep_vdpa_setup_task(struct work_struct *work) } atomic_set(&mgmt_dev->status, OCTEP_VDPA_DEV_STATUS_READY); + INIT_WORK(&mgmt_dev->event_wk.work, octep_event_work); + mgmt_dev->event_wk.ctxptr = mgmt_dev; + octep_request_irqs(&mgmt_dev->oct_hw, octep_vdpa_dev_event_handler, 1); return; -- cgit v1.2.3 From 0d21a1d6375a05274291e32c1ab7cd57dbb69513 Mon Sep 17 00:00:00 2001 From: Srujana Challa Date: Tue, 24 Feb 2026 15:22:26 +0530 Subject: vdpa/octeon_ep: fix IRQ-to-ring mapping in interrupt handler Look up the IRQ index in oct_hw->irqs instead of assuming irq - irqs[0]. This supports non-contiguous IRQ numbers and avoids incorrect ring indexing when irqs[0] is not the base. Fixes: 26f8ce06af64 ("vdpa/octeon_ep: enable support for multiple interrupts per device") Signed-off-by: Srujana Challa Signed-off-by: Michael S. Tsirkin Message-ID: <20260224095226.1001151-5-schalla@marvell.com> --- drivers/vdpa/octeon_ep/octep_vdpa_main.c | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) (limited to 'drivers') diff --git a/drivers/vdpa/octeon_ep/octep_vdpa_main.c b/drivers/vdpa/octeon_ep/octep_vdpa_main.c index 5b9d76632376..5b35993750f5 100644 --- a/drivers/vdpa/octeon_ep/octep_vdpa_main.c +++ b/drivers/vdpa/octeon_ep/octep_vdpa_main.c @@ -77,7 +77,7 @@ static irqreturn_t octep_vdpa_dev_event_handler(int irq, void *data) static irqreturn_t octep_vdpa_intr_handler(int irq, void *data) { struct octep_hw *oct_hw = data; - int i; + int i, start_ring_idx = -1; /* Each device has multiple interrupts (nb_irqs) shared among rings * (nr_vring). 
Device interrupts are mapped to the rings in a @@ -90,7 +90,16 @@ static irqreturn_t octep_vdpa_intr_handler(int irq, void *data) * 7 -> 7, 15, 23, 31, 39, 47, 55, 63; */ - for (i = irq - oct_hw->irqs[0]; i < oct_hw->nr_vring; i += oct_hw->nb_irqs) { + for (i = 0; i < oct_hw->nb_irqs; i++) { + if (oct_hw->irqs[i] == irq) { + start_ring_idx = i; + break; + } + } + if (start_ring_idx == -1) + return IRQ_NONE; + + for (i = start_ring_idx; i < oct_hw->nr_vring; i += oct_hw->nb_irqs) { if (ioread8(oct_hw->vqs[i].cb_notify_addr)) { /* Acknowledge the per ring notification to the device */ iowrite8(0, oct_hw->vqs[i].cb_notify_addr); -- cgit v1.2.3 From 7222e8c8567e2aa1a2f1b4d3e40158d64ce538b6 Mon Sep 17 00:00:00 2001 From: Huacai Chen Date: Fri, 29 May 2026 22:05:59 +0800 Subject: fw_cfg: Add support for LoongArch architecture Qemu fw_cfg support was missing for LoongArch, which made some functions unusable in virtual machines. So add the missing LoongArch defines. Signed-off-by: Huacai Chen Signed-off-by: Michael S. 
Tsirkin Message-ID: <20260529140559.1775511-1-chenhuacai@loongson.cn> --- drivers/firmware/Kconfig | 2 +- drivers/firmware/qemu_fw_cfg.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'drivers') diff --git a/drivers/firmware/Kconfig b/drivers/firmware/Kconfig index bbd2155d8483..9ec747716389 100644 --- a/drivers/firmware/Kconfig +++ b/drivers/firmware/Kconfig @@ -121,7 +121,7 @@ config RASPBERRYPI_FIRMWARE config FW_CFG_SYSFS tristate "QEMU fw_cfg device support in sysfs" - depends on SYSFS && (ARM || ARM64 || PARISC || PPC_PMAC || RISCV || SPARC || X86) + depends on SYSFS && (ARM || ARM64 || LOONGARCH || PARISC || PPC_PMAC || RISCV || SPARC || X86) depends on HAS_IOPORT_MAP default n help diff --git a/drivers/firmware/qemu_fw_cfg.c b/drivers/firmware/qemu_fw_cfg.c index 87a5421bc7d5..0c51a9df589f 100644 --- a/drivers/firmware/qemu_fw_cfg.c +++ b/drivers/firmware/qemu_fw_cfg.c @@ -211,7 +211,7 @@ static void fw_cfg_io_cleanup(void) /* arch-specific ctrl & data register offsets are not available in ACPI, DT */ #if !(defined(FW_CFG_CTRL_OFF) && defined(FW_CFG_DATA_OFF)) -# if (defined(CONFIG_ARM) || defined(CONFIG_ARM64) || defined(CONFIG_RISCV)) +# if (defined(CONFIG_ARM) || defined(CONFIG_ARM64) || defined(CONFIG_LOONGARCH) || defined(CONFIG_RISCV)) # define FW_CFG_CTRL_OFF 0x08 # define FW_CFG_DATA_OFF 0x00 # define FW_CFG_DMA_OFF 0x10 -- cgit v1.2.3 From 82da84282c0746ae7c6d87dad7b8daba88f0d091 Mon Sep 17 00:00:00 2001 From: Yui Washizu Date: Tue, 10 Mar 2026 15:14:52 +0900 Subject: virtio: add num_vf callback to virtio_bus Recent QEMU versions added support for virtio SR-IOV emulation, allowing virtio devices to expose SR-IOV VFs to the guest. However, virtio_bus does not implement the num_vf callback of bus_type, causing dev_num_vf() to return 0 for virtio devices even when SR-IOV VFs are active. net/core/rtnetlink.c calls dev_num_vf(dev->dev.parent) to populate IFLA_NUM_VF in RTM_GETLINK responses. 
For a virtio-net device, dev.parent points to the virtio_device, whose bus is virtio_bus. Without num_vf, SR-IOV VF information is silently omitted from tools that rely on rtnetlink, such as 'ip link show'. Add a num_vf callback that delegates to dev_num_vf(dev->parent), which in turn reaches the underlying transport (pci_bus_type for virtio-pci) where the actual VF count is tracked. Non-PCI transports are unaffected as dev_num_vf() returns 0 when no num_vf callback is present. Signed-off-by: Yui Washizu Signed-off-by: Michael S. Tsirkin Message-ID: <20260310061454.683894-1-yui.washidu@gmail.com> --- drivers/virtio/virtio.c | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'drivers') diff --git a/drivers/virtio/virtio.c b/drivers/virtio/virtio.c index 5bdc6b82b30b..299fa83be1d5 100644 --- a/drivers/virtio/virtio.c +++ b/drivers/virtio/virtio.c @@ -435,6 +435,14 @@ static void virtio_dev_shutdown(struct device *_d) dev->config->reset(dev); } +static int virtio_dev_num_vf(struct device *dev) +{ + struct virtio_device *vdev = dev_to_virtio(dev); + + return dev_num_vf(vdev->dev.parent); +} + + static const struct bus_type virtio_bus = { .name = "virtio", .match = virtio_dev_match, @@ -444,6 +452,7 @@ static const struct bus_type virtio_bus = { .remove = virtio_dev_remove, .irq_get_affinity = virtio_irq_get_affinity, .shutdown = virtio_dev_shutdown, + .num_vf = virtio_dev_num_vf, }; int __register_virtio_driver(struct virtio_driver *driver, struct module *owner) -- cgit v1.2.3 From 083082a4e6d8311c91ac7e1d59426514c5e1c04b Mon Sep 17 00:00:00 2001 From: Matias Ezequiel Vara Larsen Date: Tue, 26 May 2026 18:42:23 +0200 Subject: can: virtio: Add virtio CAN driver Add virtio CAN driver based on Virtio 1.4 specification (see https://github.com/oasis-tcs/virtio-spec/tree/virtio-1.4). The driver implements a complete CAN bus interface over Virtio transport, supporting both CAN Classic and CAN-FD Ids. In terms of frames, it supports classic and CAN FD.
RTR frames are only supported with classic CAN. Usage: - "ip link set up can0" - start controller - "ip link set down can0" - stop controller - "candump can0" - receive frames - "cansend can0 123#DEADBEEF" - send frames Signed-off-by: Harald Mommer Co-developed-by: Harald Mommer Signed-off-by: Mikhail Golubev-Ciuchea Co-developed-by: Marc Kleine-Budde Signed-off-by: Marc Kleine-Budde Cc: Damir Shaikhutdinov Reviewed-by: Francesco Valla Tested-by: Francesco Valla Signed-off-by: Matias Ezequiel Vara Larsen Signed-off-by: Michael S. Tsirkin Message-ID: --- drivers/net/can/Kconfig | 12 + drivers/net/can/Makefile | 1 + drivers/net/can/virtio_can.c | 1022 ++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 1035 insertions(+) create mode 100644 drivers/net/can/virtio_can.c (limited to 'drivers') diff --git a/drivers/net/can/Kconfig b/drivers/net/can/Kconfig index e15e320db476..e4058708ae68 100644 --- a/drivers/net/can/Kconfig +++ b/drivers/net/can/Kconfig @@ -226,6 +226,18 @@ config CAN_TI_HECC Driver for TI HECC (High End CAN Controller) module found on many TI devices. The device specifications are available from www.ti.com +config CAN_VIRTIO_CAN + depends on VIRTIO + tristate "Virtio CAN device support" + default n + help + Say Y here if you want support for Virtio CAN. + + To compile this driver as a module, choose M here: the + module will be called virtio_can. + + If unsure, say N.
+ config CAN_XILINXCAN tristate "Xilinx CAN" depends on ARCH_ZYNQ || ARM64 || MICROBLAZE || COMPILE_TEST diff --git a/drivers/net/can/Makefile b/drivers/net/can/Makefile index d7bc10a6b8ea..4010d17f8583 100644 --- a/drivers/net/can/Makefile +++ b/drivers/net/can/Makefile @@ -33,6 +33,7 @@ obj-$(CONFIG_CAN_PEAK_PCIEFD) += peak_canfd/ obj-$(CONFIG_CAN_SJA1000) += sja1000/ obj-$(CONFIG_CAN_SUN4I) += sun4i_can.o obj-$(CONFIG_CAN_TI_HECC) += ti_hecc.o +obj-$(CONFIG_CAN_VIRTIO_CAN) += virtio_can.o obj-$(CONFIG_CAN_XILINXCAN) += xilinx_can.o subdir-ccflags-$(CONFIG_CAN_DEBUG_DEVICES) += -DDEBUG diff --git a/drivers/net/can/virtio_can.c b/drivers/net/can/virtio_can.c new file mode 100644 index 000000000000..f67d0bf09681 --- /dev/null +++ b/drivers/net/can/virtio_can.c @@ -0,0 +1,1022 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * CAN bus driver for the Virtio CAN controller + * + * Copyright (C) 2021-2023 OpenSynergy GmbH + * Copyright Red Hat, Inc. 2025 + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +/* CAN device queues */ +#define VIRTIO_CAN_QUEUE_TX 0 +#define VIRTIO_CAN_QUEUE_RX 1 +#define VIRTIO_CAN_QUEUE_CONTROL 2 +#define VIRTIO_CAN_QUEUE_COUNT 3 + +#define CAN_KNOWN_FLAGS \ + (VIRTIO_CAN_FLAGS_EXTENDED |\ + VIRTIO_CAN_FLAGS_FD |\ + VIRTIO_CAN_FLAGS_RTR) + +/* Max. 
number of in flight TX messages */ +#define VIRTIO_CAN_ECHO_SKB_MAX 128 + +struct virtio_can_tx { + unsigned int putidx; + struct virtio_can_tx_in tx_in; + /* Keep virtio_can_tx_out at the end of the structure due to flex array */ + struct virtio_can_tx_out tx_out; +}; + +struct virtio_can_control { + struct virtio_can_control_out cpkt_out; + struct virtio_can_control_in cpkt_in; +}; + +/* virtio_can private data structure */ +struct virtio_can_priv { + struct can_priv can; /* must be the first member */ + /* NAPI for RX messages */ + struct napi_struct napi; + /* NAPI for TX messages */ + struct napi_struct napi_tx; + /* The network device we're associated with */ + struct net_device *dev; + /* The virtio device we're associated with */ + struct virtio_device *vdev; + /* The virtqueues */ + struct virtqueue *vqs[VIRTIO_CAN_QUEUE_COUNT]; + /* Lock for TX operations */ + spinlock_t tx_lock; + /* Control queue lock */ + struct mutex ctrl_lock; + /* Wait for control queue processing without polling */ + struct completion ctrl_done; + /* Array of receive queue messages */ + struct virtio_can_rx *rpkt; + struct virtio_can_control can_ctr_msg; + /* Data to get and maintain the putidx for local TX echo */ + struct ida tx_putidx_ida; + /* In flight TX messages */ + atomic_t tx_inflight; + /* Packet length */ + int rpkt_len; + /* BusOff pending. 
Reset after successful indication to upper layer */ + bool busoff_pending; + /* Tracks whether NAPI instances are currently enabled */ + bool napi_active; +}; + +static void virtqueue_napi_schedule(struct napi_struct *napi, + struct virtqueue *vq) +{ + if (napi_schedule_prep(napi)) { + virtqueue_disable_cb(vq); + __napi_schedule(napi); + } +} + +static void virtqueue_napi_complete(struct napi_struct *napi, + struct virtqueue *vq, int processed) +{ + int opaque; + + opaque = virtqueue_enable_cb_prepare(vq); + if (napi_complete_done(napi, processed)) { + if (unlikely(virtqueue_poll(vq, opaque))) + virtqueue_napi_schedule(napi, vq); + } else { + virtqueue_disable_cb(vq); + } +} + +static void virtio_can_free_candev(struct net_device *ndev) +{ + struct virtio_can_priv *priv = netdev_priv(ndev); + + ida_destroy(&priv->tx_putidx_ida); + free_candev(ndev); +} + +static void virtio_can_napi_enable(struct virtio_can_priv *priv) +{ + if (!priv->napi_active) { + napi_enable(&priv->napi); + napi_enable(&priv->napi_tx); + priv->napi_active = true; + } +} + +static void virtio_can_napi_disable(struct virtio_can_priv *priv) +{ + if (priv->napi_active) { + napi_disable(&priv->napi_tx); + napi_disable(&priv->napi); + priv->napi_active = false; + } +} + +static int virtio_can_alloc_tx_idx(struct virtio_can_priv *priv) +{ + int tx_idx; + + tx_idx = ida_alloc_max(&priv->tx_putidx_ida, + priv->can.echo_skb_max - 1, GFP_ATOMIC); + if (tx_idx >= 0) + atomic_inc(&priv->tx_inflight); + + return tx_idx; +} + +static void virtio_can_free_tx_idx(struct virtio_can_priv *priv, + unsigned int idx) +{ + ida_free(&priv->tx_putidx_ida, idx); + atomic_dec(&priv->tx_inflight); +} + +/* Create a scatter-gather list representing our input buffer and put + * it in the queue. + * + * Callers should take appropriate locks. 
+ */ +static int virtio_can_add_inbuf(struct virtqueue *vq, void *buf, + unsigned int size) +{ + struct scatterlist sg[1]; + int ret; + + sg_init_one(sg, buf, size); + + ret = virtqueue_add_inbuf(vq, sg, 1, buf, GFP_ATOMIC); + + return ret; +} + +/* Send a control message with message type either + * + * - VIRTIO_CAN_SET_CTRL_MODE_START or + * - VIRTIO_CAN_SET_CTRL_MODE_STOP. + * + */ +static u8 virtio_can_send_ctrl_msg(struct net_device *ndev, u16 msg_type) +{ + struct scatterlist sg_out, sg_in, *sgs[2] = { &sg_out, &sg_in }; + struct virtio_can_priv *priv = netdev_priv(ndev); + struct virtqueue *vq = priv->vqs[VIRTIO_CAN_QUEUE_CONTROL]; + struct device *dev = &priv->vdev->dev; + unsigned int len; + int err; + + if (!vq) + return VIRTIO_CAN_RESULT_NOT_OK; + + guard(mutex)(&priv->ctrl_lock); + + priv->can_ctr_msg.cpkt_out.msg_type = cpu_to_le16(msg_type); + sg_init_one(&sg_out, &priv->can_ctr_msg.cpkt_out, + sizeof(priv->can_ctr_msg.cpkt_out)); + sg_init_one(&sg_in, &priv->can_ctr_msg.cpkt_in, sizeof(priv->can_ctr_msg.cpkt_in)); + + reinit_completion(&priv->ctrl_done); + + err = virtqueue_add_sgs(vq, sgs, 1u, 1u, priv, GFP_ATOMIC); + if (err != 0) { + dev_err(dev, "%s(): virtqueue_add_sgs() failed\n", __func__); + return VIRTIO_CAN_RESULT_NOT_OK; + } + + if (!virtqueue_kick(vq)) { + dev_err(dev, "%s(): Kick failed\n", __func__); + return VIRTIO_CAN_RESULT_NOT_OK; + } + + while (!virtqueue_get_buf(vq, &len) && !virtqueue_is_broken(vq)) + wait_for_completion(&priv->ctrl_done); + + return priv->can_ctr_msg.cpkt_in.result; +} + +static int virtio_can_start(struct net_device *ndev) +{ + struct virtio_can_priv *priv = netdev_priv(ndev); + u8 result; + + result = virtio_can_send_ctrl_msg(ndev, VIRTIO_CAN_SET_CTRL_MODE_START); + if (result != VIRTIO_CAN_RESULT_OK) { + netdev_err(ndev, "CAN controller start failed\n"); + return -EIO; + } + + priv->busoff_pending = false; + priv->can.state = CAN_STATE_ERROR_ACTIVE; + + return 0; +} + +static int virtio_can_set_mode(struct 
net_device *dev, enum can_mode mode) +{ + int err; + + switch (mode) { + case CAN_MODE_START: + err = virtio_can_start(dev); + if (err) + return err; + netif_wake_queue(dev); + break; + default: + return -EOPNOTSUPP; + } + + return 0; +} + +static int virtio_can_open(struct net_device *ndev) +{ + struct virtio_can_priv *priv = netdev_priv(ndev); + int err; + + err = open_candev(ndev); + if (err) + return err; + + err = virtio_can_start(ndev); + if (err) { + close_candev(ndev); + return err; + } + + virtio_can_napi_enable(priv); + netif_start_queue(ndev); + + return 0; +} + +static int virtio_can_stop(struct net_device *ndev) +{ + struct virtio_can_priv *priv = netdev_priv(ndev); + struct device *dev = &priv->vdev->dev; + u8 result; + + result = virtio_can_send_ctrl_msg(ndev, VIRTIO_CAN_SET_CTRL_MODE_STOP); + if (result != VIRTIO_CAN_RESULT_OK) { + dev_err(dev, "CAN controller stop failed\n"); + return -EIO; + } + + priv->busoff_pending = false; + priv->can.state = CAN_STATE_STOPPED; + + /* Switch carrier off if device was connected to the bus */ + if (netif_carrier_ok(ndev)) + netif_carrier_off(ndev); + + return 0; +} + +static int virtio_can_close(struct net_device *dev) +{ + struct virtio_can_priv *priv = netdev_priv(dev); + + netif_stop_queue(dev); + /* Ignore stop error: ndo_stop must always complete cleanup regardless. + * virtio_can_stop() already logs the error if it fails. 
+ */ + virtio_can_stop(dev); + virtio_can_napi_disable(priv); + close_candev(dev); + + return 0; +} + +static netdev_tx_t virtio_can_start_xmit(struct sk_buff *skb, + struct net_device *dev) +{ + struct scatterlist sg_out, sg_in, *sgs[2] = { &sg_out, &sg_in }; + const unsigned int hdr_size = sizeof(struct virtio_can_tx_out); + struct canfd_frame *cf = (struct canfd_frame *)skb->data; + struct virtio_can_priv *priv = netdev_priv(dev); + struct virtqueue *vq = priv->vqs[VIRTIO_CAN_QUEUE_TX]; + netdev_tx_t xmit_ret = NETDEV_TX_OK; + struct virtio_can_tx *can_tx_msg; + u32 can_flags; + int putidx; + int err; + + if (can_dev_dropped_skb(dev, skb)) + goto kick; /* No way to return NET_XMIT_DROP here */ + + /* No local check for CAN_RTR_FLAG or FD frame against negotiated + * features. The device will reject those anyway if not supported. + */ + + can_tx_msg = kzalloc(sizeof(*can_tx_msg) + cf->len, GFP_ATOMIC); + if (!can_tx_msg) { + kfree_skb(skb); + dev->stats.tx_dropped++; + goto kick; /* No way to return NET_XMIT_DROP here */ + } + + can_tx_msg->tx_out.msg_type = cpu_to_le16(VIRTIO_CAN_TX); + can_tx_msg->tx_out.length = cpu_to_le16(cf->len); + can_flags = 0; + + if (cf->can_id & CAN_EFF_FLAG) { + can_flags |= VIRTIO_CAN_FLAGS_EXTENDED; + can_tx_msg->tx_out.can_id = cpu_to_le32(cf->can_id & CAN_EFF_MASK); + } else { + can_tx_msg->tx_out.can_id = cpu_to_le32(cf->can_id & CAN_SFF_MASK); + } + if (cf->can_id & CAN_RTR_FLAG) + can_flags |= VIRTIO_CAN_FLAGS_RTR; + else + memcpy(can_tx_msg->tx_out.sdu, cf->data, cf->len); + if (can_is_canfd_skb(skb)) + can_flags |= VIRTIO_CAN_FLAGS_FD; + + can_tx_msg->tx_out.flags = cpu_to_le32(can_flags); + + sg_init_one(&sg_out, &can_tx_msg->tx_out, hdr_size + cf->len); + sg_init_one(&sg_in, &can_tx_msg->tx_in, sizeof(can_tx_msg->tx_in)); + + putidx = virtio_can_alloc_tx_idx(priv); + + if (unlikely(putidx < 0)) { + /* -ENOMEM or -ENOSPC here. 
-ENOSPC should not be possible as + * tx_inflight >= can.echo_skb_max is checked in flow control + */ + WARN_ON_ONCE(putidx == -ENOSPC); + kfree(can_tx_msg); + kfree_skb(skb); + dev->stats.tx_dropped++; + goto kick; /* No way to return NET_XMIT_DROP here */ + } + + can_tx_msg->putidx = (unsigned int)putidx; + + /* Push loopback echo. Will be looped back on TX interrupt/TX NAPI */ + err = can_put_echo_skb(skb, dev, can_tx_msg->putidx, 0); + if (unlikely(err)) { + /* skb was already freed by can_put_echo_skb() on error */ + virtio_can_free_tx_idx(priv, can_tx_msg->putidx); + kfree(can_tx_msg); + dev->stats.tx_dropped++; + goto kick; + } + + /* Protect queue and list operations */ + scoped_guard(spinlock_irqsave, &priv->tx_lock) + err = virtqueue_add_sgs(vq, sgs, 1u, 1u, can_tx_msg, GFP_ATOMIC); + + if (unlikely(err)) { + /* + * can_put_echo_skb() already consumed skb via consume_skb(), + * so returning NETDEV_TX_BUSY would cause the stack to requeue + * a freed pointer. Drop the frame and return OK instead. 
+ */ + can_free_echo_skb(dev, can_tx_msg->putidx, NULL); + virtio_can_free_tx_idx(priv, can_tx_msg->putidx); + netif_stop_queue(dev); + kfree(can_tx_msg); + dev->stats.tx_dropped++; + /* Expected never to be seen */ + netdev_warn(dev, "TX: Stop queue, err = %d\n", err); + goto kick; + } + + /* Normal flow control: stop queue when no transmission slots left */ + if (atomic_read(&priv->tx_inflight) >= priv->can.echo_skb_max || + vq->num_free == 0 || (vq->num_free < ARRAY_SIZE(sgs) && + !virtio_has_feature(vq->vdev, VIRTIO_RING_F_INDIRECT_DESC))) { + netif_stop_queue(dev); + netdev_dbg(dev, "TX: Normal stop queue\n"); + } + +kick: + if (netif_queue_stopped(dev) || !netdev_xmit_more()) { + scoped_guard(spinlock_irqsave, &priv->tx_lock) { + if (!virtqueue_kick(vq)) + netdev_err(dev, "%s(): Kick failed\n", __func__); + } + } + + return xmit_ret; +} + +static const struct net_device_ops virtio_can_netdev_ops = { + .ndo_open = virtio_can_open, + .ndo_stop = virtio_can_close, + .ndo_start_xmit = virtio_can_start_xmit, +}; + +static int register_virtio_can_dev(struct net_device *dev) +{ + dev->flags |= IFF_ECHO; /* we support local echo */ + dev->netdev_ops = &virtio_can_netdev_ops; + + return register_candev(dev); +} + +static int virtio_can_read_tx_queue(struct virtqueue *vq) +{ + struct virtio_can_priv *can_priv = vq->vdev->priv; + struct net_device *dev = can_priv->dev; + struct virtio_can_tx *can_tx_msg; + struct net_device_stats *stats; + unsigned int len; + u8 result; + + stats = &dev->stats; + + scoped_guard(spinlock_irqsave, &can_priv->tx_lock) + can_tx_msg = virtqueue_get_buf(vq, &len); + + if (!can_tx_msg) + return 0; + + if (unlikely(len < sizeof(struct virtio_can_tx_in))) { + netdev_err(dev, "TX ACK: Device sent no result code\n"); + result = VIRTIO_CAN_RESULT_NOT_OK; /* Keep things going */ + } else { + result = can_tx_msg->tx_in.result; + } + + if (can_priv->can.state < CAN_STATE_BUS_OFF) { + if (result != VIRTIO_CAN_RESULT_OK) { + struct can_frame *skb_cf; + 
struct sk_buff *skb = alloc_can_err_skb(dev, &skb_cf); + + if (skb) { + skb_cf->can_id |= CAN_ERR_CRTL; + skb_cf->data[1] |= CAN_ERR_CRTL_UNSPEC; + netif_rx(skb); + } + netdev_warn(dev, "TX ACK: Result = %u\n", result); + can_free_echo_skb(dev, can_tx_msg->putidx, NULL); + stats->tx_dropped++; + } else { + stats->tx_bytes += can_get_echo_skb(dev, can_tx_msg->putidx, + NULL); + stats->tx_packets++; + } + } else { + netdev_dbg(dev, "TX ACK: Controller inactive, drop echo\n"); + can_free_echo_skb(dev, can_tx_msg->putidx, NULL); + stats->tx_dropped++; + } + + virtio_can_free_tx_idx(can_priv, can_tx_msg->putidx); + + /* Flow control */ + if (netif_queue_stopped(dev)) { + netdev_dbg(dev, "TX ACK: Wake up stopped queue\n"); + netif_wake_queue(dev); + } + + kfree(can_tx_msg); + + return 1; /* Queue was not empty so there may be more data */ +} + +static int virtio_can_tx_poll(struct napi_struct *napi, int quota) +{ + struct net_device *dev = napi->dev; + struct virtio_can_priv *priv = netdev_priv(dev); + struct virtqueue *vq = priv->vqs[VIRTIO_CAN_QUEUE_TX]; + int work_done = 0; + + while (work_done < quota && virtio_can_read_tx_queue(vq) != 0) + work_done++; + + if (work_done < quota) + virtqueue_napi_complete(napi, vq, work_done); + + return work_done; +} + +static void virtio_can_tx_intr(struct virtqueue *vq) +{ + struct virtio_can_priv *can_priv = vq->vdev->priv; + + virtqueue_disable_cb(vq); + napi_schedule(&can_priv->napi_tx); +} + +/* This function is the NAPI RX poll function and NAPI guarantees that this + * function is not invoked simultaneously on multiple processors. + * Read a RX message from the used queue and sends it to the upper layer. 
+ */ +static int virtio_can_read_rx_queue(struct virtqueue *vq) +{ + const unsigned int header_size = sizeof(struct virtio_can_rx); + struct virtio_can_priv *priv = vq->vdev->priv; + struct net_device *dev = priv->dev; + struct net_device_stats *stats; + struct virtio_can_rx *can_rx; + unsigned int transport_len; + struct canfd_frame *cf; + struct sk_buff *skb; + unsigned int len; + u32 can_flags; + u16 msg_type; + u32 can_id; + int ret; + + stats = &dev->stats; + + can_rx = virtqueue_get_buf(vq, &transport_len); + if (!can_rx) + return 0; /* No more data */ + + if (transport_len < header_size) { + netdev_warn(dev, "RX: Message too small\n"); + goto putback; + } + + if (priv->can.state >= CAN_STATE_ERROR_PASSIVE) { + netdev_dbg(dev, "%s(): Controller not active\n", __func__); + goto putback; + } + + msg_type = le16_to_cpu(can_rx->msg_type); + if (msg_type != VIRTIO_CAN_RX) { + netdev_warn(dev, "RX: Got unknown msg_type %04x\n", msg_type); + goto putback; + } + + len = le16_to_cpu(can_rx->length); + can_flags = le32_to_cpu(can_rx->flags); + can_id = le32_to_cpu(can_rx->can_id); + + if (can_flags & ~CAN_KNOWN_FLAGS) { + stats->rx_dropped++; + netdev_warn(dev, "RX: CAN Id 0x%08x: Invalid flags 0x%x\n", + can_id, can_flags); + goto putback; + } + + if (can_flags & VIRTIO_CAN_FLAGS_EXTENDED) { + can_id &= CAN_EFF_MASK; + can_id |= CAN_EFF_FLAG; + } else { + can_id &= CAN_SFF_MASK; + } + + if (can_flags & VIRTIO_CAN_FLAGS_RTR) { + if (!virtio_has_feature(vq->vdev, VIRTIO_CAN_F_RTR_FRAMES)) { + stats->rx_dropped++; + netdev_warn(dev, "RX: CAN Id 0x%08x: RTR not negotiated\n", + can_id); + goto putback; + } + if (can_flags & VIRTIO_CAN_FLAGS_FD) { + stats->rx_dropped++; + netdev_warn(dev, "RX: CAN Id 0x%08x: RTR with FD not possible\n", + can_id); + goto putback; + } + + if (len > 0xF) { + stats->rx_dropped++; + netdev_warn(dev, "RX: CAN Id 0x%08x: RTR with DLC > 0xF\n", + can_id); + goto putback; + } + + if (len > 0x8) + len = 0x8; + + can_id |= CAN_RTR_FLAG; + } + + if 
(transport_len < header_size + len) { + netdev_warn(dev, "RX: Message too small for payload\n"); + goto putback; + } + + if (can_flags & VIRTIO_CAN_FLAGS_FD) { + if (!virtio_has_feature(vq->vdev, VIRTIO_CAN_F_CAN_FD)) { + stats->rx_dropped++; + netdev_warn(dev, "RX: CAN Id 0x%08x: FD not negotiated\n", + can_id); + goto putback; + } + + if (len > CANFD_MAX_DLEN) + len = CANFD_MAX_DLEN; + + skb = alloc_canfd_skb(priv->dev, &cf); + } else { + if (!virtio_has_feature(vq->vdev, VIRTIO_CAN_F_CAN_CLASSIC)) { + stats->rx_dropped++; + netdev_warn(dev, "RX: CAN Id 0x%08x: classic not negotiated\n", + can_id); + goto putback; + } + + if (len > CAN_MAX_DLEN) + len = CAN_MAX_DLEN; + + skb = alloc_can_skb(priv->dev, (struct can_frame **)&cf); + } + if (!skb) { + stats->rx_dropped++; + netdev_warn(dev, "RX: No skb available\n"); + goto putback; + } + + cf->can_id = can_id; + cf->len = len; + if (!(can_flags & VIRTIO_CAN_FLAGS_RTR)) { + /* RTR frames have a DLC but no payload */ + memcpy(cf->data, can_rx->sdu, len); + } + + if (netif_receive_skb(skb) == NET_RX_SUCCESS) { + stats->rx_packets++; + if (!(can_flags & VIRTIO_CAN_FLAGS_RTR)) + stats->rx_bytes += len; + } + +putback: + /* Put processed RX buffer back into avail queue */ + ret = virtio_can_add_inbuf(vq, can_rx, + priv->rpkt_len); + if (!ret) + virtqueue_kick(vq); + return 1; /* Queue was not empty so there may be more data */ +} + +static int virtio_can_handle_busoff(struct net_device *dev) +{ + struct virtio_can_priv *priv = netdev_priv(dev); + struct can_frame *cf; + struct sk_buff *skb; + + if (!priv->busoff_pending) + return 0; + + if (priv->can.state < CAN_STATE_BUS_OFF) { + netdev_dbg(dev, "entered error bus off state\n"); + + /* bus-off state */ + priv->can.state = CAN_STATE_BUS_OFF; + priv->can.can_stats.bus_off++; + can_bus_off(dev); + } + + /* propagate the error condition to the CAN stack */ + skb = alloc_can_err_skb(dev, &cf); + if (unlikely(!skb)) + return 0; + + /* bus-off state */ + cf->can_id |= 
CAN_ERR_BUSOFF; + + /* Ensure that the BusOff indication does not get lost */ + if (netif_receive_skb(skb) == NET_RX_SUCCESS) + priv->busoff_pending = false; + + return 1; +} + +static int virtio_can_rx_poll(struct napi_struct *napi, int quota) +{ + struct net_device *dev = napi->dev; + struct virtio_can_priv *priv = netdev_priv(dev); + struct virtqueue *vq = priv->vqs[VIRTIO_CAN_QUEUE_RX]; + int work_done = 0; + + work_done += virtio_can_handle_busoff(dev); + + while (work_done < quota && virtio_can_read_rx_queue(vq) != 0) + work_done++; + + if (work_done < quota) + virtqueue_napi_complete(napi, vq, work_done); + + return work_done; +} + +static void virtio_can_rx_intr(struct virtqueue *vq) +{ + struct virtio_can_priv *can_priv = vq->vdev->priv; + + virtqueue_disable_cb(vq); + napi_schedule(&can_priv->napi); +} + +static void virtio_can_control_intr(struct virtqueue *vq) +{ + struct virtio_can_priv *can_priv = vq->vdev->priv; + + complete(&can_priv->ctrl_done); +} + +static void virtio_can_config_changed(struct virtio_device *vdev) +{ + struct virtio_can_priv *can_priv = vdev->priv; + u16 status; + + status = virtio_cread16(vdev, offsetof(struct virtio_can_config, + status)); + + if (!(status & VIRTIO_CAN_S_CTRL_BUSOFF)) + return; + + if (!can_priv->busoff_pending && + can_priv->can.state < CAN_STATE_BUS_OFF) { + can_priv->busoff_pending = true; + napi_schedule(&can_priv->napi); + } +} + +static void virtio_can_populate_rx_vq(struct virtio_device *vdev) +{ + struct virtio_can_priv *priv = vdev->priv; + struct virtqueue *vq = priv->vqs[VIRTIO_CAN_QUEUE_RX]; + unsigned int buf_size = priv->rpkt_len; + int num_elements = vq->num_free; + u8 *buf = (u8 *)priv->rpkt; + unsigned int idx; + int ret = 0; + + for (idx = 0; idx < num_elements; idx++) { + ret = virtio_can_add_inbuf(vq, buf, buf_size); + if (ret < 0) { + dev_dbg(&vdev->dev, "rpkt fill: ret=%d, idx=%u, size=%u\n", + ret, idx, buf_size); + break; + } + buf += buf_size; + } + + if (idx > 0) + virtqueue_kick(vq); 
+ + dev_dbg(&vdev->dev, "%u rpkt added\n", idx); +} + +static int virtio_can_find_vqs(struct virtio_can_priv *priv) +{ + struct virtqueue_info vqs_info[] = { + { "can-tx", virtio_can_tx_intr }, + { "can-rx", virtio_can_rx_intr }, + { "can-state-ctrl", virtio_can_control_intr }, + }; + + /* Find the queues. */ + return virtio_find_vqs(priv->vdev, VIRTIO_CAN_QUEUE_COUNT, priv->vqs, + vqs_info, NULL); +} + +/* Function must not be called before virtio_can_find_vqs() has been run */ +static void virtio_can_del_vq(struct virtio_device *vdev) +{ + struct virtio_can_priv *priv = vdev->priv; + struct virtqueue *vq = priv->vqs[VIRTIO_CAN_QUEUE_TX]; + struct virtio_can_tx *can_tx_msg; + int q; + + if (!vq) + return; + + /* Reset the device */ + virtio_reset_device(vdev); + + /* From here we have dead silence from the device side so no locks + * are needed to protect against device side events. + */ + + /* Free pending TX buffers which were allocated in virtio_can_start_xmit() */ + while ((can_tx_msg = virtqueue_detach_unused_buf(vq))) { + can_free_echo_skb(priv->dev, can_tx_msg->putidx, NULL); + virtio_can_free_tx_idx(priv, can_tx_msg->putidx); + kfree(can_tx_msg); + } + + /* RX and control queue buffers are managed elsewhere, just detach */ + for (q = VIRTIO_CAN_QUEUE_RX; q < VIRTIO_CAN_QUEUE_COUNT; q++) + while (virtqueue_detach_unused_buf(priv->vqs[q])) + ; + + if (vdev->config->del_vqs) + vdev->config->del_vqs(vdev); + + memset(priv->vqs, 0, sizeof(priv->vqs)); +} + +static void virtio_can_remove(struct virtio_device *vdev) +{ + struct virtio_can_priv *priv = vdev->priv; + struct net_device *dev = priv->dev; + + unregister_candev(dev); + + virtio_can_del_vq(vdev); + + virtio_can_free_candev(dev); +} + +static int virtio_can_validate(struct virtio_device *vdev) +{ + /* CAN needs always access to the config space. 
+ * Check that the driver can access the config space + */ + if (!vdev->config->get) { + dev_err(&vdev->dev, "%s failure: config access disabled\n", + __func__); + return -EINVAL; + } + + if (!virtio_has_feature(vdev, VIRTIO_F_VERSION_1)) { + dev_err(&vdev->dev, + "device does not comply with spec version 1.x\n"); + return -EINVAL; + } + + return 0; +} + +static int virtio_can_probe(struct virtio_device *vdev) +{ + struct virtio_can_priv *priv; + struct net_device *dev; + size_t size; + int err; + + dev = alloc_candev(sizeof(struct virtio_can_priv), + VIRTIO_CAN_ECHO_SKB_MAX); + if (!dev) + return -ENOMEM; + + priv = netdev_priv(dev); + + ida_init(&priv->tx_putidx_ida); + + netif_napi_add(dev, &priv->napi, virtio_can_rx_poll); + netif_napi_add(dev, &priv->napi_tx, virtio_can_tx_poll); + + SET_NETDEV_DEV(dev, &vdev->dev); + + priv->dev = dev; + priv->vdev = vdev; + vdev->priv = priv; + + priv->can.do_set_mode = virtio_can_set_mode; + priv->can.bittiming.bitrate = CAN_BITRATE_UNKNOWN; + /* Set Virtio CAN supported operations */ + priv->can.ctrlmode_supported = CAN_CTRLMODE_BERR_REPORTING; + if (virtio_has_feature(vdev, VIRTIO_CAN_F_CAN_FD)) { + priv->can.fd.data_bittiming.bitrate = CAN_BITRATE_UNKNOWN; + err = can_set_static_ctrlmode(dev, CAN_CTRLMODE_FD); + if (err != 0) + goto on_failure; + } + + /* Initialize virtqueues */ + err = virtio_can_find_vqs(priv); + if (err != 0) + goto on_failure; + + spin_lock_init(&priv->tx_lock); + mutex_init(&priv->ctrl_lock); + + init_completion(&priv->ctrl_done); + + priv->rpkt_len = sizeof(struct virtio_can_rx); + + if (virtio_has_feature(vdev, VIRTIO_CAN_F_CAN_FD)) + priv->rpkt_len += CANFD_MAX_DLEN; + else + priv->rpkt_len += CAN_MAX_DLEN; + + size = priv->rpkt_len * priv->vqs[VIRTIO_CAN_QUEUE_RX]->num_free; + priv->rpkt = devm_kzalloc(&vdev->dev, size, GFP_KERNEL); + if (!priv->rpkt) { + virtio_can_del_vq(vdev); + err = -ENOMEM; + goto on_failure; + } + virtio_can_populate_rx_vq(vdev); + + err = register_virtio_can_dev(dev); + 
if (err) { + virtio_can_del_vq(vdev); + goto on_failure; + } + + return 0; + +on_failure: + virtio_can_free_candev(dev); + return err; +} + +static int __maybe_unused virtio_can_freeze(struct virtio_device *vdev) +{ + struct virtio_can_priv *priv = vdev->priv; + struct net_device *ndev = priv->dev; + + if (netif_running(ndev)) { + /* virtio_can_close() calls netif_stop_queue(), virtio_can_stop(), + * napi_disable() and close_candev(). Call it directly (not via + * dev_close()) to preserve IFF_UP so that netif_running() returns + * true in virtio_can_restore() and the device is brought back up. + */ + virtio_can_close(ndev); + netif_device_detach(ndev); + } + + priv->can.state = CAN_STATE_SLEEPING; + + virtio_can_del_vq(vdev); + + return 0; +} + +static int __maybe_unused virtio_can_restore(struct virtio_device *vdev) +{ + struct virtio_can_priv *priv = vdev->priv; + struct net_device *ndev = priv->dev; + size_t size; + int err; + + err = virtio_can_find_vqs(priv); + if (err != 0) + return err; + + size = priv->rpkt_len * priv->vqs[VIRTIO_CAN_QUEUE_RX]->num_free; + priv->rpkt = devm_krealloc(&vdev->dev, priv->rpkt, size, GFP_KERNEL | __GFP_ZERO); + if (!priv->rpkt) { + virtio_can_del_vq(vdev); + return -ENOMEM; + } + virtio_can_populate_rx_vq(vdev); + + if (netif_running(ndev)) { + /* virtio_can_open() calls open_candev(), virtio_can_start(), + * napi_enable() and netif_start_queue(). Call it directly (not + * via dev_open()) since IFF_UP is still set from before freeze. 
+ */ + err = virtio_can_open(ndev); + if (err) { + virtio_can_del_vq(vdev); + return err; + } + netif_device_attach(ndev); + } else { + priv->can.state = CAN_STATE_STOPPED; + } + + return 0; +} + +static struct virtio_device_id virtio_can_id_table[] = { + { VIRTIO_ID_CAN, VIRTIO_DEV_ANY_ID }, + { 0 }, +}; + +static unsigned int features[] = { + VIRTIO_CAN_F_CAN_CLASSIC, + VIRTIO_CAN_F_CAN_FD, + VIRTIO_CAN_F_LATE_TX_ACK, + VIRTIO_CAN_F_RTR_FRAMES, +}; + +static struct virtio_driver virtio_can_driver = { + .feature_table = features, + .feature_table_size = ARRAY_SIZE(features), + .driver.name = KBUILD_MODNAME, + .driver.owner = THIS_MODULE, + .id_table = virtio_can_id_table, + .validate = virtio_can_validate, + .probe = virtio_can_probe, + .remove = virtio_can_remove, + .config_changed = virtio_can_config_changed, +#ifdef CONFIG_PM_SLEEP + .freeze = virtio_can_freeze, + .restore = virtio_can_restore, +#endif +}; + +module_virtio_driver(virtio_can_driver); +MODULE_DEVICE_TABLE(virtio, virtio_can_id_table); + +MODULE_AUTHOR("OpenSynergy GmbH"); +MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("CAN bus driver for Virtio CAN controller"); -- cgit v1.2.3