From 48fcc895403cc97aa6c776cb65e6aa11290c0b44 Mon Sep 17 00:00:00 2001 From: Wei Liu Date: Thu, 23 Apr 2026 17:26:26 +0000 Subject: mshv: add a missing padding field MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit That was missed when importing the header. Reported-by: Doru Blânzeanu Reported-by: Magnus Kulke Fixes: e68bda71a2384 ("hyperv: Add new Hyper-V headers in include/hyperv") Cc: stable@kernel.org Reviewed-by: Easwar Hariharan Signed-off-by: Wei Liu --- include/hyperv/hvhdk.h | 1 + 1 file changed, 1 insertion(+) diff --git a/include/hyperv/hvhdk.h b/include/hyperv/hvhdk.h index 5e83d3714966..0c89c62c9706 100644 --- a/include/hyperv/hvhdk.h +++ b/include/hyperv/hvhdk.h @@ -79,6 +79,7 @@ struct hv_vp_register_page { u64 registers[18]; }; + u8 reserved[8]; /* Volatile XMM registers (HV_X64_REGISTER_CLASS_XMM) */ union { struct { -- cgit v1.2.3 From 3fcf923302a8f5c0dc3af3d2ca2657cb5fae4297 Mon Sep 17 00:00:00 2001 From: Thorsten Blum Date: Tue, 14 Apr 2026 13:10:08 +0200 Subject: hv: utils: handle and propagate errors in kvp_register Make kvp_register() return an error code instead of silently ignoring failures, and propagate the error from kvp_handle_handshake() instead of returning success. This propagates both kzalloc_obj() and hvutil_transport_send() failures to kvp_handle_handshake() and thus to kvp_on_msg(). 
Fixes: 245ba56a52a3 ("Staging: hv: Implement key/value pair (KVP)") Cc: stable@vger.kernel.org Signed-off-by: Thorsten Blum Reviewed-by: Long Li Signed-off-by: Wei Liu --- drivers/hv/hv_kvp.c | 25 +++++++++++++------------ 1 file changed, 13 insertions(+), 12 deletions(-) diff --git a/drivers/hv/hv_kvp.c b/drivers/hv/hv_kvp.c index 0d73daf745a7..6180ebe040ff 100644 --- a/drivers/hv/hv_kvp.c +++ b/drivers/hv/hv_kvp.c @@ -93,7 +93,7 @@ static void kvp_send_key(struct work_struct *dummy); static void kvp_respond_to_host(struct hv_kvp_msg *msg, int error); static void kvp_timeout_func(struct work_struct *dummy); static void kvp_host_handshake_func(struct work_struct *dummy); -static void kvp_register(int); +static int kvp_register(int); static DECLARE_DELAYED_WORK(kvp_timeout_work, kvp_timeout_func); static DECLARE_DELAYED_WORK(kvp_host_handshake_work, kvp_host_handshake_func); @@ -127,24 +127,26 @@ static void kvp_register_done(void) hv_poll_channel(kvp_transaction.recv_channel, kvp_poll_wrapper); } -static void +static int kvp_register(int reg_value) { struct hv_kvp_msg *kvp_msg; char *version; + int ret; kvp_msg = kzalloc_obj(*kvp_msg); + if (!kvp_msg) + return -ENOMEM; - if (kvp_msg) { - version = kvp_msg->body.kvp_register.version; - kvp_msg->kvp_hdr.operation = reg_value; - strcpy(version, HV_DRV_VERSION); + version = kvp_msg->body.kvp_register.version; + kvp_msg->kvp_hdr.operation = reg_value; + strcpy(version, HV_DRV_VERSION); - hvutil_transport_send(hvt, kvp_msg, sizeof(*kvp_msg), - kvp_register_done); - kfree(kvp_msg); - } + ret = hvutil_transport_send(hvt, kvp_msg, sizeof(*kvp_msg), + kvp_register_done); + kfree(kvp_msg); + return ret; } static void kvp_timeout_func(struct work_struct *dummy) @@ -186,9 +188,8 @@ static int kvp_handle_handshake(struct hv_kvp_msg *msg) */ pr_debug("KVP: userspace daemon ver. 
%d connected\n", msg->kvp_hdr.operation); - kvp_register(dm_reg_value); - return 0; + return kvp_register(dm_reg_value); } -- cgit v1.2.3 From 3f8c8497b4fc249e27cb335c627114d8412e584d Mon Sep 17 00:00:00 2001 From: Thorsten Blum Date: Tue, 28 Apr 2026 19:11:05 +0200 Subject: hv: utils: replace deprecated strcpy with strscpy in kvp_register strcpy() has been deprecated [1] because it performs no bounds checking on the destination buffer, which can lead to buffer overflows. While the current code works correctly, replace strcpy() with the safer strscpy() to follow secure coding best practices. Use ->body.kvp_register.version directly as the destination buffer and remove the local variable. [1] https://www.kernel.org/doc/html/latest/process/deprecated.html#strcpy Signed-off-by: Thorsten Blum Signed-off-by: Wei Liu --- drivers/hv/hv_kvp.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/drivers/hv/hv_kvp.c b/drivers/hv/hv_kvp.c index 6180ebe040ff..336b278b2182 100644 --- a/drivers/hv/hv_kvp.c +++ b/drivers/hv/hv_kvp.c @@ -27,6 +27,7 @@ #include #include #include +#include #include #include "hyperv_vmbus.h" @@ -130,18 +131,15 @@ static void kvp_register_done(void) static int kvp_register(int reg_value) { - struct hv_kvp_msg *kvp_msg; - char *version; int ret; kvp_msg = kzalloc_obj(*kvp_msg); if (!kvp_msg) return -ENOMEM; - version = kvp_msg->body.kvp_register.version; kvp_msg->kvp_hdr.operation = reg_value; - strcpy(version, HV_DRV_VERSION); + strscpy(kvp_msg->body.kvp_register.version, HV_DRV_VERSION); ret = hvutil_transport_send(hvt, kvp_msg, sizeof(*kvp_msg), kvp_register_done); -- cgit v1.2.3 From f1a9e67c11388965802a61922c313bfc43272afe Mon Sep 17 00:00:00 2001 From: Jork Loeser Date: Mon, 27 Apr 2026 14:38:52 -0700 Subject: mshv: limit SynIC management to MSHV-owned resources The SynIC is shared between VMBus and MSHV. VMBus owns the message page (SIMP), event flags page (SIEFP), global enable (SCONTROL), and SINT2. 
MSHV adds SINT0, SINT5, and the event ring page (SIRBP). Currently mshv_synic_cpu_init() redundantly enables SIMP, SIEFP, and SCONTROL that VMBus already configured, and mshv_synic_cpu_exit() disables all of them. This is wrong because MSHV can be torn down while VMBus is still active. In particular, a kexec reboot notifier tears down MSHV first. Disabling SCONTROL, SIMP, and SIEFP out from under VMBus causes its later cleanup to write SynIC MSRs while SynIC is disabled, which the hypervisor does not tolerate. Restrict MSHV to managing only the resources it owns: - SINT0, SINT5: mask on cleanup, unmask on init - SIRBP: enable/disable as before - SIMP, SIEFP, SCONTROL: leave to VMBus when it is active (L1VH and nested root partition); on a non-nested root partition VMBus does not run, so MSHV must enable/disable them While here, fix the SIEFP and SIRBP memremap() and virt_to_phys() calls to use HV_HYP_PAGE_SHIFT/HV_HYP_PAGE_SIZE instead of PAGE_SHIFT/PAGE_SIZE. The hypervisor always uses 4K pages for SynIC register GPAs regardless of the kernel page size, so using PAGE_SHIFT produces wrong addresses on ARM64 with 64K pages. Note that initialization order matters - VMBus first, MSHV second, and the reverse on de-init. Ideally, we would want a dedicated SynIC driver that replaces the cross-dependencies with a clear API and dynamic tracking. Such a refactor should go into its own dedicated series, outside of this kexec fix series. 
Signed-off-by: Jork Loeser Reviewed-by: Anirudh Rayabharam (Microsoft) Reviewed-by: Stanislav Kinsburskii Signed-off-by: Wei Liu --- drivers/hv/hv.c | 3 + drivers/hv/mshv_synic.c | 150 ++++++++++++++++++++++++++++++++---------------- 2 files changed, 103 insertions(+), 50 deletions(-) diff --git a/drivers/hv/hv.c b/drivers/hv/hv.c index ae60fd542292..ef4b1b03395d 100644 --- a/drivers/hv/hv.c +++ b/drivers/hv/hv.c @@ -272,6 +272,9 @@ void hv_synic_free(void) /* * hv_hyp_synic_enable_regs - Initialize the Synthetic Interrupt Controller * with the hypervisor. + * + * Note: When MSHV is present, mshv_synic_cpu_init() initializes further + * registers later. */ void hv_hyp_synic_enable_regs(unsigned int cpu) { diff --git a/drivers/hv/mshv_synic.c b/drivers/hv/mshv_synic.c index e2288a726fec..2db3b0192eac 100644 --- a/drivers/hv/mshv_synic.c +++ b/drivers/hv/mshv_synic.c @@ -13,6 +13,7 @@ #include #include #include +#include #include #include #include @@ -456,46 +457,75 @@ static int mshv_synic_cpu_init(unsigned int cpu) union hv_synic_siefp siefp; union hv_synic_sirbp sirbp; union hv_synic_sint sint; - union hv_synic_scontrol sctrl; struct hv_synic_pages *spages = this_cpu_ptr(synic_pages); struct hv_message_page **msg_page = &spages->hyp_synic_message_page; struct hv_synic_event_flags_page **event_flags_page = &spages->synic_event_flags_page; struct hv_synic_event_ring_page **event_ring_page = &spages->synic_event_ring_page; + /* + * VMBus owns SIMP/SIEFP/SCONTROL when it is active. + * See hv_hyp_synic_enable_regs() for that initialization. + */ + bool vmbus_active = hv_vmbus_exists(); - /* Setup the Synic's message page */ + /* + * Map the SYNIC message page. When VMBus is not active the + * hypervisor pre-provisions the SIMP GPA but may not set + * simp_enabled — enable it here. 
+ */ simp.as_uint64 = hv_get_non_nested_msr(HV_MSR_SIMP); - simp.simp_enabled = true; + if (!vmbus_active) { + simp.simp_enabled = true; + hv_set_non_nested_msr(HV_MSR_SIMP, simp.as_uint64); + } *msg_page = memremap(simp.base_simp_gpa << HV_HYP_PAGE_SHIFT, HV_HYP_PAGE_SIZE, MEMREMAP_WB); if (!(*msg_page)) - return -EFAULT; - - hv_set_non_nested_msr(HV_MSR_SIMP, simp.as_uint64); + goto cleanup_simp; - /* Setup the Synic's event flags page */ + /* + * Map the event flags page. Same as SIMP: enable when + * VMBus is not active, already enabled by VMBus otherwise. + */ siefp.as_uint64 = hv_get_non_nested_msr(HV_MSR_SIEFP); - siefp.siefp_enabled = true; - *event_flags_page = memremap(siefp.base_siefp_gpa << PAGE_SHIFT, - PAGE_SIZE, MEMREMAP_WB); + if (!vmbus_active) { + siefp.siefp_enabled = true; + hv_set_non_nested_msr(HV_MSR_SIEFP, siefp.as_uint64); + } + *event_flags_page = memremap(siefp.base_siefp_gpa << HV_HYP_PAGE_SHIFT, + HV_HYP_PAGE_SIZE, MEMREMAP_WB); if (!(*event_flags_page)) - goto cleanup; - - hv_set_non_nested_msr(HV_MSR_SIEFP, siefp.as_uint64); + goto cleanup_siefp; /* Setup the Synic's event ring page */ sirbp.as_uint64 = hv_get_non_nested_msr(HV_MSR_SIRBP); - sirbp.sirbp_enabled = true; - *event_ring_page = memremap(sirbp.base_sirbp_gpa << PAGE_SHIFT, - PAGE_SIZE, MEMREMAP_WB); - if (!(*event_ring_page)) - goto cleanup; + if (hv_root_partition()) { + *event_ring_page = memremap(sirbp.base_sirbp_gpa << HV_HYP_PAGE_SHIFT, + HV_HYP_PAGE_SIZE, MEMREMAP_WB); + if (!(*event_ring_page)) + goto cleanup_siefp; + } else { + /* + * On L1VH the hypervisor does not provide a SIRBP page. + * Allocate one and program its GPA into the MSR. 
+ */ + *event_ring_page = (struct hv_synic_event_ring_page *) + get_zeroed_page(GFP_KERNEL); + + if (!(*event_ring_page)) + goto cleanup_siefp; + + sirbp.base_sirbp_gpa = virt_to_phys(*event_ring_page) + >> HV_HYP_PAGE_SHIFT; + } + + sirbp.sirbp_enabled = true; hv_set_non_nested_msr(HV_MSR_SIRBP, sirbp.as_uint64); if (mshv_sint_irq != -1) @@ -518,28 +548,30 @@ static int mshv_synic_cpu_init(unsigned int cpu) hv_set_non_nested_msr(HV_MSR_SINT0 + HV_SYNIC_DOORBELL_SINT_INDEX, sint.as_uint64); - /* Enable global synic bit */ - sctrl.as_uint64 = hv_get_non_nested_msr(HV_MSR_SCONTROL); - sctrl.enable = 1; - hv_set_non_nested_msr(HV_MSR_SCONTROL, sctrl.as_uint64); + /* When VMBus is active it already enabled SCONTROL. */ + if (!vmbus_active) { + union hv_synic_scontrol sctrl; + + sctrl.as_uint64 = hv_get_non_nested_msr(HV_MSR_SCONTROL); + sctrl.enable = 1; + hv_set_non_nested_msr(HV_MSR_SCONTROL, sctrl.as_uint64); + } return 0; -cleanup: - if (*event_ring_page) { - sirbp.sirbp_enabled = false; - hv_set_non_nested_msr(HV_MSR_SIRBP, sirbp.as_uint64); - memunmap(*event_ring_page); - } - if (*event_flags_page) { +cleanup_siefp: + if (*event_flags_page) + memunmap(*event_flags_page); + if (!vmbus_active) { siefp.siefp_enabled = false; hv_set_non_nested_msr(HV_MSR_SIEFP, siefp.as_uint64); - memunmap(*event_flags_page); } - if (*msg_page) { +cleanup_simp: + if (*msg_page) + memunmap(*msg_page); + if (!vmbus_active) { simp.simp_enabled = false; hv_set_non_nested_msr(HV_MSR_SIMP, simp.as_uint64); - memunmap(*msg_page); } return -EFAULT; @@ -548,16 +580,15 @@ cleanup: static int mshv_synic_cpu_exit(unsigned int cpu) { union hv_synic_sint sint; - union hv_synic_simp simp; - union hv_synic_siefp siefp; union hv_synic_sirbp sirbp; - union hv_synic_scontrol sctrl; struct hv_synic_pages *spages = this_cpu_ptr(synic_pages); struct hv_message_page **msg_page = &spages->hyp_synic_message_page; struct hv_synic_event_flags_page **event_flags_page = &spages->synic_event_flags_page; struct 
hv_synic_event_ring_page **event_ring_page = &spages->synic_event_ring_page; + /* VMBus owns SIMP/SIEFP/SCONTROL when it is active */ + bool vmbus_active = hv_vmbus_exists(); /* Disable the interrupt */ sint.as_uint64 = hv_get_non_nested_msr(HV_MSR_SINT0 + HV_SYNIC_INTERCEPTION_SINT_INDEX); @@ -574,28 +605,47 @@ static int mshv_synic_cpu_exit(unsigned int cpu) if (mshv_sint_irq != -1) disable_percpu_irq(mshv_sint_irq); - /* Disable Synic's event ring page */ + /* Disable SYNIC event ring page owned by MSHV */ sirbp.as_uint64 = hv_get_non_nested_msr(HV_MSR_SIRBP); sirbp.sirbp_enabled = false; - hv_set_non_nested_msr(HV_MSR_SIRBP, sirbp.as_uint64); - memunmap(*event_ring_page); - /* Disable Synic's event flags page */ - siefp.as_uint64 = hv_get_non_nested_msr(HV_MSR_SIEFP); - siefp.siefp_enabled = false; - hv_set_non_nested_msr(HV_MSR_SIEFP, siefp.as_uint64); + if (hv_root_partition()) { + hv_set_non_nested_msr(HV_MSR_SIRBP, sirbp.as_uint64); + memunmap(*event_ring_page); + } else { + sirbp.base_sirbp_gpa = 0; + hv_set_non_nested_msr(HV_MSR_SIRBP, sirbp.as_uint64); + free_page((unsigned long)*event_ring_page); + } + + /* + * Release our mappings of the message and event flags pages. + * When VMBus is not active, we enabled SIMP/SIEFP — disable + * them. Otherwise VMBus owns the MSRs — leave them. 
+ */ memunmap(*event_flags_page); + if (!vmbus_active) { + union hv_synic_simp simp; + union hv_synic_siefp siefp; - /* Disable Synic's message page */ - simp.as_uint64 = hv_get_non_nested_msr(HV_MSR_SIMP); - simp.simp_enabled = false; - hv_set_non_nested_msr(HV_MSR_SIMP, simp.as_uint64); + siefp.as_uint64 = hv_get_non_nested_msr(HV_MSR_SIEFP); + siefp.siefp_enabled = false; + hv_set_non_nested_msr(HV_MSR_SIEFP, siefp.as_uint64); + + simp.as_uint64 = hv_get_non_nested_msr(HV_MSR_SIMP); + simp.simp_enabled = false; + hv_set_non_nested_msr(HV_MSR_SIMP, simp.as_uint64); + } memunmap(*msg_page); - /* Disable global synic bit */ - sctrl.as_uint64 = hv_get_non_nested_msr(HV_MSR_SCONTROL); - sctrl.enable = 0; - hv_set_non_nested_msr(HV_MSR_SCONTROL, sctrl.as_uint64); + /* When VMBus is active it owns SCONTROL — leave it. */ + if (!vmbus_active) { + union hv_synic_scontrol sctrl; + + sctrl.as_uint64 = hv_get_non_nested_msr(HV_MSR_SCONTROL); + sctrl.enable = 0; + hv_set_non_nested_msr(HV_MSR_SCONTROL, sctrl.as_uint64); + } return 0; } -- cgit v1.2.3 From efe0fb8c3fe2b996522f7418fd311eeff43c1148 Mon Sep 17 00:00:00 2001 From: Jork Loeser Date: Mon, 27 Apr 2026 14:38:53 -0700 Subject: mshv: clean up SynIC state on kexec for L1VH The reboot notifier that tears down the SynIC cpuhp state guards the cleanup with hv_root_partition(), so on L1VH (where hv_root_partition() is false) SINT0, SINT5, and SIRBP are never cleaned up before kexec. The kexec'd kernel then inherits stale unmasked SINTs and an enabled SIRBP pointing to freed memory. Remove the hv_root_partition() guard so the cleanup runs for all parent partitions. 
Signed-off-by: Jork Loeser Reviewed-by: Stanislav Kinsburskii Reviewed-by: Anirudh Rayabharam (Microsoft) Signed-off-by: Wei Liu --- drivers/hv/mshv_synic.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/drivers/hv/mshv_synic.c b/drivers/hv/mshv_synic.c index 2db3b0192eac..978a1cace341 100644 --- a/drivers/hv/mshv_synic.c +++ b/drivers/hv/mshv_synic.c @@ -723,9 +723,6 @@ mshv_unregister_doorbell(u64 partition_id, int doorbell_portid) static int mshv_synic_reboot_notify(struct notifier_block *nb, unsigned long code, void *unused) { - if (!hv_root_partition()) - return 0; - cpuhp_remove_state(synic_cpuhp_online); return 0; } -- cgit v1.2.3 From 6e55a8d9f21bcb238596bdd8092c92e74c698a3d Mon Sep 17 00:00:00 2001 From: Jork Loeser Date: Mon, 27 Apr 2026 14:38:54 -0700 Subject: mshv: unmap debugfs stats pages on kexec On L1VH, debugfs stats pages are overlay pages: the kernel allocates them and registers the GPAs with the hypervisor via HVCALL_MAP_STATS_PAGE2. These overlay mappings persist in the hypervisor across kexec. If the kexec'd kernel reuses those physical pages, the hypervisor's overlay semantics cause a machine check exception. Fix this by calling mshv_debugfs_exit() from the reboot notifier, which issues HVCALL_UNMAP_STATS_PAGE for each mapped stats page before kexec. This releases the overlay bindings so the physical pages can be safely reused. Guard mshv_debugfs_exit() against being called when init failed. 
Signed-off-by: Jork Loeser Reviewed-by: Anirudh Rayabharam (Microsoft) Reviewed-by: Stanislav Kinsburskii Signed-off-by: Wei Liu --- drivers/hv/mshv_debugfs.c | 7 ++++++- drivers/hv/mshv_synic.c | 1 + 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/drivers/hv/mshv_debugfs.c b/drivers/hv/mshv_debugfs.c index 418b6dc8f3c2..3c3e02237ae9 100644 --- a/drivers/hv/mshv_debugfs.c +++ b/drivers/hv/mshv_debugfs.c @@ -674,8 +674,10 @@ int __init mshv_debugfs_init(void) mshv_debugfs = debugfs_create_dir("mshv", NULL); if (IS_ERR(mshv_debugfs)) { + err = PTR_ERR(mshv_debugfs); + mshv_debugfs = NULL; pr_err("%s: failed to create debugfs directory\n", __func__); - return PTR_ERR(mshv_debugfs); + return err; } if (hv_root_partition()) { @@ -710,6 +712,9 @@ remove_mshv_dir: void mshv_debugfs_exit(void) { + if (!mshv_debugfs) + return; + mshv_debugfs_parent_partition_remove(); if (hv_root_partition()) { diff --git a/drivers/hv/mshv_synic.c b/drivers/hv/mshv_synic.c index 978a1cace341..88170ce6b83f 100644 --- a/drivers/hv/mshv_synic.c +++ b/drivers/hv/mshv_synic.c @@ -723,6 +723,7 @@ mshv_unregister_doorbell(u64 partition_id, int doorbell_portid) static int mshv_synic_reboot_notify(struct notifier_block *nb, unsigned long code, void *unused) { + mshv_debugfs_exit(); cpuhp_remove_state(synic_cpuhp_online); return 0; } -- cgit v1.2.3 From c5c3ef8d49e15d2fc1cec4ad7c91d81b99977440 Mon Sep 17 00:00:00 2001 From: Michael Kelley Date: Tue, 17 Feb 2026 10:23:34 -0800 Subject: Drivers: hv: vmbus: Provide option to skip VMBus unload on panic Currently, VMBus code initiates a VMBus unload in the panic path so that if a kdump kernel is loaded, it can start fresh in setting up its own VMBus connection. However, a driver for the VMBus virtual frame buffer may need to flush dirty portions of the frame buffer back to the Hyper-V host so that panic information is visible in the graphics console. 
To support such flushing, provide exported functions for the frame buffer driver to specify that the VMBus unload should not be done by the VMBus driver, and to initiate the VMBus unload itself. Together these allow a frame buffer driver to delay the VMBus unload until after it has completed the flush. Ideally, the VMBus driver could use its own panic-path callback to do the unload after all frame buffer drivers have finished. But DRM frame buffer drivers use the kmsg dump callback, and there are no callbacks after that in the panic path. Hence this somewhat messy approach to properly sequencing the frame buffer flush and the VMBus unload. Fixes: 3671f3777758 ("drm/hyperv: Add support for drm_panic") Signed-off-by: Michael Kelley Reviewed-by: Long Li Signed-off-by: Wei Liu --- drivers/hv/channel_mgmt.c | 1 + drivers/hv/hyperv_vmbus.h | 1 - drivers/hv/vmbus_drv.c | 25 ++++++++++++++++++------- include/linux/hyperv.h | 3 +++ 4 files changed, 22 insertions(+), 8 deletions(-) diff --git a/drivers/hv/channel_mgmt.c b/drivers/hv/channel_mgmt.c index 84eb0a6a0b54..89d214dda360 100644 --- a/drivers/hv/channel_mgmt.c +++ b/drivers/hv/channel_mgmt.c @@ -952,6 +952,7 @@ void vmbus_initiate_unload(bool crash) else vmbus_wait_for_unload(); } +EXPORT_SYMBOL_GPL(vmbus_initiate_unload); static void vmbus_setup_channel_state(struct vmbus_channel *channel, struct vmbus_channel_offer_channel *offer) diff --git a/drivers/hv/hyperv_vmbus.h b/drivers/hv/hyperv_vmbus.h index 05a36854389a..eb8bdd8bb1f5 100644 --- a/drivers/hv/hyperv_vmbus.h +++ b/drivers/hv/hyperv_vmbus.h @@ -441,7 +441,6 @@ void hv_vss_deinit(void); int hv_vss_pre_suspend(void); int hv_vss_pre_resume(void); void hv_vss_onchannelcallback(void *context); -void vmbus_initiate_unload(bool crash); static inline void hv_poll_channel(struct vmbus_channel *channel, void (*cb)(void *)) diff --git a/drivers/hv/vmbus_drv.c b/drivers/hv/vmbus_drv.c index d28ff45d4cfd..c9eeb2ec365d 100644 --- a/drivers/hv/vmbus_drv.c +++ 
b/drivers/hv/vmbus_drv.c @@ -69,19 +69,29 @@ bool vmbus_is_confidential(void) } EXPORT_SYMBOL_GPL(vmbus_is_confidential); +static bool skip_vmbus_unload; + +/* + * Allow a VMBus framebuffer driver to specify that in the case of a panic, + * it will do the VMbus unload operation once it has flushed any dirty + * portions of the framebuffer to the Hyper-V host. + */ +void vmbus_set_skip_unload(bool skip) +{ + skip_vmbus_unload = skip; +} +EXPORT_SYMBOL_GPL(vmbus_set_skip_unload); + /* * The panic notifier below is responsible solely for unloading the * vmbus connection, which is necessary in a panic event. - * - * Notice an intrincate relation of this notifier with Hyper-V - * framebuffer panic notifier exists - we need vmbus connection alive - * there in order to succeed, so we need to order both with each other - * [see hvfb_on_panic()] - this is done using notifiers' priorities. */ static int hv_panic_vmbus_unload(struct notifier_block *nb, unsigned long val, void *args) { - vmbus_initiate_unload(true); + if (!skip_vmbus_unload) + vmbus_initiate_unload(true); + return NOTIFY_DONE; } static struct notifier_block hyperv_panic_vmbus_unload_block = { @@ -2897,7 +2907,8 @@ static void hv_crash_handler(struct pt_regs *regs) { int cpu; - vmbus_initiate_unload(true); + if (!skip_vmbus_unload) + vmbus_initiate_unload(true); /* * In crash handler we can't schedule synic cleanup for all CPUs, * doing the cleanup for current CPU only. This should be sufficient diff --git a/include/linux/hyperv.h b/include/linux/hyperv.h index 964f1be8150c..41a3d82f0722 100644 --- a/include/linux/hyperv.h +++ b/include/linux/hyperv.h @@ -1336,6 +1336,9 @@ int vmbus_allocate_mmio(struct resource **new, struct hv_device *device_obj, bool fb_overlap_ok); void vmbus_free_mmio(resource_size_t start, resource_size_t size); +void vmbus_initiate_unload(bool crash); +void vmbus_set_skip_unload(bool skip); + /* * GUID definitions of various offer types - services offered to the guest. 
*/ -- cgit v1.2.3 From 8b35874f56ded0cc1a90a25b87411249a86246cd Mon Sep 17 00:00:00 2001 From: Michael Kelley Date: Tue, 17 Feb 2026 10:23:35 -0800 Subject: drm/hyperv: During panic do VMBus unload after frame buffer is flushed In a VM, Linux panic information (reason for the panic, stack trace, etc.) may be written to a serial console and/or a virtual frame buffer for a graphics console. The latter may need to be flushed back to the host hypervisor for display. The current Hyper-V DRM driver for the frame buffer does the flushing *after* the VMBus connection has been unloaded, such that panic messages are not displayed on the graphics console. A user with a Hyper-V graphics console is left with just a hung empty screen after a panic. The enhanced control that DRM provides over the panic display in the graphics console is similarly non-functional. Commit 3671f3777758 ("drm/hyperv: Add support for drm_panic") added the Hyper-V DRM driver support to flush the virtual frame buffer. It provided necessary functionality but did not handle the sequencing problem with VMBus unload. Fix the full problem by using VMBus functions to suppress the VMBus unload that is normally done by the VMBus driver in the panic path. Then after the frame buffer has been flushed, do the VMBus unload so that a kdump kernel can start cleanly. As expected, CONFIG_DRM_PANIC must be selected for these changes to have effect. As a side benefit, the enhanced features of the DRM panic path are also functional. 
Fixes: 3671f3777758 ("drm/hyperv: Add support for drm_panic") Signed-off-by: Michael Kelley Reviewed-by: Jocelyn Falempe Signed-off-by: Wei Liu --- drivers/gpu/drm/hyperv/hyperv_drm_drv.c | 5 +++++ drivers/gpu/drm/hyperv/hyperv_drm_modeset.c | 15 ++++++++------- 2 files changed, 13 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/hyperv/hyperv_drm_drv.c b/drivers/gpu/drm/hyperv/hyperv_drm_drv.c index 06b5d96e6eaf..b6bf6412ae34 100644 --- a/drivers/gpu/drm/hyperv/hyperv_drm_drv.c +++ b/drivers/gpu/drm/hyperv/hyperv_drm_drv.c @@ -150,6 +150,10 @@ static int hyperv_vmbus_probe(struct hv_device *hdev, goto err_free_mmio; } + /* If DRM panic path is stubbed out VMBus code must do the unload */ + if (IS_ENABLED(CONFIG_DRM_PANIC)) + vmbus_set_skip_unload(true); + drm_client_setup(dev, NULL); return 0; @@ -169,6 +173,7 @@ static void hyperv_vmbus_remove(struct hv_device *hdev) struct drm_device *dev = hv_get_drvdata(hdev); struct hyperv_drm_device *hv = to_hv(dev); + vmbus_set_skip_unload(false); drm_dev_unplug(dev); drm_atomic_helper_shutdown(dev); vmbus_close(hdev->channel); diff --git a/drivers/gpu/drm/hyperv/hyperv_drm_modeset.c b/drivers/gpu/drm/hyperv/hyperv_drm_modeset.c index 7978f8c8108c..d48ca6c23b7c 100644 --- a/drivers/gpu/drm/hyperv/hyperv_drm_modeset.c +++ b/drivers/gpu/drm/hyperv/hyperv_drm_modeset.c @@ -212,15 +212,16 @@ static void hyperv_plane_panic_flush(struct drm_plane *plane) struct hyperv_drm_device *hv = to_hv(plane->dev); struct drm_rect rect; - if (!plane->state || !plane->state->fb) - return; + if (plane->state && plane->state->fb) { + rect.x1 = 0; + rect.y1 = 0; + rect.x2 = plane->state->fb->width; + rect.y2 = plane->state->fb->height; - rect.x1 = 0; - rect.y1 = 0; - rect.x2 = plane->state->fb->width; - rect.y2 = plane->state->fb->height; + hyperv_update_dirt(hv->hdev, &rect); + } - hyperv_update_dirt(hv->hdev, &rect); + vmbus_initiate_unload(true); } static const struct drm_plane_helper_funcs hyperv_plane_helper_funcs = { -- cgit 
v1.2.3 From 83eb00f31eb1b10735d48e469df72cc2b0e06f6d Mon Sep 17 00:00:00 2001 From: Dexuan Cui Date: Wed, 27 May 2026 12:21:01 -0700 Subject: hyperv: Clean up and fix the guest ID comment in hvgdk.h Change the "64 bit" to "64-bit", and the "Os" to "OS". Remove the obsolete paragraph since the guideline has been published in the Hypervisor Top Level Functional Specification for many years. The "OS Type" is 0x1 for Linux, not 0x100. No functional change. Fixes: 83ba0c4f3f31 ("Drivers: hv: Cleanup the guest ID computation") Signed-off-by: Dexuan Cui Reviewed-by: Hamza Mahfooz Signed-off-by: Wei Liu --- include/hyperv/hvgdk.h | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) diff --git a/include/hyperv/hvgdk.h b/include/hyperv/hvgdk.h index 384c3f3ff4a5..f538144280ca 100644 --- a/include/hyperv/hvgdk.h +++ b/include/hyperv/hvgdk.h @@ -10,18 +10,12 @@ /* * The guest OS needs to register the guest ID with the hypervisor. - * The guest ID is a 64 bit entity and the structure of this ID is + * The guest ID is a 64-bit entity and the structure of this ID is * specified in the Hyper-V TLFS specification. * - * While the current guideline does not specify how Linux guest ID(s) - * need to be generated, our plan is to publish the guidelines for - * Linux and other guest operating systems that currently are hosted - * on Hyper-V. The implementation here conforms to this yet - * unpublished guidelines. 
- * * Bit(s) * 63 - Indicates if the OS is Open Source or not; 1 is Open Source - * 62:56 - Os Type; Linux is 0x100 + * 62:56 - OS Type; Linux is 0x1 * 55:48 - Distro specific identification * 47:16 - Linux kernel version number * 15:0 - Distro specific identification -- cgit v1.2.3 From 840b740a35bf969734e0f2e44c21289fdd03079e Mon Sep 17 00:00:00 2001 From: Michael Kelley Date: Tue, 26 May 2026 07:13:04 -0700 Subject: mshv: Add conditional VMBus dependency When the VMBus driver is not part of the kernel (CONFIG_HYPERV_VMBUS=n), the MSHV root driver fails to link: ERROR: modpost: "hv_vmbus_exists" [drivers/hv/mshv_root.ko] undefined! Fix this while meeting these requirements: * It must be possible to include the MSHV root driver without the VMBus driver. In such case, the MSHV root driver can be built-in to the kernel image, or it can be built as a separate module. * If both the MSHV root driver and the VMBus driver are present, the MSHV root driver and VMBus driver can both be built-in, or they can both be separate modules. Or the MSHV root driver can be a module while the VMBus driver can be built-in, but the reverse is disallowed. Regardless of the build choices, the VMBus driver must be loaded before the MSHV driver in order for the SynIC to be managed properly (see comments in the MSHV SynIC code). The fix has two parts: * Add a Kconfig entry for MSHV_ROOT to depend on HYPERV_VMBUS if HYPERV_VMBUS is present. The entry disallows MSHV_ROOT being built-in when HYPERV_VMBUS is a module, but without requiring that HYPERV_VMBUS be built. * Add a stub implementation of hv_vmbus_exists() for when the VMBus driver is not present so that the MSHV root driver has no module dependency on VMBus. When the VMBus driver *is* present, the module dependency ensures that the VMBus driver loads first when both are built as modules. Existing code ensures that the VMBus driver loads first if it is built-in. The VMBus driver uses subsys_initcall(), which is initcall level 4. 
The MSHV root driver uses module_init(), which becomes device_init() when built-in, and device_init() is initcall level 6. Reported-by: Arnd Bergmann Closes: https://lore.kernel.org/all/20260520074044.923728-1-arnd@kernel.org/ Signed-off-by: Michael Kelley Acked-by: Arnd Bergmann Reviewed-by: Jork Loeser Reviewed-by: Hardik Garg Signed-off-by: Wei Liu --- drivers/hv/Kconfig | 1 + include/linux/hyperv.h | 4 ++++ 2 files changed, 5 insertions(+) diff --git a/drivers/hv/Kconfig b/drivers/hv/Kconfig index 2d0b3fcb0ff8..aa11bcefddf2 100644 --- a/drivers/hv/Kconfig +++ b/drivers/hv/Kconfig @@ -74,6 +74,7 @@ config MSHV_ROOT # e.g. When withdrawing memory, the hypervisor gives back 4k pages in # no particular order, making it impossible to reassemble larger pages depends on PAGE_SIZE_4KB + depends on HYPERV_VMBUS if HYPERV_VMBUS select EVENTFD select VIRT_XFER_TO_GUEST_WORK select HMM_MIRROR diff --git a/include/linux/hyperv.h b/include/linux/hyperv.h index 41a3d82f0722..734b7ef98f4d 100644 --- a/include/linux/hyperv.h +++ b/include/linux/hyperv.h @@ -1304,7 +1304,11 @@ static inline void *hv_get_drvdata(struct hv_device *dev) struct device *hv_get_vmbus_root_device(void); +#if IS_ENABLED(CONFIG_HYPERV_VMBUS) bool hv_vmbus_exists(void); +#else +static inline bool hv_vmbus_exists(void) { return false; } +#endif struct hv_ring_buffer_debug_info { u32 current_interrupt_mask; -- cgit v1.2.3 From c53763aa2b9c96fbabee68ebe1e13074cb09bfb2 Mon Sep 17 00:00:00 2001 From: Can Peng Date: Wed, 20 May 2026 15:16:32 +0800 Subject: mshv: use kmalloc_array in mshv_root_scheduler_init Replace kmalloc() with kmalloc_array() to prevent potential overflow, as recommended in Documentation/process/deprecated.rst. No functional change. 
Signed-off-by: Can Peng Signed-off-by: Wei Liu --- drivers/hv/mshv_root_main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/hv/mshv_root_main.c b/drivers/hv/mshv_root_main.c index bd1359eb58dd..146726cc4e9b 100644 --- a/drivers/hv/mshv_root_main.c +++ b/drivers/hv/mshv_root_main.c @@ -2241,7 +2241,7 @@ static int mshv_root_scheduler_init(unsigned int cpu) outputarg = (void **)this_cpu_ptr(root_scheduler_output); /* Allocate two consecutive pages. One for input, one for output. */ - p = kmalloc(2 * HV_HYP_PAGE_SIZE, GFP_KERNEL); + p = kmalloc_array(2, HV_HYP_PAGE_SIZE, GFP_KERNEL); if (!p) return -ENOMEM; -- cgit v1.2.3 From 016a25e4b0df4d77e7c258edee4aaf982e4ee809 Mon Sep 17 00:00:00 2001 From: Dexuan Cui Date: Thu, 7 May 2026 14:28:38 -0700 Subject: Drivers: hv: vmbus: Improve the logic of reserving fb_mmio on Gen2 VMs If vmbus_reserve_fb() in the kdump/kexec kernel fails to properly reserve the framebuffer MMIO range (which is below 4GB) due to a Gen2 VM's screen.lfb_base being zero [1], there is an MMIO conflict between the drivers hyperv-drm and pci-hyperv: when the driver pci-hyperv's hv_allocate_config_window() calls vmbus_allocate_mmio() to get an MMIO range, typically it gets a 32-bit MMIO range that overlaps with the framebuffer MMIO range, and later hv_pci_enter_d0() fails with an error message "PCI Pass-through VSP failed D0 Entry with status" since the host thinks that PCI devices must not use MMIO space that the host has assigned to the framebuffer. This is especially an issue if pci-hyperv is built-in and hyperv-drm is built as a module. Consequently, the kdump/kexec kernel fails to detect PCI devices via pci-hyperv, and may fail to mount the root file system, which may reside in a NVMe disk. The issue described here has existed for SR-IOV VF NICs since day one of the pci-hyperv driver, and has been worked around on x64 when possible. 
With the recent introduction of ARM64 VMs that boot from NVMe, there is no workaround, so we need a formal fix. On Gen2 VMs, if the screen.lfb_base is 0 in the kdump/kexec kernel [1], fall back to the low MMIO base, which should be equal to the framebuffer MMIO base [2] (the statement is true according to my testing on x64 Windows Server 2016, and on x64 and ARM64 Windows Server 2025 and on Azure. I checked with the Hyper-V team and they said the statement should continue to be true for Gen2 VMs). In the first kernel, screen.lfb_base is not 0; if the user specifies a very high resolution, it's not enough to only reserve 8MB: let's always reserve half of the space between the framebuffer base and 4GB, but cap the reservation at 128MB, which is the required framebuffer size of the highest resolution 7680*4320 supported by Hyper-V. While at it, fix the comparison "end > VTPM_BASE_ADDRESS" by changing the > to >=, and truncate 'end' to VTPM_BASE_ADDRESS - 1 rather than VTPM_BASE_ADDRESS. Here the 'end' is an inclusive end (typically, it's 0xFFFF_FFFF for the low MMIO range). Note: vmbus_reserve_fb() now also reserves an MMIO range at the beginning of the low MMIO range on CVMs, which have no framebuffers (the 'screen.lfb_base' in vmbus_reserve_fb() is 0 for CVMs), just in case the host might treat the beginning of the low MMIO range specially [3]. BTW, the OpenHCL kernel is not affected by the change, because that kernel boots with DeviceTree rather than ACPI (so vmbus_reserve_fb() won't run there), and there is no framebuffer device for that kernel.
Note: normally Gen1 VMs don't have the MMIO conflict issue because the framebuffer MMIO range (which is hardcoded to base=4GB-128MB and size=64MB for Gen1 VMs by the host) is always reported via the legacy PCI graphics device's BAR, so the kdump/kexec kernel can reserve the 64MB MMIO range; however, if the VM is configured to use a very high resolution and the required framebuffer size exceeds 64MB (AFAIK, in practice, this isn't a typical configuration by users), the hyperv-drm driver may need to allocate an MMIO range above 4GB and change the framebuffer MMIO location to the allocated MMIO range -- in this case, there can still be issues [4] which can't be easily fixed: any possible affected Gen1 users would have to use a resolution whose framebuffer size is <= 64MB, or switch to Gen2 VMs. [1] https://lore.kernel.org/all/SA1PR21MB692176C1BC53BFC9EAE5CF8EBF51A@SA1PR21MB6921.namprd21.prod.outlook.com/ [2] https://lore.kernel.org/all/SA1PR21MB69218F955B62DFF62E3E88D2BF222@SA1PR21MB6921.namprd21.prod.outlook.com/ [3] https://lore.kernel.org/all/SN6PR02MB415726B17D5A6027CD1717E8D4342@SN6PR02MB4157.namprd02.prod.outlook.com/ [4] https://lore.kernel.org/all/SA1PR21MB69213486F821CA5A2C793C81BF342@SA1PR21MB6921.namprd21.prod.outlook.com/ Fixes: 4daace0d8ce8 ("PCI: hv: Add paravirtual PCI front-end for Microsoft Hyper-V VMs") CC: stable@vger.kernel.org Reviewed-by: Michael Kelley Tested-by: Krister Johansen Tested-by: Matthew Ruffell Signed-off-by: Dexuan Cui Signed-off-by: Wei Liu --- drivers/hv/vmbus_drv.c | 29 ++++++++++++++++++++++++++--- 1 file changed, 26 insertions(+), 3 deletions(-) diff --git a/drivers/hv/vmbus_drv.c b/drivers/hv/vmbus_drv.c index c9eeb2ec365d..b80a35c778ab 100644 --- a/drivers/hv/vmbus_drv.c +++ b/drivers/hv/vmbus_drv.c @@ -2327,8 +2327,8 @@ static acpi_status vmbus_walk_resources(struct acpi_resource *res, void *ctx) return AE_NO_MEMORY; /* If this range overlaps the virtual TPM, truncate it. 
*/ - if (end > VTPM_BASE_ADDRESS && start < VTPM_BASE_ADDRESS) - end = VTPM_BASE_ADDRESS; + if (end >= VTPM_BASE_ADDRESS && start < VTPM_BASE_ADDRESS) + end = VTPM_BASE_ADDRESS - 1; new_res->name = "hyperv mmio"; new_res->flags = IORESOURCE_MEM; @@ -2395,6 +2395,7 @@ static void vmbus_mmio_remove(void) static void __maybe_unused vmbus_reserve_fb(void) { resource_size_t start = 0, size; + resource_size_t low_mmio_base; struct pci_dev *pdev; if (efi_enabled(EFI_BOOT)) { @@ -2402,6 +2403,24 @@ static void __maybe_unused vmbus_reserve_fb(void) if (IS_ENABLED(CONFIG_SYSFB)) { start = sysfb_primary_display.screen.lfb_base; size = max_t(__u32, sysfb_primary_display.screen.lfb_size, 0x800000); + + low_mmio_base = hyperv_mmio->start; + if (!low_mmio_base || upper_32_bits(low_mmio_base) || + (start && start < low_mmio_base)) { + pr_warn("Unexpected low mmio base %pa\n", &low_mmio_base); + } else { + /* + * If the kdump/kexec or CVM kernel's lfb_base + * is 0, fall back to the low mmio base. + */ + if (!start) + start = low_mmio_base; + /* + * Reserve half of the space below 4GB for high + * resolutions, but cap the reservation to 128MB. 
+ */ + size = min((SZ_4G - start) / 2, SZ_128M); + } } } else { /* Gen1 VM: get FB base from PCI */ @@ -2422,8 +2441,10 @@ static void __maybe_unused vmbus_reserve_fb(void) pci_dev_put(pdev); } - if (!start) + if (!start) { + pr_warn("Unexpected framebuffer mmio base of zero\n"); return; + } /* * Make a claim for the frame buffer in the resource tree under the @@ -2433,6 +2454,8 @@ static void __maybe_unused vmbus_reserve_fb(void) */ for (; !fb_mmio && (size >= 0x100000); size >>= 1) fb_mmio = __request_region(hyperv_mmio, start, size, fb_mmio_name, 0); + + pr_info("hv_mmio=%pR,%pR fb=%pR\n", hyperv_mmio, hyperv_mmio->sibling, fb_mmio); } /** -- cgit v1.2.3 From 98e0fc32e53dd62cd38a0d67eaf5846ae20078cc Mon Sep 17 00:00:00 2001 From: "Anirudh Rayabharam (Microsoft)" Date: Wed, 13 May 2026 13:25:56 +0000 Subject: mshv: support 1G hugepages by passing them as 2M-aligned chunks The hypervisor's map GPA hypercall coalesces contiguous 2M-aligned chunks into 1G mappings when alignment permits, so the driver can support 1G hugepages by feeding them in as 2M chunks. Note that this is the only way to make 1G mappings; there is no way to directly map a 1G hugepage using the hypercall. Always emit a 2M (PMD_ORDER) stride for the huge-page case. The hypercall has no 1G stride, so 1G folios are processed as a sequence of 2M chunks. Folios whose order is less than PMD_ORDER (e.g. mTHP) fall back to single-page stride; mapping them as 2M would fail in the hypervisor anyway. 
Assisted-by: Copilot-CLI:claude-opus-4.7 Signed-off-by: Anirudh Rayabharam (Microsoft) Acked-by: Stanislav Kinsburskii Reviewed-by: Michael Kelley Signed-off-by: Wei Liu --- drivers/hv/mshv_regions.c | 29 +++++++++++++---------------- 1 file changed, 13 insertions(+), 16 deletions(-) diff --git a/drivers/hv/mshv_regions.c b/drivers/hv/mshv_regions.c index fdffd4f002f6..6d65e5b42152 100644 --- a/drivers/hv/mshv_regions.c +++ b/drivers/hv/mshv_regions.c @@ -29,29 +29,27 @@ * Uses huge page stride if the backing page is huge and the guest mapping * is properly aligned; otherwise falls back to single page stride. * - * Return: Stride in pages, or -EINVAL if page order is unsupported. + * Return: Stride in pages. */ -static int mshv_chunk_stride(struct page *page, - u64 gfn, u64 page_count) +static unsigned int mshv_chunk_stride(struct page *page, u64 gfn, + u64 page_count) { - unsigned int page_order; + unsigned int page_order = folio_order(page_folio(page)); /* * Use single page stride by default. For huge page stride, the - * page must be compound and point to the head of the compound - * page, and both gfn and page_count must be huge-page aligned. + * folio order must be at least PMD_ORDER, the page's PFN must be + * 2M-aligned (so that a 2M-aligned tail page of a larger folio is + * acceptable), and both gfn and page_count must be 2M-aligned. */ - if (!PageCompound(page) || !PageHead(page) || + if (page_order < PMD_ORDER || + !IS_ALIGNED(page_to_pfn(page), PTRS_PER_PMD) || !IS_ALIGNED(gfn, PTRS_PER_PMD) || !IS_ALIGNED(page_count, PTRS_PER_PMD)) return 1; - page_order = folio_order(page_folio(page)); - /* The hypervisor only supports 2M huge page */ - if (page_order != PMD_ORDER) - return -EINVAL; - - return 1 << page_order; + /* Use 2M stride always i.e. 
process 1G folios as 2M chunks */ + return 1 << PMD_ORDER; } /** @@ -86,15 +84,14 @@ static long mshv_region_process_chunk(struct mshv_mem_region *region, u64 gfn = region->start_gfn + page_offset; u64 count; struct page *page; - int stride, ret; + unsigned int stride; + int ret; page = region->mreg_pages[page_offset]; if (!page) return -EINVAL; stride = mshv_chunk_stride(page, gfn, page_count); - if (stride < 0) - return stride; /* Start at stride since the first stride is validated */ for (count = stride; count < page_count; count += stride) { -- cgit v1.2.3