summaryrefslogtreecommitdiff
path: root/include/linux
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2026-06-19 08:56:49 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2026-06-19 08:56:49 -0700
commitc98d767b34574be82b74d77d02264a830ae1cadd (patch)
tree3dc16f4ebd9d7bdeb7dd4a9c84dae88c692e9ca4 /include/linux
parent08c7183f5b9ffe4408e74fff848a4cc2105361d4 (diff)
parentef057cbf825e03b63f6edf5980f96abf3c53089d (diff)
Merge tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm
Pull kvm updates from Paolo Bonzini: "arm64: This is a bit of an odd merge window on the KVM/arm64 front. There is absolutely no new feature in the pull request. It is purely fixes, because it is simply becoming too hard to review new stuff when so many AI-fuelled fixes hit the list. - Significant cleanup of the vgic-v5 PPI support which was merged in 7.1. This makes the code more maintainable, and squashes a couple of bugs in the meantime - Set of fixes for the handling of the MMU in an NV context, particularly VNCR-triggered faults. S1POE support is fixed as well - Large set of pKVM fixes, mostly addressing recurring issues around hypervisor tracking of donated pages in obscure cases where the donation could fail and leave things in a bizarre state - Fixes for the so-called "lazy vgic init", which resulted in sleeping operations in non-preemptible sections. This turned out to be far more invasive than initially expected. - Reduce the overhead of L1/L2 context switch by not touching the FP registers - Fix the way non-implemented page sizes are dealt with when a guest insists on using them for S2 translation - The usual set of low-impact fixes and cleanups all over the map LoongArch: - On a request for lazy FPU load, load all FPU state that the VM supports instead of enabling only the part (FPU, LSX or LASX) that caused the FPU load request - Some enhancements about interrupt injection - Some bug fixes and other small changes RISC-V: - Batch G-stage TLB flushes for GPA range based page table updates - Convert HGEI line management to fully per-HART - Fix missing CSR dirty marking when FWFT state updated via ONE_REG - Fix stale FWFT feature exposure to Guest/VM - Speed up dirty logging write faults using MMU rwlock and atomic PTE updates using cmpxchg() for permission-only changes - Use flexible array for APLIC IRQ state - Use kvm_slot_dirty_track_enabled() for logging enable check on a memslot - Avoid skipping valid pages in kvm_riscv_gstage_wp_range() - Avoid 
skipping valid pages in kvm_riscv_gstage_unmap_range() - Use endian-specific __lelong for NACL shared memory S390: - KVM_PRE_FAULT_MEMORY support - Support for 2G hugepages - Support for the ASTFLEIE 2 facility - Support for fast inject using kvm_arch_set_irq_inatomic - Fix potential leak of uninitialized bytes - A few more misc gmap fixes x86: - Generic support for the more granular permissions allowed by EPT, namely "read" (which was previously usurping the U bit) and separate execution bits for kernel and userspace - Do not assume that all page tables start with U=1/W=1/NX=0 at the root, as AMD GMET needs to have U=0 at the root - Introduce common assembly macros for use within Intel and AMD vendor-specific vmentry code. This touches the SPEC_CTRL handling, which is now entirely done in assembly for Intel (by reusing the AMD code that already existed), and register save/restore which uses some macro magic to compute the offsets in the struct. Both of these are preparatory changes for upcoming APX support - Clean up KVM's register tracking and storage, primarily to prepare for APX support, which expands the maximum number of GPRs from 16 to 32 - Keep a single copy of the PDPTRs rather than two, since architecturally there is just one - Handle EXIT_FASTPATH_EXIT_USERSPACE in vendor code to ensure vendor code gets a chance to handle things like reaping the PML buffer - Update KVM's view of PV async enabling if and only if the MSR write fully succeeds - Fix a variety of issues where the emulator doesn't honor guest-debug state, and clean up related code along the way - Synthesize EPT Violation and #NPF "error code" bits when injecting faults into L1 that didn't originate in hardware (in which case the VMCS/VMCB doesn't hold relevant information) - Add support for virtualizing (well, emulating) AMD's flavor of CPL>0 CPUID faulting - Clean up the GPR APIs so that KVM's use of "raw" is consistent, and fix a variety of minor bugs along the way - Fix an OOB memory access 
due to not checking the VP ID when handling a Hyper-V PV TLB flush for L2 - Fix a bug in the mediated PMU's handling of fixed counters that allowed the guest to bypass the PMU event filter - Allow userspace to return EAGAIN when handling SNP and TDX hypercalls, so that KVM can forward a "retry" status code to the guest, and reserve all unused error codes for future usage - Overhaul the TDP MMU => S-EPT code to move as much S-EPT specific logic as possible into the TDX code, and to funnel (almost) all S-EPT updates into a single chokepoint. The motivation is largely to prepare for upcoming Dynamic PAMT support, but the cleanups are nice to have on their own - Plug a hole in shadow page table handling, where KVM fails to recursively zap nested EPT/NPT shadow page tables when the nested hypervisor tears down its own EPT/NPT page tables from the bottom up x86 (Intel): - Support for nested MBEC (Mode-Based Execute Control), see above in the generic section; also run with MBEC enabled even for non-nested mode - Use the kernel's "enum pg_level" in the TDX APIs instead of the TDX-Module's level definitions (which are 0-based) - Rework the TDX memory APIs to not require/assume that guest memory is backed by "struct page" (in preparation for guest_memfd hugepage support) - Fix a largely benign bug where KVM TDX would incorrectly state it could emulate several x2APIC MSRs - Use the "safe" WRMSR API when proxying LBR MSR writes as the to-be-written value is guest controlled and completely unvalidated x86 (AMD): - Support for nested GMET (Guest Mode Execution Trap), see above in the generic section; also run with GMET enabled even for non-nested mode - Fixes and minor cleanups to GHCB handling, on top of the earlier work already merged into 7.1-rc - Ensure KVM's copy of CR0 and CR3 are up-to-date prior to invoking fastpath handlers - Add support for virtualizing gPAT (KVM previously just used L1's PAT when running L2) - Fix goofs where KVM mishandles side effects (e.g. 
single-step and PMC updates) when emulating VMRUN - Fix a variety of bugs in AVIC's handling of x2APIC MSR interception, most notably where KVM didn't disable interception of IRR, ISR, and TMR regs - Add support for virtualizing Host-Only/Guest-Only bits in the mediated PMU - Don't advertise support for unusable VM types, and account for VM types that are disabled by firmware, e.g. to mitigate security vulnerabilities - Rewrite the SEV {en,de}crypt debug ioctls as they were riddled with bugs and unnecessarily complicated, and add comprehensive tests - Clean up and deduplicate the SEV page pinning code - Fix minor goofs related to writing back CPUID information after firmware rejects a CPUID page for an SNP vCPU Generic: - Rename invalidate_begin() to invalidate_start() throughout KVM to follow the kernel's nomenclature, e.g. for mmu_notifiers - Use guard() to clean up various KVM+VFIO flows - Minor cleanups guest_memfd: - Return -EEXIST instead of -EINVAL if userspace attempts to bind a gmem range to multiple memslots, and fix the test that was supposed to ensure KVM returns -EEXIST - Treat memslot binding offsets and sizes as unsigned values to fix a bug where KVM interprets a large "offset + size" as a negative value and allows a nonsensical offset - Use the inode number instead of the page offset for the NUMA interleaving index to fix a bug where the effective index would jump by two for consecutive pages (the caller also adds in the page offset) Selftests: - Randomize the dirty log test's delay when reaping the bitmap on the first pass, as always waiting only 1ms hid a KVM RISC-V bug as the test reaped the bitmap before KVM could build up enough state to hit the bug - A pile of one-off fixes and cleanups" * tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm: (326 commits) KVM: x86/mmu: Ensure hugepage is in by slot before checking max mapping level KVM: x86: Fix shadow paging use-after-free due to unexpected role KVM: s390: Introducing 
kvm_arch_set_irq_inatomic fast inject KVM: s390: Enable adapter_indicators_set to use mapped pages KVM: s390: Add map/unmap ioctl and clean mappings post-guest riscv: kvm: Use endian-specific __lelong for NACL shared memory KVM: selftests: access_tracking_perf_test: bump number of NUMA nodes to 32 KVM: s390: vsie: Implement ASTFLEIE facility 2 KVM: s390: vsie: Refactor handle_stfle s390/sclp: Detect ASTFLEIE 2 facility KVM: s390: Minor refactor of base/ext facility lists KVM: x86/mmu: move pdptrs out of the MMU KVM: x86: check that kvm_handle_invpcid is only invoked with shadow paging KVM: nSVM: invalidate cached PDPTRs across nested NPT transitions KVM: nVMX: remove unnecessary code in prepare_vmcs02_rare KVM: x86: remove nested_mmu from mmu_is_nested() KVM: arm64: vgic-its: Make ABI commit helpers return void KVM: s390: Initialize KVM_S390_GET_CMMA_BITS memory LoongArch: KVM: Add missing slots_lock for device register/unregister LoongArch: KVM: Validate irqchip index in irqfd routing ...
Diffstat (limited to 'include/linux')
-rw-r--r--include/linux/call_once.h2
-rw-r--r--include/linux/irqchip/riscv-imsic.h5
-rw-r--r--include/linux/kvm_host.h22
-rw-r--r--include/linux/psp-sev.h37
4 files changed, 56 insertions, 10 deletions
diff --git a/include/linux/call_once.h b/include/linux/call_once.h
index 13cd6469e7e5..1625a9d6ff5b 100644
--- a/include/linux/call_once.h
+++ b/include/linux/call_once.h
@@ -36,7 +36,7 @@ do { \
* it returns a zero or positive value, mark @once as completed. Return
* the value returned by @cb
*
- * If @once has completed succesfully before, return 0.
+ * If @once has completed successfully before, return 0.
*
* The call to @cb is implicitly surrounded by a mutex, though for
* efficiency the * function avoids taking it after the first call.
diff --git a/include/linux/irqchip/riscv-imsic.h b/include/linux/irqchip/riscv-imsic.h
index 4b348836de7a..61af3a5bea09 100644
--- a/include/linux/irqchip/riscv-imsic.h
+++ b/include/linux/irqchip/riscv-imsic.h
@@ -40,6 +40,9 @@
struct imsic_local_config {
phys_addr_t msi_pa;
void __iomem *msi_va;
+
+ /* Number of guest interrupt files per-HART */
+ u32 nr_guest_files;
};
struct imsic_global_config {
@@ -68,7 +71,7 @@ struct imsic_global_config {
/* Number of guest interrupt identities */
u32 nr_guest_ids;
- /* Number of guest interrupt files per core */
+ /* Number of guest interrupt files across all HARTs */
u32 nr_guest_files;
/* Per-CPU IMSIC addresses */
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 4c14aee1fb06..ab8cfaec82d3 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -1390,20 +1390,20 @@ void mark_page_dirty_in_slot(struct kvm *kvm, const struct kvm_memory_slot *mems
void mark_page_dirty(struct kvm *kvm, gfn_t gfn);
void kvm_vcpu_mark_page_dirty(struct kvm_vcpu *vcpu, gfn_t gfn);
-int __kvm_vcpu_map(struct kvm_vcpu *vcpu, gpa_t gpa, struct kvm_host_map *map,
+int __kvm_vcpu_map(struct kvm_vcpu *vcpu, gfn_t gfn, struct kvm_host_map *map,
bool writable);
void kvm_vcpu_unmap(struct kvm_vcpu *vcpu, struct kvm_host_map *map);
-static inline int kvm_vcpu_map(struct kvm_vcpu *vcpu, gpa_t gpa,
+static inline int kvm_vcpu_map(struct kvm_vcpu *vcpu, gfn_t gfn,
struct kvm_host_map *map)
{
- return __kvm_vcpu_map(vcpu, gpa, map, true);
+ return __kvm_vcpu_map(vcpu, gfn, map, true);
}
-static inline int kvm_vcpu_map_readonly(struct kvm_vcpu *vcpu, gpa_t gpa,
+static inline int kvm_vcpu_map_readonly(struct kvm_vcpu *vcpu, gfn_t gfn,
struct kvm_host_map *map)
{
- return __kvm_vcpu_map(vcpu, gpa, map, false);
+ return __kvm_vcpu_map(vcpu, gfn, map, false);
}
static inline void kvm_vcpu_map_mark_dirty(struct kvm_vcpu *vcpu,
@@ -1562,7 +1562,7 @@ void kvm_mmu_free_memory_cache(struct kvm_mmu_memory_cache *mc);
void *kvm_mmu_memory_cache_alloc(struct kvm_mmu_memory_cache *mc);
#endif
-void kvm_mmu_invalidate_begin(struct kvm *kvm);
+void kvm_mmu_invalidate_start(struct kvm *kvm);
void kvm_mmu_invalidate_range_add(struct kvm *kvm, gfn_t start, gfn_t end);
void kvm_mmu_invalidate_end(struct kvm *kvm);
bool kvm_mmu_unmap_gfn_range(struct kvm *kvm, struct kvm_gfn_range *range);
@@ -1815,6 +1815,11 @@ void kvm_unregister_irq_ack_notifier(struct kvm *kvm,
struct kvm_irq_ack_notifier *kian);
bool kvm_arch_irqfd_allowed(struct kvm *kvm, struct kvm_irqfd *args);
+static inline bool is_gfn_in_memslot(const struct kvm_memory_slot *slot, gfn_t gfn)
+{
+ return gfn >= slot->base_gfn && gfn < slot->base_gfn + slot->npages;
+}
+
/*
* Returns a pointer to the memslot if it contains gfn.
* Otherwise returns NULL.
@@ -1825,7 +1830,7 @@ try_get_memslot(struct kvm_memory_slot *slot, gfn_t gfn)
if (!slot)
return NULL;
- if (gfn >= slot->base_gfn && gfn < slot->base_gfn + slot->npages)
+ if (is_gfn_in_memslot(slot, gfn))
return slot;
else
return NULL;
@@ -2596,7 +2601,8 @@ int kvm_arch_gmem_prepare(struct kvm *kvm, gfn_t gfn, kvm_pfn_t pfn, int max_ord
typedef int (*kvm_gmem_populate_cb)(struct kvm *kvm, gfn_t gfn, kvm_pfn_t pfn,
struct page *page, void *opaque);
-long kvm_gmem_populate(struct kvm *kvm, gfn_t gfn, void __user *src, long npages,
+long kvm_gmem_populate(struct kvm *kvm, gfn_t start_gfn, void __user *src,
+ long npages, bool may_writeback_src,
kvm_gmem_populate_cb post_populate, void *opaque);
#endif
diff --git a/include/linux/psp-sev.h b/include/linux/psp-sev.h
index d5099a2baca5..ce16bbc0b308 100644
--- a/include/linux/psp-sev.h
+++ b/include/linux/psp-sev.h
@@ -902,6 +902,42 @@ struct snp_feature_info {
/* Feature bits in EBX */
#define SNP_SEV_TIO_SUPPORTED BIT(1)
+/**
+ * struct sev_snp_tcb_version_genoa_milan
+ *
+ * @boot_loader: SVN of PSP bootloader
+ * @tee: SVN of PSP operating system
+ * @reserved: reserved
+ * @snp: SVN of SNP firmware
+ * @microcode: Lowest current patch level of all cores
+ */
+struct sev_snp_tcb_version_genoa_milan {
+ u8 boot_loader;
+ u8 tee;
+ u8 reserved[4];
+ u8 snp;
+ u8 microcode;
+};
+
+/**
+ * struct sev_snp_tcb_version_turin
+ *
+ * @fmc: SVN of FMC firmware
+ * @boot_loader: SVN of PSP bootloader
+ * @tee: SVN of PSP operating system
+ * @snp: SVN of SNP firmware
+ * @reserved: reserved
+ * @microcode: Lowest current patch level of all cores
+ */
+struct sev_snp_tcb_version_turin {
+ u8 fmc;
+ u8 boot_loader;
+ u8 tee;
+ u8 snp;
+ u8 reserved[3];
+ u8 microcode;
+};
+
#ifdef CONFIG_CRYPTO_DEV_SP_PSP
/**
@@ -1048,6 +1084,7 @@ void snp_free_firmware_page(void *addr);
void sev_platform_shutdown(void);
bool sev_is_snp_ciphertext_hiding_supported(void);
u64 sev_get_snp_policy_bits(void);
+int sev_firmware_supported_vm_types(void);
#else /* !CONFIG_CRYPTO_DEV_SP_PSP */