summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorMarc Zyngier <maz@kernel.org>2026-04-08 12:22:35 +0100
committerMarc Zyngier <maz@kernel.org>2026-04-08 12:22:35 +0100
commitf8078d51ee232c8d4fa552d30e06c641b944e2c2 (patch)
treeb7c6c8c3c8a568b0ce98fa9b31a4118ed15bc3d5
parent2de32a25a3f721052c9aaf753a65b96f63c2c7d9 (diff)
parentce29261ec6482de54320c03398eb30e9615aee40 (diff)
Merge branch kvm-arm64/vgic-v5-ppi into kvmarm-master/next
* kvm-arm64/vgic-v5-ppi: (40 commits) : . : Add initial GICv5 support for KVM guests, only adding PPI support : for the time being. Patches courtesy of Sascha Bischoff. : : From the cover letter: : : "This is v7 of the patch series to add the virtual GICv5 [1] device : (vgic_v5). Only PPIs are supported by this initial series, and the : vgic_v5 implementation is restricted to the CPU interface, : only. Further patch series are to follow in due course, and will add : support for SPIs, LPIs, the GICv5 IRS, and the GICv5 ITS." : . KVM: arm64: selftests: Add no-vgic-v5 selftest KVM: arm64: selftests: Introduce a minimal GICv5 PPI selftest KVM: arm64: gic-v5: Communicate userspace-driveable PPIs via a UAPI Documentation: KVM: Introduce documentation for VGICv5 KVM: arm64: gic-v5: Probe for GICv5 device KVM: arm64: gic-v5: Set ICH_VCTLR_EL2.En on boot KVM: arm64: gic-v5: Introduce kvm_arm_vgic_v5_ops and register them KVM: arm64: gic-v5: Hide FEAT_GCIE from NV GICv5 guests KVM: arm64: gic: Hide GICv5 for protected guests KVM: arm64: gic-v5: Mandate architected PPI for PMU emulation on GICv5 KVM: arm64: gic-v5: Enlighten arch timer for GICv5 irqchip/gic-v5: Introduce minimal irq_set_type() for PPIs KVM: arm64: gic-v5: Initialise ID and priority bits when resetting vcpu KVM: arm64: gic-v5: Create and initialise vgic_v5 KVM: arm64: gic-v5: Support GICv5 interrupts with KVM_IRQ_LINE KVM: arm64: gic-v5: Implement direct injection of PPIs KVM: arm64: Introduce set_direct_injection irq_op KVM: arm64: gic-v5: Trap and mask guest ICC_PPI_ENABLERx_EL1 writes KVM: arm64: gic-v5: Check for pending PPIs KVM: arm64: gic-v5: Clear TWI if single task running ... Signed-off-by: Marc Zyngier <maz@kernel.org>
-rw-r--r--Documentation/virt/kvm/api.rst6
-rw-r--r--Documentation/virt/kvm/devices/arm-vgic-v5.rst50
-rw-r--r--Documentation/virt/kvm/devices/index.rst1
-rw-r--r--Documentation/virt/kvm/devices/vcpu.rst5
-rw-r--r--arch/arm64/include/asm/el2_setup.h2
-rw-r--r--arch/arm64/include/asm/kvm_asm.h2
-rw-r--r--arch/arm64/include/asm/kvm_host.h34
-rw-r--r--arch/arm64/include/asm/kvm_hyp.h10
-rw-r--r--arch/arm64/include/asm/sysreg.h7
-rw-r--r--arch/arm64/include/asm/vncr_mapping.h3
-rw-r--r--arch/arm64/include/uapi/asm/kvm.h1
-rw-r--r--arch/arm64/kvm/arch_timer.c108
-rw-r--r--arch/arm64/kvm/arm.c44
-rw-r--r--arch/arm64/kvm/config.c123
-rw-r--r--arch/arm64/kvm/emulate-nested.c68
-rw-r--r--arch/arm64/kvm/hyp/include/hyp/switch.h27
-rw-r--r--arch/arm64/kvm/hyp/nvhe/Makefile2
-rw-r--r--arch/arm64/kvm/hyp/nvhe/hyp-main.c16
-rw-r--r--arch/arm64/kvm/hyp/nvhe/switch.c15
-rw-r--r--arch/arm64/kvm/hyp/nvhe/sys_regs.c8
-rw-r--r--arch/arm64/kvm/hyp/vgic-v5-sr.c166
-rw-r--r--arch/arm64/kvm/hyp/vhe/Makefile2
-rw-r--r--arch/arm64/kvm/nested.c5
-rw-r--r--arch/arm64/kvm/pmu-emul.c20
-rw-r--r--arch/arm64/kvm/sys_regs.c176
-rw-r--r--arch/arm64/kvm/vgic/vgic-init.c214
-rw-r--r--arch/arm64/kvm/vgic/vgic-kvm-device.c107
-rw-r--r--arch/arm64/kvm/vgic/vgic-mmio.c40
-rw-r--r--arch/arm64/kvm/vgic/vgic-v3.c2
-rw-r--r--arch/arm64/kvm/vgic/vgic-v5.c503
-rw-r--r--arch/arm64/kvm/vgic/vgic.c173
-rw-r--r--arch/arm64/kvm/vgic/vgic.h53
-rw-r--r--arch/arm64/tools/sysreg480
-rw-r--r--drivers/irqchip/irq-gic-v5.c18
-rw-r--r--include/kvm/arm_arch_timer.h11
-rw-r--r--include/kvm/arm_pmu.h5
-rw-r--r--include/kvm/arm_vgic.h191
-rw-r--r--include/linux/irqchip/arm-gic-v5.h27
-rw-r--r--include/linux/kvm_host.h1
-rw-r--r--include/uapi/linux/kvm.h2
-rw-r--r--tools/arch/arm64/include/uapi/asm/kvm.h1
-rw-r--r--tools/include/uapi/linux/kvm.h2
-rw-r--r--tools/testing/selftests/kvm/Makefile.kvm3
-rw-r--r--tools/testing/selftests/kvm/arm64/no-vgic-v3.c177
-rw-r--r--tools/testing/selftests/kvm/arm64/no-vgic.c297
-rw-r--r--tools/testing/selftests/kvm/arm64/vgic_v5.c228
-rw-r--r--tools/testing/selftests/kvm/include/arm64/gic_v5.h150
47 files changed, 3204 insertions, 382 deletions
diff --git a/Documentation/virt/kvm/api.rst b/Documentation/virt/kvm/api.rst
index 032516783e96..03d87d9b97d9 100644
--- a/Documentation/virt/kvm/api.rst
+++ b/Documentation/virt/kvm/api.rst
@@ -907,10 +907,12 @@ The irq_type field has the following values:
- KVM_ARM_IRQ_TYPE_CPU:
out-of-kernel GIC: irq_id 0 is IRQ, irq_id 1 is FIQ
- KVM_ARM_IRQ_TYPE_SPI:
- in-kernel GIC: SPI, irq_id between 32 and 1019 (incl.)
+ in-kernel GICv2/GICv3: SPI, irq_id between 32 and 1019 (incl.)
(the vcpu_index field is ignored)
+ in-kernel GICv5: SPI, irq_id between 0 and 65535 (incl.)
- KVM_ARM_IRQ_TYPE_PPI:
- in-kernel GIC: PPI, irq_id between 16 and 31 (incl.)
+ in-kernel GICv2/GICv3: PPI, irq_id between 16 and 31 (incl.)
+ in-kernel GICv5: PPI, irq_id between 0 and 127 (incl.)
(The irq_id field thus corresponds nicely to the IRQ ID in the ARM GIC specs)
diff --git a/Documentation/virt/kvm/devices/arm-vgic-v5.rst b/Documentation/virt/kvm/devices/arm-vgic-v5.rst
new file mode 100644
index 000000000000..29335ea823fc
--- /dev/null
+++ b/Documentation/virt/kvm/devices/arm-vgic-v5.rst
@@ -0,0 +1,50 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+====================================================
+ARM Virtual Generic Interrupt Controller v5 (VGICv5)
+====================================================
+
+
+Device types supported:
+ - KVM_DEV_TYPE_ARM_VGIC_V5 ARM Generic Interrupt Controller v5.0
+
+Only one VGIC instance may be instantiated through this API. The created VGIC
+will act as the VM interrupt controller, requiring emulated user-space devices
+to inject interrupts to the VGIC instead of directly to CPUs.
+
+Creating a guest GICv5 device requires a GICv5 host. The current VGICv5
+device only supports PPI interrupts. These can either be injected from emulated
+in-kernel devices (such as the Arch Timer, or PMU), or via the KVM_IRQ_LINE
+ioctl.
+
+Groups:
+ KVM_DEV_ARM_VGIC_GRP_CTRL
+ Attributes:
+
+ KVM_DEV_ARM_VGIC_CTRL_INIT
+ request the initialization of the VGIC, no additional parameter in
+ kvm_device_attr.addr. Must be called after all VCPUs have been created.
+
+ KVM_DEV_ARM_VGIC_USERSPACE_PPIS
+ request the mask of userspace-drivable PPIs. Only a subset of the PPIs can
+ be directly driven from userspace with GICv5, and the returned mask
+ informs userspace of which it is allowed to drive via KVM_IRQ_LINE.
+
+ Userspace must allocate and point to __u64[2] of data in
+ kvm_device_attr.addr. When this call returns, the provided memory will be
+ populated with the userspace PPI mask. The lower __u64 contains the mask
+ for the lower 64 PPIs, with the remaining 64 being in the second __u64.
+
+ This is a read-only attribute, and cannot be set. Attempts to set it are
+ rejected.
+
+ Errors:
+
+ ======= ========================================================
+ -ENXIO VGIC not properly configured as required prior to calling
+ this attribute
+ -ENODEV no online VCPU
+ -ENOMEM memory shortage when allocating vgic internal data
+ -EFAULT Invalid guest ram access
+ -EBUSY One or more VCPUS are running
+ ======= ========================================================
diff --git a/Documentation/virt/kvm/devices/index.rst b/Documentation/virt/kvm/devices/index.rst
index 192cda7405c8..70845aba38f4 100644
--- a/Documentation/virt/kvm/devices/index.rst
+++ b/Documentation/virt/kvm/devices/index.rst
@@ -10,6 +10,7 @@ Devices
arm-vgic-its
arm-vgic
arm-vgic-v3
+ arm-vgic-v5
mpic
s390_flic
vcpu
diff --git a/Documentation/virt/kvm/devices/vcpu.rst b/Documentation/virt/kvm/devices/vcpu.rst
index 60bf205cb373..5e3805820010 100644
--- a/Documentation/virt/kvm/devices/vcpu.rst
+++ b/Documentation/virt/kvm/devices/vcpu.rst
@@ -37,7 +37,8 @@ Returns:
A value describing the PMUv3 (Performance Monitor Unit v3) overflow interrupt
number for this vcpu. This interrupt could be a PPI or SPI, but the interrupt
type must be same for each vcpu. As a PPI, the interrupt number is the same for
-all vcpus, while as an SPI it must be a separate number per vcpu.
+all vcpus, while as an SPI it must be a separate number per vcpu. For
+GICv5-based guests, the architected PPI (23) must be used.
1.2 ATTRIBUTE: KVM_ARM_VCPU_PMU_V3_INIT
---------------------------------------
@@ -50,7 +51,7 @@ Returns:
-EEXIST Interrupt number already used
-ENODEV PMUv3 not supported or GIC not initialized
-ENXIO PMUv3 not supported, missing VCPU feature or interrupt
- number not set
+ number not set (non-GICv5 guests, only)
-EBUSY PMUv3 already initialized
======= ======================================================
diff --git a/arch/arm64/include/asm/el2_setup.h b/arch/arm64/include/asm/el2_setup.h
index 85f4c1615472..998b2a3f615a 100644
--- a/arch/arm64/include/asm/el2_setup.h
+++ b/arch/arm64/include/asm/el2_setup.h
@@ -248,6 +248,8 @@
ICH_HFGWTR_EL2_ICC_CR0_EL1 | \
ICH_HFGWTR_EL2_ICC_APR_EL1)
msr_s SYS_ICH_HFGWTR_EL2, x0 // Disable reg write traps
+ mov x0, #(ICH_VCTLR_EL2_En)
+ msr_s SYS_ICH_VCTLR_EL2, x0 // Enable vHPPI selection
.Lskip_gicv5_\@:
.endm
diff --git a/arch/arm64/include/asm/kvm_asm.h b/arch/arm64/include/asm/kvm_asm.h
index c8eb992d3ac8..724319298e71 100644
--- a/arch/arm64/include/asm/kvm_asm.h
+++ b/arch/arm64/include/asm/kvm_asm.h
@@ -81,6 +81,8 @@ enum __kvm_host_smccc_func {
__KVM_HOST_SMCCC_FUNC___kvm_timer_set_cntvoff,
__KVM_HOST_SMCCC_FUNC___vgic_v3_save_aprs,
__KVM_HOST_SMCCC_FUNC___vgic_v3_restore_vmcr_aprs,
+ __KVM_HOST_SMCCC_FUNC___vgic_v5_save_apr,
+ __KVM_HOST_SMCCC_FUNC___vgic_v5_restore_vmcr_apr,
__KVM_HOST_SMCCC_FUNC___pkvm_reserve_vm,
__KVM_HOST_SMCCC_FUNC___pkvm_unreserve_vm,
__KVM_HOST_SMCCC_FUNC___pkvm_init_vm,
diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h
index 7fff5bede09a..7298a68eaef9 100644
--- a/arch/arm64/include/asm/kvm_host.h
+++ b/arch/arm64/include/asm/kvm_host.h
@@ -287,6 +287,9 @@ enum fgt_group_id {
HDFGRTR2_GROUP,
HDFGWTR2_GROUP = HDFGRTR2_GROUP,
HFGITR2_GROUP,
+ ICH_HFGRTR_GROUP,
+ ICH_HFGWTR_GROUP = ICH_HFGRTR_GROUP,
+ ICH_HFGITR_GROUP,
/* Must be last */
__NR_FGT_GROUP_IDS__
@@ -620,6 +623,10 @@ enum vcpu_sysreg {
VNCR(ICH_HCR_EL2),
VNCR(ICH_VMCR_EL2),
+ VNCR(ICH_HFGRTR_EL2),
+ VNCR(ICH_HFGWTR_EL2),
+ VNCR(ICH_HFGITR_EL2),
+
NR_SYS_REGS /* Nothing after this line! */
};
@@ -675,6 +682,9 @@ extern struct fgt_masks hfgwtr2_masks;
extern struct fgt_masks hfgitr2_masks;
extern struct fgt_masks hdfgrtr2_masks;
extern struct fgt_masks hdfgwtr2_masks;
+extern struct fgt_masks ich_hfgrtr_masks;
+extern struct fgt_masks ich_hfgwtr_masks;
+extern struct fgt_masks ich_hfgitr_masks;
extern struct fgt_masks kvm_nvhe_sym(hfgrtr_masks);
extern struct fgt_masks kvm_nvhe_sym(hfgwtr_masks);
@@ -687,6 +697,9 @@ extern struct fgt_masks kvm_nvhe_sym(hfgwtr2_masks);
extern struct fgt_masks kvm_nvhe_sym(hfgitr2_masks);
extern struct fgt_masks kvm_nvhe_sym(hdfgrtr2_masks);
extern struct fgt_masks kvm_nvhe_sym(hdfgwtr2_masks);
+extern struct fgt_masks kvm_nvhe_sym(ich_hfgrtr_masks);
+extern struct fgt_masks kvm_nvhe_sym(ich_hfgwtr_masks);
+extern struct fgt_masks kvm_nvhe_sym(ich_hfgitr_masks);
struct kvm_cpu_context {
struct user_pt_regs regs; /* sp = sp_el0 */
@@ -787,6 +800,21 @@ struct kvm_host_data {
/* Last vgic_irq part of the AP list recorded in an LR */
struct vgic_irq *last_lr_irq;
+
+ /* PPI state tracking for GICv5-based guests */
+ struct {
+ /*
+ * For tracking the PPI pending state, we need both the entry
+ * state and exit state to correctly detect edges as it is
+ * possible that an interrupt has been injected in software in
+ * the interim.
+ */
+ DECLARE_BITMAP(pendr_entry, VGIC_V5_NR_PRIVATE_IRQS);
+ DECLARE_BITMAP(pendr_exit, VGIC_V5_NR_PRIVATE_IRQS);
+
+ /* The saved state of the regs when leaving the guest */
+ DECLARE_BITMAP(activer_exit, VGIC_V5_NR_PRIVATE_IRQS);
+ } vgic_v5_ppi_state;
};
struct kvm_host_psci_config {
@@ -1662,6 +1690,11 @@ static __always_inline enum fgt_group_id __fgt_reg_to_group_id(enum vcpu_sysreg
case HDFGRTR2_EL2:
case HDFGWTR2_EL2:
return HDFGRTR2_GROUP;
+ case ICH_HFGRTR_EL2:
+ case ICH_HFGWTR_EL2:
+ return ICH_HFGRTR_GROUP;
+ case ICH_HFGITR_EL2:
+ return ICH_HFGITR_GROUP;
default:
BUILD_BUG_ON(1);
}
@@ -1676,6 +1709,7 @@ static __always_inline enum fgt_group_id __fgt_reg_to_group_id(enum vcpu_sysreg
case HDFGWTR_EL2: \
case HFGWTR2_EL2: \
case HDFGWTR2_EL2: \
+ case ICH_HFGWTR_EL2: \
p = &(vcpu)->arch.fgt[id].w; \
break; \
default: \
diff --git a/arch/arm64/include/asm/kvm_hyp.h b/arch/arm64/include/asm/kvm_hyp.h
index 4bf63025061e..8d06b62e7188 100644
--- a/arch/arm64/include/asm/kvm_hyp.h
+++ b/arch/arm64/include/asm/kvm_hyp.h
@@ -87,6 +87,15 @@ void __vgic_v3_save_aprs(struct vgic_v3_cpu_if *cpu_if);
void __vgic_v3_restore_vmcr_aprs(struct vgic_v3_cpu_if *cpu_if);
int __vgic_v3_perform_cpuif_access(struct kvm_vcpu *vcpu);
+/* GICv5 */
+void __vgic_v5_save_apr(struct vgic_v5_cpu_if *cpu_if);
+void __vgic_v5_restore_vmcr_apr(struct vgic_v5_cpu_if *cpu_if);
+/* No hypercalls for the following */
+void __vgic_v5_save_ppi_state(struct vgic_v5_cpu_if *cpu_if);
+void __vgic_v5_restore_ppi_state(struct vgic_v5_cpu_if *cpu_if);
+void __vgic_v5_save_state(struct vgic_v5_cpu_if *cpu_if);
+void __vgic_v5_restore_state(struct vgic_v5_cpu_if *cpu_if);
+
#ifdef __KVM_NVHE_HYPERVISOR__
void __timer_enable_traps(struct kvm_vcpu *vcpu);
void __timer_disable_traps(struct kvm_vcpu *vcpu);
@@ -135,6 +144,7 @@ void __noreturn __host_enter(struct kvm_cpu_context *host_ctxt);
extern u64 kvm_nvhe_sym(id_aa64pfr0_el1_sys_val);
extern u64 kvm_nvhe_sym(id_aa64pfr1_el1_sys_val);
+extern u64 kvm_nvhe_sym(id_aa64pfr2_el1_sys_val);
extern u64 kvm_nvhe_sym(id_aa64isar0_el1_sys_val);
extern u64 kvm_nvhe_sym(id_aa64isar1_el1_sys_val);
extern u64 kvm_nvhe_sym(id_aa64isar2_el1_sys_val);
diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h
index f4436ecc630c..938cdb248f83 100644
--- a/arch/arm64/include/asm/sysreg.h
+++ b/arch/arm64/include/asm/sysreg.h
@@ -1052,6 +1052,7 @@
#define GICV5_OP_GIC_CDPRI sys_insn(1, 0, 12, 1, 2)
#define GICV5_OP_GIC_CDRCFG sys_insn(1, 0, 12, 1, 5)
#define GICV5_OP_GICR_CDIA sys_insn(1, 0, 12, 3, 0)
+#define GICV5_OP_GICR_CDNMIA sys_insn(1, 0, 12, 3, 1)
/* Definitions for GIC CDAFF */
#define GICV5_GIC_CDAFF_IAFFID_MASK GENMASK_ULL(47, 32)
@@ -1098,6 +1099,12 @@
#define GICV5_GIC_CDIA_TYPE_MASK GENMASK_ULL(31, 29)
#define GICV5_GIC_CDIA_ID_MASK GENMASK_ULL(23, 0)
+/* Definitions for GICR CDNMIA */
+#define GICV5_GICR_CDNMIA_VALID_MASK BIT_ULL(32)
+#define GICV5_GICR_CDNMIA_VALID(r) FIELD_GET(GICV5_GICR_CDNMIA_VALID_MASK, r)
+#define GICV5_GICR_CDNMIA_TYPE_MASK GENMASK_ULL(31, 29)
+#define GICV5_GICR_CDNMIA_ID_MASK GENMASK_ULL(23, 0)
+
#define gicr_insn(insn) read_sysreg_s(GICV5_OP_GICR_##insn)
#define gic_insn(v, insn) write_sysreg_s(v, GICV5_OP_GIC_##insn)
diff --git a/arch/arm64/include/asm/vncr_mapping.h b/arch/arm64/include/asm/vncr_mapping.h
index c2485a862e69..14366d35ce82 100644
--- a/arch/arm64/include/asm/vncr_mapping.h
+++ b/arch/arm64/include/asm/vncr_mapping.h
@@ -108,5 +108,8 @@
#define VNCR_MPAMVPM5_EL2 0x968
#define VNCR_MPAMVPM6_EL2 0x970
#define VNCR_MPAMVPM7_EL2 0x978
+#define VNCR_ICH_HFGITR_EL2 0xB10
+#define VNCR_ICH_HFGRTR_EL2 0xB18
+#define VNCR_ICH_HFGWTR_EL2 0xB20
#endif /* __ARM64_VNCR_MAPPING_H__ */
diff --git a/arch/arm64/include/uapi/asm/kvm.h b/arch/arm64/include/uapi/asm/kvm.h
index a792a599b9d6..1c13bfa2d38a 100644
--- a/arch/arm64/include/uapi/asm/kvm.h
+++ b/arch/arm64/include/uapi/asm/kvm.h
@@ -428,6 +428,7 @@ enum {
#define KVM_DEV_ARM_ITS_RESTORE_TABLES 2
#define KVM_DEV_ARM_VGIC_SAVE_PENDING_TABLES 3
#define KVM_DEV_ARM_ITS_CTRL_RESET 4
+#define KVM_DEV_ARM_VGIC_USERSPACE_PPIS 5
/* Device Control API on vcpu fd */
#define KVM_ARM_VCPU_PMU_V3_CTRL 0
diff --git a/arch/arm64/kvm/arch_timer.c b/arch/arm64/kvm/arch_timer.c
index 600f250753b4..67b989671b41 100644
--- a/arch/arm64/kvm/arch_timer.c
+++ b/arch/arm64/kvm/arch_timer.c
@@ -56,6 +56,12 @@ static struct irq_ops arch_timer_irq_ops = {
.get_input_level = kvm_arch_timer_get_input_level,
};
+static struct irq_ops arch_timer_irq_ops_vgic_v5 = {
+ .get_input_level = kvm_arch_timer_get_input_level,
+ .queue_irq_unlock = vgic_v5_ppi_queue_irq_unlock,
+ .set_direct_injection = vgic_v5_set_ppi_dvi,
+};
+
static int nr_timers(struct kvm_vcpu *vcpu)
{
if (!vcpu_has_nv(vcpu))
@@ -177,6 +183,10 @@ void get_timer_map(struct kvm_vcpu *vcpu, struct timer_map *map)
map->emul_ptimer = vcpu_ptimer(vcpu);
}
+ map->direct_vtimer->direct = true;
+ if (map->direct_ptimer)
+ map->direct_ptimer->direct = true;
+
trace_kvm_get_timer_map(vcpu->vcpu_id, map);
}
@@ -396,7 +406,11 @@ static bool kvm_timer_should_fire(struct arch_timer_context *timer_ctx)
int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu)
{
- return vcpu_has_wfit_active(vcpu) && wfit_delay_ns(vcpu) == 0;
+ struct arch_timer_context *vtimer = vcpu_vtimer(vcpu);
+ struct arch_timer_context *ptimer = vcpu_ptimer(vcpu);
+
+ return kvm_timer_should_fire(vtimer) || kvm_timer_should_fire(ptimer) ||
+ (vcpu_has_wfit_active(vcpu) && wfit_delay_ns(vcpu) == 0);
}
/*
@@ -447,6 +461,10 @@ static void kvm_timer_update_irq(struct kvm_vcpu *vcpu, bool new_level,
if (userspace_irqchip(vcpu->kvm))
return;
+ /* Skip injecting on GICv5 for directly injected (DVI'd) timers */
+ if (vgic_is_v5(vcpu->kvm) && timer_ctx->direct)
+ return;
+
kvm_vgic_inject_irq(vcpu->kvm, vcpu,
timer_irq(timer_ctx),
timer_ctx->irq.level,
@@ -674,6 +692,7 @@ static void kvm_timer_vcpu_load_gic(struct arch_timer_context *ctx)
phys_active = kvm_vgic_map_is_active(vcpu, timer_irq(ctx));
phys_active |= ctx->irq.level;
+ phys_active |= vgic_is_v5(vcpu->kvm);
set_timer_irq_phys_active(ctx, phys_active);
}
@@ -740,13 +759,11 @@ static void kvm_timer_vcpu_load_nested_switch(struct kvm_vcpu *vcpu,
ret = kvm_vgic_map_phys_irq(vcpu,
map->direct_vtimer->host_timer_irq,
- timer_irq(map->direct_vtimer),
- &arch_timer_irq_ops);
+ timer_irq(map->direct_vtimer));
WARN_ON_ONCE(ret);
ret = kvm_vgic_map_phys_irq(vcpu,
map->direct_ptimer->host_timer_irq,
- timer_irq(map->direct_ptimer),
- &arch_timer_irq_ops);
+ timer_irq(map->direct_ptimer));
WARN_ON_ONCE(ret);
}
}
@@ -864,7 +881,8 @@ void kvm_timer_vcpu_load(struct kvm_vcpu *vcpu)
get_timer_map(vcpu, &map);
if (static_branch_likely(&has_gic_active_state)) {
- if (vcpu_has_nv(vcpu))
+ /* We don't do NV on GICv5, yet */
+ if (vcpu_has_nv(vcpu) && !vgic_is_v5(vcpu->kvm))
kvm_timer_vcpu_load_nested_switch(vcpu, &map);
kvm_timer_vcpu_load_gic(map.direct_vtimer);
@@ -934,6 +952,12 @@ void kvm_timer_vcpu_put(struct kvm_vcpu *vcpu)
if (kvm_vcpu_is_blocking(vcpu))
kvm_timer_blocking(vcpu);
+
+ if (vgic_is_v5(vcpu->kvm)) {
+ set_timer_irq_phys_active(map.direct_vtimer, false);
+ if (map.direct_ptimer)
+ set_timer_irq_phys_active(map.direct_ptimer, false);
+ }
}
void kvm_timer_sync_nested(struct kvm_vcpu *vcpu)
@@ -1097,10 +1121,19 @@ void kvm_timer_vcpu_init(struct kvm_vcpu *vcpu)
HRTIMER_MODE_ABS_HARD);
}
+/*
+ * This is always called during kvm_arch_init_vm, but will also be
+ * called from kvm_vgic_create if we have a vGICv5.
+ */
void kvm_timer_init_vm(struct kvm *kvm)
{
+ /*
+ * Set up the default PPIs - note that we adjust them based on
+ * the model of the GIC as GICv5 uses a different way of
+ * describing interrupts.
+ */
for (int i = 0; i < NR_KVM_TIMERS; i++)
- kvm->arch.timer_data.ppi[i] = default_ppi[i];
+ kvm->arch.timer_data.ppi[i] = get_vgic_ppi(kvm, default_ppi[i]);
}
void kvm_timer_cpu_up(void)
@@ -1269,7 +1302,15 @@ static int timer_irq_set_irqchip_state(struct irq_data *d,
static void timer_irq_eoi(struct irq_data *d)
{
- if (!irqd_is_forwarded_to_vcpu(d))
+ /*
+ * On a GICv5 host, we still need to call EOI on the parent for
+ * PPIs. The host driver already handles irqs which are forwarded to
+ * vcpus, and skips the GIC CDDI while still doing the GIC CDEOI. This
+ * is required to emulate the EOIMode=1 on GICv5 hardware. Failure to
+ * call EOI unsurprisingly results in *BAD* lock-ups.
+ */
+ if (!irqd_is_forwarded_to_vcpu(d) ||
+ kvm_vgic_global_state.type == VGIC_V5)
irq_chip_eoi_parent(d);
}
@@ -1333,7 +1374,8 @@ static int kvm_irq_init(struct arch_timer_kvm_info *info)
host_vtimer_irq = info->virtual_irq;
kvm_irq_fixup_flags(host_vtimer_irq, &host_vtimer_irq_flags);
- if (kvm_vgic_global_state.no_hw_deactivation) {
+ if (kvm_vgic_global_state.no_hw_deactivation ||
+ kvm_vgic_global_state.type == VGIC_V5) {
struct fwnode_handle *fwnode;
struct irq_data *data;
@@ -1351,7 +1393,8 @@ static int kvm_irq_init(struct arch_timer_kvm_info *info)
return -ENOMEM;
}
- arch_timer_irq_ops.flags |= VGIC_IRQ_SW_RESAMPLE;
+ if (kvm_vgic_global_state.no_hw_deactivation)
+ arch_timer_irq_ops.flags |= VGIC_IRQ_SW_RESAMPLE;
WARN_ON(irq_domain_push_irq(domain, host_vtimer_irq,
(void *)TIMER_VTIMER));
}
@@ -1502,10 +1545,13 @@ static bool timer_irqs_are_valid(struct kvm_vcpu *vcpu)
break;
/*
- * We know by construction that we only have PPIs, so
- * all values are less than 32.
+ * We know by construction that we only have PPIs, so all values
+ * are less than 32 for non-GICv5 VGICs. On GICv5, they are
+ * architecturally defined to be under 32 too. However, we mask
+ * off most of the bits as we might be presented with a GICv5
+ * style PPI where the type is encoded in the top-bits.
*/
- ppis |= BIT(irq);
+ ppis |= BIT(irq & 0x1f);
}
valid = hweight32(ppis) == nr_timers(vcpu);
@@ -1543,6 +1589,7 @@ int kvm_timer_enable(struct kvm_vcpu *vcpu)
{
struct arch_timer_cpu *timer = vcpu_timer(vcpu);
struct timer_map map;
+ struct irq_ops *ops;
int ret;
if (timer->enabled)
@@ -1563,20 +1610,22 @@ int kvm_timer_enable(struct kvm_vcpu *vcpu)
get_timer_map(vcpu, &map);
+ ops = vgic_is_v5(vcpu->kvm) ? &arch_timer_irq_ops_vgic_v5 :
+ &arch_timer_irq_ops;
+
+ for (int i = 0; i < nr_timers(vcpu); i++)
+ kvm_vgic_set_irq_ops(vcpu, timer_irq(vcpu_get_timer(vcpu, i)), ops);
+
ret = kvm_vgic_map_phys_irq(vcpu,
map.direct_vtimer->host_timer_irq,
- timer_irq(map.direct_vtimer),
- &arch_timer_irq_ops);
+ timer_irq(map.direct_vtimer));
if (ret)
return ret;
- if (map.direct_ptimer) {
+ if (map.direct_ptimer)
ret = kvm_vgic_map_phys_irq(vcpu,
map.direct_ptimer->host_timer_irq,
- timer_irq(map.direct_ptimer),
- &arch_timer_irq_ops);
- }
-
+ timer_irq(map.direct_ptimer));
if (ret)
return ret;
@@ -1603,15 +1652,14 @@ int kvm_arm_timer_set_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr)
if (get_user(irq, uaddr))
return -EFAULT;
- if (!(irq_is_ppi(irq)))
+ if (!(irq_is_ppi(vcpu->kvm, irq)))
return -EINVAL;
- mutex_lock(&vcpu->kvm->arch.config_lock);
+ guard(mutex)(&vcpu->kvm->arch.config_lock);
if (test_bit(KVM_ARCH_FLAG_TIMER_PPIS_IMMUTABLE,
&vcpu->kvm->arch.flags)) {
- ret = -EBUSY;
- goto out;
+ return -EBUSY;
}
switch (attr->attr) {
@@ -1628,19 +1676,23 @@ int kvm_arm_timer_set_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr)
idx = TIMER_HPTIMER;
break;
default:
- ret = -ENXIO;
- goto out;
+ return -ENXIO;
}
/*
+ * The PPIs for the Arch Timers are architecturally defined for
+ * GICv5. Reject anything that changes them from the specified value.
+ */
+ if (vgic_is_v5(vcpu->kvm) && vcpu->kvm->arch.timer_data.ppi[idx] != irq)
+ return -EINVAL;
+
+ /*
* We cannot validate the IRQ unicity before we run, so take it at
* face value. The verdict will be given on first vcpu run, for each
* vcpu. Yes this is late. Blame it on the stupid API.
*/
vcpu->kvm->arch.timer_data.ppi[idx] = irq;
-out:
- mutex_unlock(&vcpu->kvm->arch.config_lock);
return ret;
}
diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c
index 04c43c9eb764..c4c810b01012 100644
--- a/arch/arm64/kvm/arm.c
+++ b/arch/arm64/kvm/arm.c
@@ -47,6 +47,9 @@
#include <kvm/arm_hypercalls.h>
#include <kvm/arm_pmu.h>
#include <kvm/arm_psci.h>
+#include <kvm/arm_vgic.h>
+
+#include <linux/irqchip/arm-gic-v5.h>
#include "sys_regs.h"
@@ -615,6 +618,9 @@ static bool kvm_vcpu_should_clear_twi(struct kvm_vcpu *vcpu)
if (unlikely(kvm_wfi_trap_policy != KVM_WFX_NOTRAP_SINGLE_TASK))
return kvm_wfi_trap_policy == KVM_WFX_NOTRAP;
+ if (vgic_is_v5(vcpu->kvm))
+ return single_task_running();
+
return single_task_running() &&
vcpu->kvm->arch.vgic.vgic_model == KVM_DEV_TYPE_ARM_VGIC_V3 &&
(atomic_read(&vcpu->arch.vgic_cpu.vgic_v3.its_vpe.vlpi_count) ||
@@ -938,6 +944,10 @@ int kvm_arch_vcpu_run_pid_change(struct kvm_vcpu *vcpu)
return ret;
}
+ ret = vgic_v5_finalize_ppi_state(kvm);
+ if (ret)
+ return ret;
+
if (is_protected_kvm_enabled()) {
ret = pkvm_create_hyp_vm(kvm);
if (ret)
@@ -1443,10 +1453,11 @@ static int vcpu_interrupt_line(struct kvm_vcpu *vcpu, int number, bool level)
int kvm_vm_ioctl_irq_line(struct kvm *kvm, struct kvm_irq_level *irq_level,
bool line_status)
{
- u32 irq = irq_level->irq;
unsigned int irq_type, vcpu_id, irq_num;
struct kvm_vcpu *vcpu = NULL;
bool level = irq_level->level;
+ u32 irq = irq_level->irq;
+ unsigned long *mask;
irq_type = (irq >> KVM_ARM_IRQ_TYPE_SHIFT) & KVM_ARM_IRQ_TYPE_MASK;
vcpu_id = (irq >> KVM_ARM_IRQ_VCPU_SHIFT) & KVM_ARM_IRQ_VCPU_MASK;
@@ -1476,16 +1487,37 @@ int kvm_vm_ioctl_irq_line(struct kvm *kvm, struct kvm_irq_level *irq_level,
if (!vcpu)
return -EINVAL;
- if (irq_num < VGIC_NR_SGIS || irq_num >= VGIC_NR_PRIVATE_IRQS)
+ if (vgic_is_v5(kvm)) {
+ if (irq_num >= VGIC_V5_NR_PRIVATE_IRQS)
+ return -EINVAL;
+
+ /*
+ * Only allow PPIs that are explicitly exposed to
+ * userspace to be driven via KVM_IRQ_LINE
+ */
+ mask = kvm->arch.vgic.gicv5_vm.userspace_ppis;
+ if (!test_bit(irq_num, mask))
+ return -EINVAL;
+
+ /* Build a GICv5-style IntID here */
+ irq_num = vgic_v5_make_ppi(irq_num);
+ } else if (irq_num < VGIC_NR_SGIS ||
+ irq_num >= VGIC_NR_PRIVATE_IRQS) {
return -EINVAL;
+ }
return kvm_vgic_inject_irq(kvm, vcpu, irq_num, level, NULL);
case KVM_ARM_IRQ_TYPE_SPI:
if (!irqchip_in_kernel(kvm))
return -ENXIO;
- if (irq_num < VGIC_NR_PRIVATE_IRQS)
- return -EINVAL;
+ if (vgic_is_v5(kvm)) {
+ /* Build a GICv5-style IntID here */
+ irq_num = vgic_v5_make_spi(irq_num);
+ } else {
+ if (irq_num < VGIC_NR_PRIVATE_IRQS)
+ return -EINVAL;
+ }
return kvm_vgic_inject_irq(kvm, NULL, irq_num, level, NULL);
}
@@ -2515,6 +2547,7 @@ static void kvm_hyp_init_symbols(void)
{
kvm_nvhe_sym(id_aa64pfr0_el1_sys_val) = get_hyp_id_aa64pfr0_el1();
kvm_nvhe_sym(id_aa64pfr1_el1_sys_val) = read_sanitised_ftr_reg(SYS_ID_AA64PFR1_EL1);
+ kvm_nvhe_sym(id_aa64pfr2_el1_sys_val) = read_sanitised_ftr_reg(SYS_ID_AA64PFR2_EL1);
kvm_nvhe_sym(id_aa64isar0_el1_sys_val) = read_sanitised_ftr_reg(SYS_ID_AA64ISAR0_EL1);
kvm_nvhe_sym(id_aa64isar1_el1_sys_val) = read_sanitised_ftr_reg(SYS_ID_AA64ISAR1_EL1);
kvm_nvhe_sym(id_aa64isar2_el1_sys_val) = read_sanitised_ftr_reg(SYS_ID_AA64ISAR2_EL1);
@@ -2537,6 +2570,9 @@ static void kvm_hyp_init_symbols(void)
kvm_nvhe_sym(hfgitr2_masks) = hfgitr2_masks;
kvm_nvhe_sym(hdfgrtr2_masks)= hdfgrtr2_masks;
kvm_nvhe_sym(hdfgwtr2_masks)= hdfgwtr2_masks;
+ kvm_nvhe_sym(ich_hfgrtr_masks) = ich_hfgrtr_masks;
+ kvm_nvhe_sym(ich_hfgwtr_masks) = ich_hfgwtr_masks;
+ kvm_nvhe_sym(ich_hfgitr_masks) = ich_hfgitr_masks;
/*
* Flush entire BSS since part of its data containing init symbols is read
diff --git a/arch/arm64/kvm/config.c b/arch/arm64/kvm/config.c
index d9f553cbf9df..e14685343191 100644
--- a/arch/arm64/kvm/config.c
+++ b/arch/arm64/kvm/config.c
@@ -225,6 +225,7 @@ struct reg_feat_map_desc {
#define FEAT_MTPMU ID_AA64DFR0_EL1, MTPMU, IMP
#define FEAT_HCX ID_AA64MMFR1_EL1, HCX, IMP
#define FEAT_S2PIE ID_AA64MMFR3_EL1, S2PIE, IMP
+#define FEAT_GCIE ID_AA64PFR2_EL1, GCIE, IMP
static bool not_feat_aa64el3(struct kvm *kvm)
{
@@ -1277,6 +1278,58 @@ static const struct reg_bits_to_feat_map vtcr_el2_feat_map[] = {
static const DECLARE_FEAT_MAP(vtcr_el2_desc, VTCR_EL2,
vtcr_el2_feat_map, FEAT_AA64EL2);
+static const struct reg_bits_to_feat_map ich_hfgrtr_feat_map[] = {
+ NEEDS_FEAT(ICH_HFGRTR_EL2_ICC_APR_EL1 |
+ ICH_HFGRTR_EL2_ICC_IDRn_EL1 |
+ ICH_HFGRTR_EL2_ICC_CR0_EL1 |
+ ICH_HFGRTR_EL2_ICC_HPPIR_EL1 |
+ ICH_HFGRTR_EL2_ICC_PCR_EL1 |
+ ICH_HFGRTR_EL2_ICC_ICSR_EL1 |
+ ICH_HFGRTR_EL2_ICC_IAFFIDR_EL1 |
+ ICH_HFGRTR_EL2_ICC_PPI_HMRn_EL1 |
+ ICH_HFGRTR_EL2_ICC_PPI_ENABLERn_EL1 |
+ ICH_HFGRTR_EL2_ICC_PPI_PENDRn_EL1 |
+ ICH_HFGRTR_EL2_ICC_PPI_PRIORITYRn_EL1 |
+ ICH_HFGRTR_EL2_ICC_PPI_ACTIVERn_EL1,
+ FEAT_GCIE),
+};
+
+static const DECLARE_FEAT_MAP_FGT(ich_hfgrtr_desc, ich_hfgrtr_masks,
+ ich_hfgrtr_feat_map, FEAT_GCIE);
+
+static const struct reg_bits_to_feat_map ich_hfgwtr_feat_map[] = {
+ NEEDS_FEAT(ICH_HFGWTR_EL2_ICC_APR_EL1 |
+ ICH_HFGWTR_EL2_ICC_CR0_EL1 |
+ ICH_HFGWTR_EL2_ICC_PCR_EL1 |
+ ICH_HFGWTR_EL2_ICC_ICSR_EL1 |
+ ICH_HFGWTR_EL2_ICC_PPI_ENABLERn_EL1 |
+ ICH_HFGWTR_EL2_ICC_PPI_PENDRn_EL1 |
+ ICH_HFGWTR_EL2_ICC_PPI_PRIORITYRn_EL1 |
+ ICH_HFGWTR_EL2_ICC_PPI_ACTIVERn_EL1,
+ FEAT_GCIE),
+};
+
+static const DECLARE_FEAT_MAP_FGT(ich_hfgwtr_desc, ich_hfgwtr_masks,
+ ich_hfgwtr_feat_map, FEAT_GCIE);
+
+static const struct reg_bits_to_feat_map ich_hfgitr_feat_map[] = {
+ NEEDS_FEAT(ICH_HFGITR_EL2_GICCDEN |
+ ICH_HFGITR_EL2_GICCDDIS |
+ ICH_HFGITR_EL2_GICCDPRI |
+ ICH_HFGITR_EL2_GICCDAFF |
+ ICH_HFGITR_EL2_GICCDPEND |
+ ICH_HFGITR_EL2_GICCDRCFG |
+ ICH_HFGITR_EL2_GICCDHM |
+ ICH_HFGITR_EL2_GICCDEOI |
+ ICH_HFGITR_EL2_GICCDDI |
+ ICH_HFGITR_EL2_GICRCDIA |
+ ICH_HFGITR_EL2_GICRCDNMIA,
+ FEAT_GCIE),
+};
+
+static const DECLARE_FEAT_MAP_FGT(ich_hfgitr_desc, ich_hfgitr_masks,
+ ich_hfgitr_feat_map, FEAT_GCIE);
+
static void __init check_feat_map(const struct reg_bits_to_feat_map *map,
int map_size, u64 resx, const char *str)
{
@@ -1328,6 +1381,9 @@ void __init check_feature_map(void)
check_reg_desc(&sctlr_el2_desc);
check_reg_desc(&mdcr_el2_desc);
check_reg_desc(&vtcr_el2_desc);
+ check_reg_desc(&ich_hfgrtr_desc);
+ check_reg_desc(&ich_hfgwtr_desc);
+ check_reg_desc(&ich_hfgitr_desc);
}
static bool idreg_feat_match(struct kvm *kvm, const struct reg_bits_to_feat_map *map)
@@ -1460,6 +1516,13 @@ void compute_fgu(struct kvm *kvm, enum fgt_group_id fgt)
val |= compute_fgu_bits(kvm, &hdfgrtr2_desc);
val |= compute_fgu_bits(kvm, &hdfgwtr2_desc);
break;
+ case ICH_HFGRTR_GROUP:
+ val |= compute_fgu_bits(kvm, &ich_hfgrtr_desc);
+ val |= compute_fgu_bits(kvm, &ich_hfgwtr_desc);
+ break;
+ case ICH_HFGITR_GROUP:
+ val |= compute_fgu_bits(kvm, &ich_hfgitr_desc);
+ break;
default:
BUG();
}
@@ -1531,6 +1594,15 @@ struct resx get_reg_fixed_bits(struct kvm *kvm, enum vcpu_sysreg reg)
case VTCR_EL2:
resx = compute_reg_resx_bits(kvm, &vtcr_el2_desc, 0, 0);
break;
+ case ICH_HFGRTR_EL2:
+ resx = compute_reg_resx_bits(kvm, &ich_hfgrtr_desc, 0, 0);
+ break;
+ case ICH_HFGWTR_EL2:
+ resx = compute_reg_resx_bits(kvm, &ich_hfgwtr_desc, 0, 0);
+ break;
+ case ICH_HFGITR_EL2:
+ resx = compute_reg_resx_bits(kvm, &ich_hfgitr_desc, 0, 0);
+ break;
default:
WARN_ON_ONCE(1);
resx = (typeof(resx)){};
@@ -1565,6 +1637,12 @@ static __always_inline struct fgt_masks *__fgt_reg_to_masks(enum vcpu_sysreg reg
return &hdfgrtr2_masks;
case HDFGWTR2_EL2:
return &hdfgwtr2_masks;
+ case ICH_HFGRTR_EL2:
+ return &ich_hfgrtr_masks;
+ case ICH_HFGWTR_EL2:
+ return &ich_hfgwtr_masks;
+ case ICH_HFGITR_EL2:
+ return &ich_hfgitr_masks;
default:
BUILD_BUG_ON(1);
}
@@ -1606,6 +1684,32 @@ static void __compute_hdfgwtr(struct kvm_vcpu *vcpu)
*vcpu_fgt(vcpu, HDFGWTR_EL2) |= HDFGWTR_EL2_MDSCR_EL1;
}
+static void __compute_ich_hfgrtr(struct kvm_vcpu *vcpu)
+{
+ __compute_fgt(vcpu, ICH_HFGRTR_EL2);
+
+ /*
+ * ICC_IAFFIDR_EL1 *always* needs to be trapped when running a guest.
+ *
+ * We also trap accesses to ICC_IDR0_EL1 to allow us to completely hide
+ * FEAT_GCIE_LEGACY from the guest, and to (potentially) present fewer
+ * ID bits than the host supports.
+ */
+ *vcpu_fgt(vcpu, ICH_HFGRTR_EL2) &= ~(ICH_HFGRTR_EL2_ICC_IAFFIDR_EL1 |
+ ICH_HFGRTR_EL2_ICC_IDRn_EL1);
+}
+
+static void __compute_ich_hfgwtr(struct kvm_vcpu *vcpu)
+{
+ __compute_fgt(vcpu, ICH_HFGWTR_EL2);
+
+ /*
+ * We present a different subset of PPIs to the guest from what
+ * exists in real hardware. We only trap writes, not reads.
+ */
+ *vcpu_fgt(vcpu, ICH_HFGWTR_EL2) &= ~(ICH_HFGWTR_EL2_ICC_PPI_ENABLERn_EL1);
+}
+
void kvm_vcpu_load_fgt(struct kvm_vcpu *vcpu)
{
if (!cpus_have_final_cap(ARM64_HAS_FGT))
@@ -1618,12 +1722,17 @@ void kvm_vcpu_load_fgt(struct kvm_vcpu *vcpu)
__compute_hdfgwtr(vcpu);
__compute_fgt(vcpu, HAFGRTR_EL2);
- if (!cpus_have_final_cap(ARM64_HAS_FGT2))
- return;
+ if (cpus_have_final_cap(ARM64_HAS_FGT2)) {
+ __compute_fgt(vcpu, HFGRTR2_EL2);
+ __compute_fgt(vcpu, HFGWTR2_EL2);
+ __compute_fgt(vcpu, HFGITR2_EL2);
+ __compute_fgt(vcpu, HDFGRTR2_EL2);
+ __compute_fgt(vcpu, HDFGWTR2_EL2);
+ }
- __compute_fgt(vcpu, HFGRTR2_EL2);
- __compute_fgt(vcpu, HFGWTR2_EL2);
- __compute_fgt(vcpu, HFGITR2_EL2);
- __compute_fgt(vcpu, HDFGRTR2_EL2);
- __compute_fgt(vcpu, HDFGWTR2_EL2);
+ if (cpus_have_final_cap(ARM64_HAS_GICV5_CPUIF)) {
+ __compute_ich_hfgrtr(vcpu);
+ __compute_ich_hfgwtr(vcpu);
+ __compute_fgt(vcpu, ICH_HFGITR_EL2);
+ }
}
diff --git a/arch/arm64/kvm/emulate-nested.c b/arch/arm64/kvm/emulate-nested.c
index 22d497554c94..dba7ced74ca5 100644
--- a/arch/arm64/kvm/emulate-nested.c
+++ b/arch/arm64/kvm/emulate-nested.c
@@ -2053,6 +2053,60 @@ static const struct encoding_to_trap_config encoding_to_fgt[] __initconst = {
SR_FGT(SYS_AMEVCNTR0_EL0(2), HAFGRTR, AMEVCNTR02_EL0, 1),
SR_FGT(SYS_AMEVCNTR0_EL0(1), HAFGRTR, AMEVCNTR01_EL0, 1),
SR_FGT(SYS_AMEVCNTR0_EL0(0), HAFGRTR, AMEVCNTR00_EL0, 1),
+
+ /*
+ * ICH_HFGRTR_EL2 & ICH_HFGWTR_EL2
+ */
+ SR_FGT(SYS_ICC_APR_EL1, ICH_HFGRTR, ICC_APR_EL1, 0),
+ SR_FGT(SYS_ICC_IDR0_EL1, ICH_HFGRTR, ICC_IDRn_EL1, 0),
+ SR_FGT(SYS_ICC_CR0_EL1, ICH_HFGRTR, ICC_CR0_EL1, 0),
+ SR_FGT(SYS_ICC_HPPIR_EL1, ICH_HFGRTR, ICC_HPPIR_EL1, 0),
+ SR_FGT(SYS_ICC_PCR_EL1, ICH_HFGRTR, ICC_PCR_EL1, 0),
+ SR_FGT(SYS_ICC_ICSR_EL1, ICH_HFGRTR, ICC_ICSR_EL1, 0),
+ SR_FGT(SYS_ICC_IAFFIDR_EL1, ICH_HFGRTR, ICC_IAFFIDR_EL1, 0),
+ SR_FGT(SYS_ICC_PPI_HMR0_EL1, ICH_HFGRTR, ICC_PPI_HMRn_EL1, 0),
+ SR_FGT(SYS_ICC_PPI_HMR1_EL1, ICH_HFGRTR, ICC_PPI_HMRn_EL1, 0),
+ SR_FGT(SYS_ICC_PPI_ENABLER0_EL1, ICH_HFGRTR, ICC_PPI_ENABLERn_EL1, 0),
+ SR_FGT(SYS_ICC_PPI_ENABLER1_EL1, ICH_HFGRTR, ICC_PPI_ENABLERn_EL1, 0),
+ SR_FGT(SYS_ICC_PPI_CPENDR0_EL1, ICH_HFGRTR, ICC_PPI_PENDRn_EL1, 0),
+ SR_FGT(SYS_ICC_PPI_CPENDR1_EL1, ICH_HFGRTR, ICC_PPI_PENDRn_EL1, 0),
+ SR_FGT(SYS_ICC_PPI_SPENDR0_EL1, ICH_HFGRTR, ICC_PPI_PENDRn_EL1, 0),
+ SR_FGT(SYS_ICC_PPI_SPENDR1_EL1, ICH_HFGRTR, ICC_PPI_PENDRn_EL1, 0),
+ SR_FGT(SYS_ICC_PPI_PRIORITYR0_EL1, ICH_HFGRTR, ICC_PPI_PRIORITYRn_EL1, 0),
+ SR_FGT(SYS_ICC_PPI_PRIORITYR1_EL1, ICH_HFGRTR, ICC_PPI_PRIORITYRn_EL1, 0),
+ SR_FGT(SYS_ICC_PPI_PRIORITYR2_EL1, ICH_HFGRTR, ICC_PPI_PRIORITYRn_EL1, 0),
+ SR_FGT(SYS_ICC_PPI_PRIORITYR3_EL1, ICH_HFGRTR, ICC_PPI_PRIORITYRn_EL1, 0),
+ SR_FGT(SYS_ICC_PPI_PRIORITYR4_EL1, ICH_HFGRTR, ICC_PPI_PRIORITYRn_EL1, 0),
+ SR_FGT(SYS_ICC_PPI_PRIORITYR5_EL1, ICH_HFGRTR, ICC_PPI_PRIORITYRn_EL1, 0),
+ SR_FGT(SYS_ICC_PPI_PRIORITYR6_EL1, ICH_HFGRTR, ICC_PPI_PRIORITYRn_EL1, 0),
+ SR_FGT(SYS_ICC_PPI_PRIORITYR7_EL1, ICH_HFGRTR, ICC_PPI_PRIORITYRn_EL1, 0),
+ SR_FGT(SYS_ICC_PPI_PRIORITYR8_EL1, ICH_HFGRTR, ICC_PPI_PRIORITYRn_EL1, 0),
+ SR_FGT(SYS_ICC_PPI_PRIORITYR9_EL1, ICH_HFGRTR, ICC_PPI_PRIORITYRn_EL1, 0),
+ SR_FGT(SYS_ICC_PPI_PRIORITYR10_EL1, ICH_HFGRTR, ICC_PPI_PRIORITYRn_EL1, 0),
+ SR_FGT(SYS_ICC_PPI_PRIORITYR11_EL1, ICH_HFGRTR, ICC_PPI_PRIORITYRn_EL1, 0),
+ SR_FGT(SYS_ICC_PPI_PRIORITYR12_EL1, ICH_HFGRTR, ICC_PPI_PRIORITYRn_EL1, 0),
+ SR_FGT(SYS_ICC_PPI_PRIORITYR13_EL1, ICH_HFGRTR, ICC_PPI_PRIORITYRn_EL1, 0),
+ SR_FGT(SYS_ICC_PPI_PRIORITYR14_EL1, ICH_HFGRTR, ICC_PPI_PRIORITYRn_EL1, 0),
+ SR_FGT(SYS_ICC_PPI_PRIORITYR15_EL1, ICH_HFGRTR, ICC_PPI_PRIORITYRn_EL1, 0),
+ SR_FGT(SYS_ICC_PPI_CACTIVER0_EL1, ICH_HFGRTR, ICC_PPI_ACTIVERn_EL1, 0),
+ SR_FGT(SYS_ICC_PPI_CACTIVER1_EL1, ICH_HFGRTR, ICC_PPI_ACTIVERn_EL1, 0),
+ SR_FGT(SYS_ICC_PPI_SACTIVER0_EL1, ICH_HFGRTR, ICC_PPI_ACTIVERn_EL1, 0),
+ SR_FGT(SYS_ICC_PPI_SACTIVER1_EL1, ICH_HFGRTR, ICC_PPI_ACTIVERn_EL1, 0),
+
+ /*
+ * ICH_HFGITR_EL2
+ */
+ SR_FGT(GICV5_OP_GIC_CDEN, ICH_HFGITR, GICCDEN, 0),
+ SR_FGT(GICV5_OP_GIC_CDDIS, ICH_HFGITR, GICCDDIS, 0),
+ SR_FGT(GICV5_OP_GIC_CDPRI, ICH_HFGITR, GICCDPRI, 0),
+ SR_FGT(GICV5_OP_GIC_CDAFF, ICH_HFGITR, GICCDAFF, 0),
+ SR_FGT(GICV5_OP_GIC_CDPEND, ICH_HFGITR, GICCDPEND, 0),
+ SR_FGT(GICV5_OP_GIC_CDRCFG, ICH_HFGITR, GICCDRCFG, 0),
+ SR_FGT(GICV5_OP_GIC_CDHM, ICH_HFGITR, GICCDHM, 0),
+ SR_FGT(GICV5_OP_GIC_CDEOI, ICH_HFGITR, GICCDEOI, 0),
+ SR_FGT(GICV5_OP_GIC_CDDI, ICH_HFGITR, GICCDDI, 0),
+ SR_FGT(GICV5_OP_GICR_CDIA, ICH_HFGITR, GICRCDIA, 0),
+ SR_FGT(GICV5_OP_GICR_CDNMIA, ICH_HFGITR, GICRCDNMIA, 0),
};
/*
@@ -2127,6 +2181,9 @@ FGT_MASKS(hfgwtr2_masks, HFGWTR2_EL2);
FGT_MASKS(hfgitr2_masks, HFGITR2_EL2);
FGT_MASKS(hdfgrtr2_masks, HDFGRTR2_EL2);
FGT_MASKS(hdfgwtr2_masks, HDFGWTR2_EL2);
+FGT_MASKS(ich_hfgrtr_masks, ICH_HFGRTR_EL2);
+FGT_MASKS(ich_hfgwtr_masks, ICH_HFGWTR_EL2);
+FGT_MASKS(ich_hfgitr_masks, ICH_HFGITR_EL2);
static __init bool aggregate_fgt(union trap_config tc)
{
@@ -2162,6 +2219,14 @@ static __init bool aggregate_fgt(union trap_config tc)
rmasks = &hfgitr2_masks;
wmasks = NULL;
break;
+ case ICH_HFGRTR_GROUP:
+ rmasks = &ich_hfgrtr_masks;
+ wmasks = &ich_hfgwtr_masks;
+ break;
+ case ICH_HFGITR_GROUP:
+ rmasks = &ich_hfgitr_masks;
+ wmasks = NULL;
+ break;
}
rresx = rmasks->res0 | rmasks->res1;
@@ -2232,6 +2297,9 @@ static __init int check_all_fgt_masks(int ret)
&hfgitr2_masks,
&hdfgrtr2_masks,
&hdfgwtr2_masks,
+ &ich_hfgrtr_masks,
+ &ich_hfgwtr_masks,
+ &ich_hfgitr_masks,
};
int err = 0;
diff --git a/arch/arm64/kvm/hyp/include/hyp/switch.h b/arch/arm64/kvm/hyp/include/hyp/switch.h
index 2597e8bda867..ae04fd680d1e 100644
--- a/arch/arm64/kvm/hyp/include/hyp/switch.h
+++ b/arch/arm64/kvm/hyp/include/hyp/switch.h
@@ -233,6 +233,18 @@ static inline void __activate_traps_hfgxtr(struct kvm_vcpu *vcpu)
__activate_fgt(hctxt, vcpu, HDFGWTR2_EL2);
}
+static inline void __activate_traps_ich_hfgxtr(struct kvm_vcpu *vcpu)
+{
+ struct kvm_cpu_context *hctxt = host_data_ptr(host_ctxt);
+
+ if (!cpus_have_final_cap(ARM64_HAS_GICV5_CPUIF))
+ return;
+
+ __activate_fgt(hctxt, vcpu, ICH_HFGRTR_EL2);
+ __activate_fgt(hctxt, vcpu, ICH_HFGWTR_EL2);
+ __activate_fgt(hctxt, vcpu, ICH_HFGITR_EL2);
+}
+
#define __deactivate_fgt(htcxt, vcpu, reg) \
do { \
write_sysreg_s(ctxt_sys_reg(hctxt, reg), \
@@ -265,6 +277,19 @@ static inline void __deactivate_traps_hfgxtr(struct kvm_vcpu *vcpu)
__deactivate_fgt(hctxt, vcpu, HDFGWTR2_EL2);
}
+static inline void __deactivate_traps_ich_hfgxtr(struct kvm_vcpu *vcpu)
+{
+ struct kvm_cpu_context *hctxt = host_data_ptr(host_ctxt);
+
+ if (!cpus_have_final_cap(ARM64_HAS_GICV5_CPUIF))
+ return;
+
+ __deactivate_fgt(hctxt, vcpu, ICH_HFGRTR_EL2);
+ __deactivate_fgt(hctxt, vcpu, ICH_HFGWTR_EL2);
+ __deactivate_fgt(hctxt, vcpu, ICH_HFGITR_EL2);
+
+}
+
static inline void __activate_traps_mpam(struct kvm_vcpu *vcpu)
{
u64 r = MPAM2_EL2_TRAPMPAM0EL1 | MPAM2_EL2_TRAPMPAM1EL1;
@@ -328,6 +353,7 @@ static inline void __activate_traps_common(struct kvm_vcpu *vcpu)
}
__activate_traps_hfgxtr(vcpu);
+ __activate_traps_ich_hfgxtr(vcpu);
__activate_traps_mpam(vcpu);
}
@@ -345,6 +371,7 @@ static inline void __deactivate_traps_common(struct kvm_vcpu *vcpu)
write_sysreg_s(ctxt_sys_reg(hctxt, HCRX_EL2), SYS_HCRX_EL2);
__deactivate_traps_hfgxtr(vcpu);
+ __deactivate_traps_ich_hfgxtr(vcpu);
__deactivate_traps_mpam();
}
diff --git a/arch/arm64/kvm/hyp/nvhe/Makefile b/arch/arm64/kvm/hyp/nvhe/Makefile
index 3d33fbefdfc1..62cdfbff7562 100644
--- a/arch/arm64/kvm/hyp/nvhe/Makefile
+++ b/arch/arm64/kvm/hyp/nvhe/Makefile
@@ -26,7 +26,7 @@ hyp-obj-y := timer-sr.o sysreg-sr.o debug-sr.o switch.o tlb.o hyp-init.o host.o
hyp-main.o hyp-smp.o psci-relay.o early_alloc.o page_alloc.o \
cache.o setup.o mm.o mem_protect.o sys_regs.o pkvm.o stacktrace.o ffa.o
hyp-obj-y += ../vgic-v3-sr.o ../aarch32.o ../vgic-v2-cpuif-proxy.o ../entry.o \
- ../fpsimd.o ../hyp-entry.o ../exception.o ../pgtable.o
+ ../fpsimd.o ../hyp-entry.o ../exception.o ../pgtable.o ../vgic-v5-sr.o
hyp-obj-y += ../../../kernel/smccc-call.o
hyp-obj-$(CONFIG_LIST_HARDENED) += list_debug.o
hyp-obj-$(CONFIG_NVHE_EL2_TRACING) += clock.o trace.o events.o
diff --git a/arch/arm64/kvm/hyp/nvhe/hyp-main.c b/arch/arm64/kvm/hyp/nvhe/hyp-main.c
index eff9cb208627..31f080307d95 100644
--- a/arch/arm64/kvm/hyp/nvhe/hyp-main.c
+++ b/arch/arm64/kvm/hyp/nvhe/hyp-main.c
@@ -650,6 +650,20 @@ static void handle___tracing_write_event(struct kvm_cpu_context *host_ctxt)
trace_selftest(id);
}
+static void handle___vgic_v5_save_apr(struct kvm_cpu_context *host_ctxt)
+{
+ DECLARE_REG(struct vgic_v5_cpu_if *, cpu_if, host_ctxt, 1);
+
+ __vgic_v5_save_apr(kern_hyp_va(cpu_if));
+}
+
+static void handle___vgic_v5_restore_vmcr_apr(struct kvm_cpu_context *host_ctxt)
+{
+ DECLARE_REG(struct vgic_v5_cpu_if *, cpu_if, host_ctxt, 1);
+
+ __vgic_v5_restore_vmcr_apr(kern_hyp_va(cpu_if));
+}
+
typedef void (*hcall_t)(struct kvm_cpu_context *);
#define HANDLE_FUNC(x) [__KVM_HOST_SMCCC_FUNC_##x] = (hcall_t)handle_##x
@@ -683,6 +697,8 @@ static const hcall_t host_hcall[] = {
HANDLE_FUNC(__kvm_timer_set_cntvoff),
HANDLE_FUNC(__vgic_v3_save_aprs),
HANDLE_FUNC(__vgic_v3_restore_vmcr_aprs),
+ HANDLE_FUNC(__vgic_v5_save_apr),
+ HANDLE_FUNC(__vgic_v5_restore_vmcr_apr),
HANDLE_FUNC(__pkvm_reserve_vm),
HANDLE_FUNC(__pkvm_unreserve_vm),
HANDLE_FUNC(__pkvm_init_vm),
diff --git a/arch/arm64/kvm/hyp/nvhe/switch.c b/arch/arm64/kvm/hyp/nvhe/switch.c
index ca60721501d1..ed8830c707b0 100644
--- a/arch/arm64/kvm/hyp/nvhe/switch.c
+++ b/arch/arm64/kvm/hyp/nvhe/switch.c
@@ -44,6 +44,9 @@ struct fgt_masks hfgwtr2_masks;
struct fgt_masks hfgitr2_masks;
struct fgt_masks hdfgrtr2_masks;
struct fgt_masks hdfgwtr2_masks;
+struct fgt_masks ich_hfgrtr_masks;
+struct fgt_masks ich_hfgwtr_masks;
+struct fgt_masks ich_hfgitr_masks;
extern void kvm_nvhe_prepare_backtrace(unsigned long fp, unsigned long pc);
@@ -110,6 +113,12 @@ static void __deactivate_traps(struct kvm_vcpu *vcpu)
/* Save VGICv3 state on non-VHE systems */
static void __hyp_vgic_save_state(struct kvm_vcpu *vcpu)
{
+ if (vgic_is_v5(kern_hyp_va(vcpu->kvm))) {
+ __vgic_v5_save_state(&vcpu->arch.vgic_cpu.vgic_v5);
+ __vgic_v5_save_ppi_state(&vcpu->arch.vgic_cpu.vgic_v5);
+ return;
+ }
+
if (static_branch_unlikely(&kvm_vgic_global_state.gicv3_cpuif)) {
__vgic_v3_save_state(&vcpu->arch.vgic_cpu.vgic_v3);
__vgic_v3_deactivate_traps(&vcpu->arch.vgic_cpu.vgic_v3);
@@ -119,6 +128,12 @@ static void __hyp_vgic_save_state(struct kvm_vcpu *vcpu)
/* Restore VGICv3 state on non-VHE systems */
static void __hyp_vgic_restore_state(struct kvm_vcpu *vcpu)
{
+ if (vgic_is_v5(kern_hyp_va(vcpu->kvm))) {
+ __vgic_v5_restore_state(&vcpu->arch.vgic_cpu.vgic_v5);
+ __vgic_v5_restore_ppi_state(&vcpu->arch.vgic_cpu.vgic_v5);
+ return;
+ }
+
if (static_branch_unlikely(&kvm_vgic_global_state.gicv3_cpuif)) {
__vgic_v3_activate_traps(&vcpu->arch.vgic_cpu.vgic_v3);
__vgic_v3_restore_state(&vcpu->arch.vgic_cpu.vgic_v3);
diff --git a/arch/arm64/kvm/hyp/nvhe/sys_regs.c b/arch/arm64/kvm/hyp/nvhe/sys_regs.c
index 06d28621722e..b40fd01ebf32 100644
--- a/arch/arm64/kvm/hyp/nvhe/sys_regs.c
+++ b/arch/arm64/kvm/hyp/nvhe/sys_regs.c
@@ -20,6 +20,7 @@
*/
u64 id_aa64pfr0_el1_sys_val;
u64 id_aa64pfr1_el1_sys_val;
+u64 id_aa64pfr2_el1_sys_val;
u64 id_aa64isar0_el1_sys_val;
u64 id_aa64isar1_el1_sys_val;
u64 id_aa64isar2_el1_sys_val;
@@ -108,6 +109,11 @@ static const struct pvm_ftr_bits pvmid_aa64pfr1[] = {
FEAT_END
};
+static const struct pvm_ftr_bits pvmid_aa64pfr2[] = {
+ MAX_FEAT(ID_AA64PFR2_EL1, GCIE, NI),
+ FEAT_END
+};
+
static const struct pvm_ftr_bits pvmid_aa64mmfr0[] = {
MAX_FEAT_ENUM(ID_AA64MMFR0_EL1, PARANGE, 40),
MAX_FEAT_ENUM(ID_AA64MMFR0_EL1, ASIDBITS, 16),
@@ -221,6 +227,8 @@ static u64 pvm_calc_id_reg(const struct kvm_vcpu *vcpu, u32 id)
return get_restricted_features(vcpu, id_aa64pfr0_el1_sys_val, pvmid_aa64pfr0);
case SYS_ID_AA64PFR1_EL1:
return get_restricted_features(vcpu, id_aa64pfr1_el1_sys_val, pvmid_aa64pfr1);
+ case SYS_ID_AA64PFR2_EL1:
+ return get_restricted_features(vcpu, id_aa64pfr2_el1_sys_val, pvmid_aa64pfr2);
case SYS_ID_AA64ISAR0_EL1:
return id_aa64isar0_el1_sys_val;
case SYS_ID_AA64ISAR1_EL1:
diff --git a/arch/arm64/kvm/hyp/vgic-v5-sr.c b/arch/arm64/kvm/hyp/vgic-v5-sr.c
new file mode 100644
index 000000000000..2c4304ffa9f3
--- /dev/null
+++ b/arch/arm64/kvm/hyp/vgic-v5-sr.c
@@ -0,0 +1,166 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2025, 2026 - Arm Ltd
+ */
+
+#include <linux/irqchip/arm-gic-v5.h>
+
+#include <asm/kvm_hyp.h>
+
+void __vgic_v5_save_apr(struct vgic_v5_cpu_if *cpu_if)
+{
+ cpu_if->vgic_apr = read_sysreg_s(SYS_ICH_APR_EL2);
+}
+
+static void __vgic_v5_compat_mode_disable(void)
+{
+ sysreg_clear_set_s(SYS_ICH_VCTLR_EL2, ICH_VCTLR_EL2_V3, 0);
+ isb();
+}
+
+void __vgic_v5_restore_vmcr_apr(struct vgic_v5_cpu_if *cpu_if)
+{
+ __vgic_v5_compat_mode_disable();
+
+ write_sysreg_s(cpu_if->vgic_vmcr, SYS_ICH_VMCR_EL2);
+ write_sysreg_s(cpu_if->vgic_apr, SYS_ICH_APR_EL2);
+}
+
+void __vgic_v5_save_ppi_state(struct vgic_v5_cpu_if *cpu_if)
+{
+ /*
+ * The following code assumes that the bitmap storage that we have for
+ * PPIs is either 64 (architected PPIs, only) or 128 bits (architected &
+ * impdef PPIs).
+ */
+ BUILD_BUG_ON(VGIC_V5_NR_PRIVATE_IRQS % 64);
+
+ bitmap_write(host_data_ptr(vgic_v5_ppi_state)->activer_exit,
+ read_sysreg_s(SYS_ICH_PPI_ACTIVER0_EL2), 0, 64);
+ bitmap_write(host_data_ptr(vgic_v5_ppi_state)->pendr_exit,
+ read_sysreg_s(SYS_ICH_PPI_PENDR0_EL2), 0, 64);
+
+ cpu_if->vgic_ppi_priorityr[0] = read_sysreg_s(SYS_ICH_PPI_PRIORITYR0_EL2);
+ cpu_if->vgic_ppi_priorityr[1] = read_sysreg_s(SYS_ICH_PPI_PRIORITYR1_EL2);
+ cpu_if->vgic_ppi_priorityr[2] = read_sysreg_s(SYS_ICH_PPI_PRIORITYR2_EL2);
+ cpu_if->vgic_ppi_priorityr[3] = read_sysreg_s(SYS_ICH_PPI_PRIORITYR3_EL2);
+ cpu_if->vgic_ppi_priorityr[4] = read_sysreg_s(SYS_ICH_PPI_PRIORITYR4_EL2);
+ cpu_if->vgic_ppi_priorityr[5] = read_sysreg_s(SYS_ICH_PPI_PRIORITYR5_EL2);
+ cpu_if->vgic_ppi_priorityr[6] = read_sysreg_s(SYS_ICH_PPI_PRIORITYR6_EL2);
+ cpu_if->vgic_ppi_priorityr[7] = read_sysreg_s(SYS_ICH_PPI_PRIORITYR7_EL2);
+
+ if (VGIC_V5_NR_PRIVATE_IRQS == 128) {
+ bitmap_write(host_data_ptr(vgic_v5_ppi_state)->activer_exit,
+ read_sysreg_s(SYS_ICH_PPI_ACTIVER1_EL2), 64, 64);
+ bitmap_write(host_data_ptr(vgic_v5_ppi_state)->pendr_exit,
+ read_sysreg_s(SYS_ICH_PPI_PENDR1_EL2), 64, 64);
+
+ cpu_if->vgic_ppi_priorityr[8] = read_sysreg_s(SYS_ICH_PPI_PRIORITYR8_EL2);
+ cpu_if->vgic_ppi_priorityr[9] = read_sysreg_s(SYS_ICH_PPI_PRIORITYR9_EL2);
+ cpu_if->vgic_ppi_priorityr[10] = read_sysreg_s(SYS_ICH_PPI_PRIORITYR10_EL2);
+ cpu_if->vgic_ppi_priorityr[11] = read_sysreg_s(SYS_ICH_PPI_PRIORITYR11_EL2);
+ cpu_if->vgic_ppi_priorityr[12] = read_sysreg_s(SYS_ICH_PPI_PRIORITYR12_EL2);
+ cpu_if->vgic_ppi_priorityr[13] = read_sysreg_s(SYS_ICH_PPI_PRIORITYR13_EL2);
+ cpu_if->vgic_ppi_priorityr[14] = read_sysreg_s(SYS_ICH_PPI_PRIORITYR14_EL2);
+ cpu_if->vgic_ppi_priorityr[15] = read_sysreg_s(SYS_ICH_PPI_PRIORITYR15_EL2);
+ }
+
+ /* Now that we are done, disable DVI */
+ write_sysreg_s(0, SYS_ICH_PPI_DVIR0_EL2);
+ write_sysreg_s(0, SYS_ICH_PPI_DVIR1_EL2);
+}
+
+void __vgic_v5_restore_ppi_state(struct vgic_v5_cpu_if *cpu_if)
+{
+ DECLARE_BITMAP(pendr, VGIC_V5_NR_PRIVATE_IRQS);
+
+ /* We assume 64 or 128 PPIs - see above comment */
+ BUILD_BUG_ON(VGIC_V5_NR_PRIVATE_IRQS % 64);
+
+ /* Enable DVI so that the guest's interrupt config takes over */
+ write_sysreg_s(bitmap_read(cpu_if->vgic_ppi_dvir, 0, 64),
+ SYS_ICH_PPI_DVIR0_EL2);
+
+ write_sysreg_s(bitmap_read(cpu_if->vgic_ppi_activer, 0, 64),
+ SYS_ICH_PPI_ACTIVER0_EL2);
+ write_sysreg_s(bitmap_read(cpu_if->vgic_ppi_enabler, 0, 64),
+ SYS_ICH_PPI_ENABLER0_EL2);
+
+ /* Update the pending state of the NON-DVI'd PPIs, only */
+ bitmap_andnot(pendr, host_data_ptr(vgic_v5_ppi_state)->pendr_entry,
+ cpu_if->vgic_ppi_dvir, VGIC_V5_NR_PRIVATE_IRQS);
+ write_sysreg_s(bitmap_read(pendr, 0, 64), SYS_ICH_PPI_PENDR0_EL2);
+
+ write_sysreg_s(cpu_if->vgic_ppi_priorityr[0],
+ SYS_ICH_PPI_PRIORITYR0_EL2);
+ write_sysreg_s(cpu_if->vgic_ppi_priorityr[1],
+ SYS_ICH_PPI_PRIORITYR1_EL2);
+ write_sysreg_s(cpu_if->vgic_ppi_priorityr[2],
+ SYS_ICH_PPI_PRIORITYR2_EL2);
+ write_sysreg_s(cpu_if->vgic_ppi_priorityr[3],
+ SYS_ICH_PPI_PRIORITYR3_EL2);
+ write_sysreg_s(cpu_if->vgic_ppi_priorityr[4],
+ SYS_ICH_PPI_PRIORITYR4_EL2);
+ write_sysreg_s(cpu_if->vgic_ppi_priorityr[5],
+ SYS_ICH_PPI_PRIORITYR5_EL2);
+ write_sysreg_s(cpu_if->vgic_ppi_priorityr[6],
+ SYS_ICH_PPI_PRIORITYR6_EL2);
+ write_sysreg_s(cpu_if->vgic_ppi_priorityr[7],
+ SYS_ICH_PPI_PRIORITYR7_EL2);
+
+ if (VGIC_V5_NR_PRIVATE_IRQS == 128) {
+ /* Enable DVI so that the guest's interrupt config takes over */
+ write_sysreg_s(bitmap_read(cpu_if->vgic_ppi_dvir, 64, 64),
+ SYS_ICH_PPI_DVIR1_EL2);
+
+ write_sysreg_s(bitmap_read(cpu_if->vgic_ppi_activer, 64, 64),
+ SYS_ICH_PPI_ACTIVER1_EL2);
+ write_sysreg_s(bitmap_read(cpu_if->vgic_ppi_enabler, 64, 64),
+ SYS_ICH_PPI_ENABLER1_EL2);
+ write_sysreg_s(bitmap_read(pendr, 64, 64),
+ SYS_ICH_PPI_PENDR1_EL2);
+
+ write_sysreg_s(cpu_if->vgic_ppi_priorityr[8],
+ SYS_ICH_PPI_PRIORITYR8_EL2);
+ write_sysreg_s(cpu_if->vgic_ppi_priorityr[9],
+ SYS_ICH_PPI_PRIORITYR9_EL2);
+ write_sysreg_s(cpu_if->vgic_ppi_priorityr[10],
+ SYS_ICH_PPI_PRIORITYR10_EL2);
+ write_sysreg_s(cpu_if->vgic_ppi_priorityr[11],
+ SYS_ICH_PPI_PRIORITYR11_EL2);
+ write_sysreg_s(cpu_if->vgic_ppi_priorityr[12],
+ SYS_ICH_PPI_PRIORITYR12_EL2);
+ write_sysreg_s(cpu_if->vgic_ppi_priorityr[13],
+ SYS_ICH_PPI_PRIORITYR13_EL2);
+ write_sysreg_s(cpu_if->vgic_ppi_priorityr[14],
+ SYS_ICH_PPI_PRIORITYR14_EL2);
+ write_sysreg_s(cpu_if->vgic_ppi_priorityr[15],
+ SYS_ICH_PPI_PRIORITYR15_EL2);
+ } else {
+ write_sysreg_s(0, SYS_ICH_PPI_DVIR1_EL2);
+
+ write_sysreg_s(0, SYS_ICH_PPI_ACTIVER1_EL2);
+ write_sysreg_s(0, SYS_ICH_PPI_ENABLER1_EL2);
+ write_sysreg_s(0, SYS_ICH_PPI_PENDR1_EL2);
+
+ write_sysreg_s(0, SYS_ICH_PPI_PRIORITYR8_EL2);
+ write_sysreg_s(0, SYS_ICH_PPI_PRIORITYR9_EL2);
+ write_sysreg_s(0, SYS_ICH_PPI_PRIORITYR10_EL2);
+ write_sysreg_s(0, SYS_ICH_PPI_PRIORITYR11_EL2);
+ write_sysreg_s(0, SYS_ICH_PPI_PRIORITYR12_EL2);
+ write_sysreg_s(0, SYS_ICH_PPI_PRIORITYR13_EL2);
+ write_sysreg_s(0, SYS_ICH_PPI_PRIORITYR14_EL2);
+ write_sysreg_s(0, SYS_ICH_PPI_PRIORITYR15_EL2);
+ }
+}
+
+void __vgic_v5_save_state(struct vgic_v5_cpu_if *cpu_if)
+{
+ cpu_if->vgic_vmcr = read_sysreg_s(SYS_ICH_VMCR_EL2);
+ cpu_if->vgic_icsr = read_sysreg_s(SYS_ICC_ICSR_EL1);
+}
+
+void __vgic_v5_restore_state(struct vgic_v5_cpu_if *cpu_if)
+{
+ write_sysreg_s(cpu_if->vgic_icsr, SYS_ICC_ICSR_EL1);
+}
diff --git a/arch/arm64/kvm/hyp/vhe/Makefile b/arch/arm64/kvm/hyp/vhe/Makefile
index afc4aed9231a..9695328bbd96 100644
--- a/arch/arm64/kvm/hyp/vhe/Makefile
+++ b/arch/arm64/kvm/hyp/vhe/Makefile
@@ -10,4 +10,4 @@ CFLAGS_switch.o += -Wno-override-init
obj-y := timer-sr.o sysreg-sr.o debug-sr.o switch.o tlb.o
obj-y += ../vgic-v3-sr.o ../aarch32.o ../vgic-v2-cpuif-proxy.o ../entry.o \
- ../fpsimd.o ../hyp-entry.o ../exception.o
+ ../fpsimd.o ../hyp-entry.o ../exception.o ../vgic-v5-sr.o
diff --git a/arch/arm64/kvm/nested.c b/arch/arm64/kvm/nested.c
index 2c43097248b2..efd5d21c7ac7 100644
--- a/arch/arm64/kvm/nested.c
+++ b/arch/arm64/kvm/nested.c
@@ -1558,6 +1558,11 @@ u64 limit_nv_id_reg(struct kvm *kvm, u32 reg, u64 val)
ID_AA64PFR1_EL1_MTE);
break;
+ case SYS_ID_AA64PFR2_EL1:
+ /* GICv5 is not yet supported for NV */
+ val &= ~ID_AA64PFR2_EL1_GCIE;
+ break;
+
case SYS_ID_AA64MMFR0_EL1:
/* Hide ExS, Secure Memory */
val &= ~(ID_AA64MMFR0_EL1_EXS |
diff --git a/arch/arm64/kvm/pmu-emul.c b/arch/arm64/kvm/pmu-emul.c
index 93cc9bbb5cec..e1860acae641 100644
--- a/arch/arm64/kvm/pmu-emul.c
+++ b/arch/arm64/kvm/pmu-emul.c
@@ -939,7 +939,8 @@ int kvm_arm_pmu_v3_enable(struct kvm_vcpu *vcpu)
* number against the dimensions of the vgic and make sure
* it's valid.
*/
- if (!irq_is_ppi(irq) && !vgic_valid_spi(vcpu->kvm, irq))
+ if (!irq_is_ppi(vcpu->kvm, irq) &&
+ !vgic_valid_spi(vcpu->kvm, irq))
return -EINVAL;
} else if (kvm_arm_pmu_irq_initialized(vcpu)) {
return -EINVAL;
@@ -961,8 +962,13 @@ static int kvm_arm_pmu_v3_init(struct kvm_vcpu *vcpu)
if (!vgic_initialized(vcpu->kvm))
return -ENODEV;
- if (!kvm_arm_pmu_irq_initialized(vcpu))
- return -ENXIO;
+ if (!kvm_arm_pmu_irq_initialized(vcpu)) {
+ if (!vgic_is_v5(vcpu->kvm))
+ return -ENXIO;
+
+ /* Use the architected irq number for GICv5. */
+ vcpu->arch.pmu.irq_num = KVM_ARMV8_PMU_GICV5_IRQ;
+ }
ret = kvm_vgic_set_owner(vcpu, vcpu->arch.pmu.irq_num,
&vcpu->arch.pmu);
@@ -987,11 +993,15 @@ static bool pmu_irq_is_valid(struct kvm *kvm, int irq)
unsigned long i;
struct kvm_vcpu *vcpu;
+ /* On GICv5, the PMUIRQ is architecturally mandated to be PPI 23 */
+ if (vgic_is_v5(kvm) && irq != KVM_ARMV8_PMU_GICV5_IRQ)
+ return false;
+
kvm_for_each_vcpu(i, vcpu, kvm) {
if (!kvm_arm_pmu_irq_initialized(vcpu))
continue;
- if (irq_is_ppi(irq)) {
+ if (irq_is_ppi(vcpu->kvm, irq)) {
if (vcpu->arch.pmu.irq_num != irq)
return false;
} else {
@@ -1142,7 +1152,7 @@ int kvm_arm_pmu_v3_set_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr)
return -EFAULT;
/* The PMU overflow interrupt can be a PPI or a valid SPI. */
- if (!(irq_is_ppi(irq) || irq_is_spi(irq)))
+ if (!(irq_is_ppi(vcpu->kvm, irq) || irq_is_spi(vcpu->kvm, irq)))
return -EINVAL;
if (!pmu_irq_is_valid(kvm, irq))
diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c
index 1b4cacb6e918..e1001544d4f4 100644
--- a/arch/arm64/kvm/sys_regs.c
+++ b/arch/arm64/kvm/sys_regs.c
@@ -681,6 +681,91 @@ static bool access_gic_dir(struct kvm_vcpu *vcpu,
return true;
}
+static bool access_gicv5_idr0(struct kvm_vcpu *vcpu, struct sys_reg_params *p,
+ const struct sys_reg_desc *r)
+{
+ if (p->is_write)
+ return undef_access(vcpu, p, r);
+
+ /*
+ * Expose KVM's priority- and ID-bits to the guest, but not GCIE_LEGACY.
+ *
+	 * Note: for GICv5 we mimic the way that the num_pri_bits and
+ * num_id_bits fields are used with GICv3:
+ * - num_pri_bits stores the actual number of priority bits, whereas the
+ * register field stores num_pri_bits - 1.
+ * - num_id_bits stores the raw field value, which is 0b0000 for 16 bits
+ * and 0b0001 for 24 bits.
+ */
+ p->regval = FIELD_PREP(ICC_IDR0_EL1_PRI_BITS, vcpu->arch.vgic_cpu.num_pri_bits - 1) |
+ FIELD_PREP(ICC_IDR0_EL1_ID_BITS, vcpu->arch.vgic_cpu.num_id_bits);
+
+ return true;
+}
+
+static bool access_gicv5_iaffid(struct kvm_vcpu *vcpu, struct sys_reg_params *p,
+ const struct sys_reg_desc *r)
+{
+ if (p->is_write)
+ return undef_access(vcpu, p, r);
+
+ /*
+ * For GICv5 VMs, the IAFFID value is the same as the VPE ID. The VPE ID
+ * is the same as the VCPU's ID.
+ */
+ p->regval = FIELD_PREP(ICC_IAFFIDR_EL1_IAFFID, vcpu->vcpu_id);
+
+ return true;
+}
+
+static bool access_gicv5_ppi_enabler(struct kvm_vcpu *vcpu,
+ struct sys_reg_params *p,
+ const struct sys_reg_desc *r)
+{
+ unsigned long *mask = vcpu->kvm->arch.vgic.gicv5_vm.vgic_ppi_mask;
+ struct vgic_v5_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v5;
+ int i;
+
+ /* We never expect to get here with a read! */
+ if (WARN_ON_ONCE(!p->is_write))
+ return undef_access(vcpu, p, r);
+
+ /*
+ * If we're only handling architected PPIs and the guest writes to the
+ * enable for the non-architected PPIs, we just return as there's
+ * nothing to do at all. We don't even allocate the storage for them in
+ * this case.
+ */
+ if (VGIC_V5_NR_PRIVATE_IRQS == 64 && p->Op2 % 2)
+ return true;
+
+ /*
+	 * Merge the raw guest write into our bitmap at an offset of either 0 or
+	 * 64, then AND it with our PPI mask.
+ */
+ bitmap_write(cpu_if->vgic_ppi_enabler, p->regval, 64 * (p->Op2 % 2), 64);
+ bitmap_and(cpu_if->vgic_ppi_enabler, cpu_if->vgic_ppi_enabler, mask,
+ VGIC_V5_NR_PRIVATE_IRQS);
+
+ /*
+ * Sync the change in enable states to the vgic_irqs. We consider all
+ * PPIs as we don't expose many to the guest.
+ */
+ for_each_set_bit(i, mask, VGIC_V5_NR_PRIVATE_IRQS) {
+ u32 intid = vgic_v5_make_ppi(i);
+ struct vgic_irq *irq;
+
+ irq = vgic_get_vcpu_irq(vcpu, intid);
+
+ scoped_guard(raw_spinlock_irqsave, &irq->irq_lock)
+ irq->enabled = test_bit(i, cpu_if->vgic_ppi_enabler);
+
+ vgic_put_irq(vcpu->kvm, irq);
+ }
+
+ return true;
+}
+
static bool trap_raz_wi(struct kvm_vcpu *vcpu,
struct sys_reg_params *p,
const struct sys_reg_desc *r)
@@ -1758,6 +1843,7 @@ static u8 pmuver_to_perfmon(u8 pmuver)
static u64 sanitise_id_aa64pfr0_el1(const struct kvm_vcpu *vcpu, u64 val);
static u64 sanitise_id_aa64pfr1_el1(const struct kvm_vcpu *vcpu, u64 val);
+static u64 sanitise_id_aa64pfr2_el1(const struct kvm_vcpu *vcpu, u64 val);
static u64 sanitise_id_aa64dfr0_el1(const struct kvm_vcpu *vcpu, u64 val);
/* Read a sanitised cpufeature ID register by sys_reg_desc */
@@ -1783,10 +1869,7 @@ static u64 __kvm_read_sanitised_id_reg(const struct kvm_vcpu *vcpu,
val = sanitise_id_aa64pfr1_el1(vcpu, val);
break;
case SYS_ID_AA64PFR2_EL1:
- val &= ID_AA64PFR2_EL1_FPMR |
- (kvm_has_mte(vcpu->kvm) ?
- ID_AA64PFR2_EL1_MTEFAR | ID_AA64PFR2_EL1_MTESTOREONLY :
- 0);
+ val = sanitise_id_aa64pfr2_el1(vcpu, val);
break;
case SYS_ID_AA64ISAR1_EL1:
if (!vcpu_has_ptrauth(vcpu))
@@ -1985,7 +2068,7 @@ static u64 sanitise_id_aa64pfr0_el1(const struct kvm_vcpu *vcpu, u64 val)
val |= SYS_FIELD_PREP_ENUM(ID_AA64PFR0_EL1, CSV3, IMP);
}
- if (vgic_is_v3(vcpu->kvm)) {
+ if (vgic_host_has_gicv3()) {
val &= ~ID_AA64PFR0_EL1_GIC_MASK;
val |= SYS_FIELD_PREP_ENUM(ID_AA64PFR0_EL1, GIC, IMP);
}
@@ -2027,6 +2110,23 @@ static u64 sanitise_id_aa64pfr1_el1(const struct kvm_vcpu *vcpu, u64 val)
return val;
}
+static u64 sanitise_id_aa64pfr2_el1(const struct kvm_vcpu *vcpu, u64 val)
+{
+ val &= ID_AA64PFR2_EL1_FPMR |
+ ID_AA64PFR2_EL1_MTEFAR |
+ ID_AA64PFR2_EL1_MTESTOREONLY;
+
+ if (!kvm_has_mte(vcpu->kvm)) {
+ val &= ~ID_AA64PFR2_EL1_MTEFAR;
+ val &= ~ID_AA64PFR2_EL1_MTESTOREONLY;
+ }
+
+ if (vgic_host_has_gicv5())
+ val |= SYS_FIELD_PREP_ENUM(ID_AA64PFR2_EL1, GCIE, IMP);
+
+ return val;
+}
+
static u64 sanitise_id_aa64dfr0_el1(const struct kvm_vcpu *vcpu, u64 val)
{
val = ID_REG_LIMIT_FIELD_ENUM(val, ID_AA64DFR0_EL1, DebugVer, V8P8);
@@ -2177,14 +2277,6 @@ static int set_id_aa64pfr0_el1(struct kvm_vcpu *vcpu,
(vcpu_has_nv(vcpu) && !FIELD_GET(ID_AA64PFR0_EL1_EL2, user_val)))
return -EINVAL;
- /*
- * If we are running on a GICv5 host and support FEAT_GCIE_LEGACY, then
- * we support GICv3. Fail attempts to do anything but set that to IMP.
- */
- if (vgic_is_v3_compat(vcpu->kvm) &&
- FIELD_GET(ID_AA64PFR0_EL1_GIC_MASK, user_val) != ID_AA64PFR0_EL1_GIC_IMP)
- return -EINVAL;
-
return set_id_reg(vcpu, rd, user_val);
}
@@ -2224,6 +2316,12 @@ static int set_id_aa64pfr1_el1(struct kvm_vcpu *vcpu,
return set_id_reg(vcpu, rd, user_val);
}
+static int set_id_aa64pfr2_el1(struct kvm_vcpu *vcpu,
+ const struct sys_reg_desc *rd, u64 user_val)
+{
+ return set_id_reg(vcpu, rd, user_val);
+}
+
/*
* Allow userspace to de-feature a stage-2 translation granule but prevent it
* from claiming the impossible.
@@ -3205,10 +3303,11 @@ static const struct sys_reg_desc sys_reg_descs[] = {
ID_AA64PFR1_EL1_RES0 |
ID_AA64PFR1_EL1_MPAM_frac |
ID_AA64PFR1_EL1_MTE)),
- ID_WRITABLE(ID_AA64PFR2_EL1,
- ID_AA64PFR2_EL1_FPMR |
- ID_AA64PFR2_EL1_MTEFAR |
- ID_AA64PFR2_EL1_MTESTOREONLY),
+ ID_FILTERED(ID_AA64PFR2_EL1, id_aa64pfr2_el1,
+ ~(ID_AA64PFR2_EL1_FPMR |
+ ID_AA64PFR2_EL1_MTEFAR |
+ ID_AA64PFR2_EL1_MTESTOREONLY |
+ ID_AA64PFR2_EL1_GCIE)),
ID_UNALLOCATED(4,3),
ID_WRITABLE(ID_AA64ZFR0_EL1, ~ID_AA64ZFR0_EL1_RES0),
ID_HIDDEN(ID_AA64SMFR0_EL1),
@@ -3391,6 +3490,10 @@ static const struct sys_reg_desc sys_reg_descs[] = {
{ SYS_DESC(SYS_ICC_AP1R1_EL1), undef_access },
{ SYS_DESC(SYS_ICC_AP1R2_EL1), undef_access },
{ SYS_DESC(SYS_ICC_AP1R3_EL1), undef_access },
+ { SYS_DESC(SYS_ICC_IDR0_EL1), access_gicv5_idr0 },
+ { SYS_DESC(SYS_ICC_IAFFIDR_EL1), access_gicv5_iaffid },
+ { SYS_DESC(SYS_ICC_PPI_ENABLER0_EL1), access_gicv5_ppi_enabler },
+ { SYS_DESC(SYS_ICC_PPI_ENABLER1_EL1), access_gicv5_ppi_enabler },
{ SYS_DESC(SYS_ICC_DIR_EL1), access_gic_dir },
{ SYS_DESC(SYS_ICC_RPR_EL1), undef_access },
{ SYS_DESC(SYS_ICC_SGI1R_EL1), access_gic_sgi },
@@ -5647,6 +5750,8 @@ void kvm_calculate_traps(struct kvm_vcpu *vcpu)
compute_fgu(kvm, HFGRTR2_GROUP);
compute_fgu(kvm, HFGITR2_GROUP);
compute_fgu(kvm, HDFGRTR2_GROUP);
+ compute_fgu(kvm, ICH_HFGRTR_GROUP);
+ compute_fgu(kvm, ICH_HFGITR_GROUP);
set_bit(KVM_ARCH_FLAG_FGU_INITIALIZED, &kvm->arch.flags);
out:
@@ -5667,17 +5772,52 @@ int kvm_finalize_sys_regs(struct kvm_vcpu *vcpu)
guard(mutex)(&kvm->arch.config_lock);
+ if (kvm_vm_has_ran_once(kvm))
+ return 0;
+
/*
* This hacks into the ID registers, so only perform it when the
* first vcpu runs, or the kvm_set_vm_id_reg() helper will scream.
*/
- if (!irqchip_in_kernel(kvm) && !kvm_vm_has_ran_once(kvm)) {
+ if (!irqchip_in_kernel(kvm)) {
u64 val;
val = kvm_read_vm_id_reg(kvm, SYS_ID_AA64PFR0_EL1) & ~ID_AA64PFR0_EL1_GIC;
kvm_set_vm_id_reg(kvm, SYS_ID_AA64PFR0_EL1, val);
+ val = kvm_read_vm_id_reg(kvm, SYS_ID_AA64PFR2_EL1) & ~ID_AA64PFR2_EL1_GCIE;
+ kvm_set_vm_id_reg(kvm, SYS_ID_AA64PFR2_EL1, val);
val = kvm_read_vm_id_reg(kvm, SYS_ID_PFR1_EL1) & ~ID_PFR1_EL1_GIC;
kvm_set_vm_id_reg(kvm, SYS_ID_PFR1_EL1, val);
+ } else {
+ /*
+ * Certain userspace software - QEMU - samples the system
+ * register state without creating an irqchip, then blindly
+ * restores the state prior to running the final guest. This
+ * means that it restores the virtualization & emulation
+ * capabilities of the host system, rather than something that
+ * reflects the final guest state. Moreover, it checks that the
+ * state was "correctly" restored (i.e., verbatim), bailing if
+ * it isn't, so masking off invalid state isn't an option.
+ *
+ * On GICv5 hardware that supports FEAT_GCIE_LEGACY we can run
+ * both GICv3- and GICv5-based guests. Therefore, we initially
+ * present both ID_AA64PFR0.GIC and ID_AA64PFR2.GCIE as IMP to
+ * reflect that userspace can create EITHER a vGICv3 or a
+ * vGICv5. This is an architecturally invalid combination, of
+ * course. Once an in-kernel GIC is created, the sysreg state is
+ * updated to reflect the actual, valid configuration.
+ *
+ * Setting both the GIC and GCIE features to IMP unsurprisingly
+ * results in guests falling over, and hence we need to fix up
+ * this mess in KVM. Before running for the first time we yet
+ * again ensure that the GIC and GCIE fields accurately reflect
+ * the actual hardware the guest should see.
+ *
+ * This hack allows legacy QEMU-based GICv3 guests to run
+ * unmodified on compatible GICv5 hosts, and avoids the inverse
+ * problem for GICv5-based guests in the future.
+ */
+ kvm_vgic_finalize_idregs(kvm);
}
if (vcpu_has_nv(vcpu)) {
diff --git a/arch/arm64/kvm/vgic/vgic-init.c b/arch/arm64/kvm/vgic/vgic-init.c
index e9b8b5fc480c..47169604100f 100644
--- a/arch/arm64/kvm/vgic/vgic-init.c
+++ b/arch/arm64/kvm/vgic/vgic-init.c
@@ -66,12 +66,11 @@ static int vgic_allocate_private_irqs_locked(struct kvm_vcpu *vcpu, u32 type);
* or through the generic KVM_CREATE_DEVICE API ioctl.
* irqchip_in_kernel() tells you if this function succeeded or not.
* @kvm: kvm struct pointer
- * @type: KVM_DEV_TYPE_ARM_VGIC_V[23]
+ * @type: KVM_DEV_TYPE_ARM_VGIC_V[235]
*/
int kvm_vgic_create(struct kvm *kvm, u32 type)
{
struct kvm_vcpu *vcpu;
- u64 aa64pfr0, pfr1;
unsigned long i;
int ret;
@@ -132,8 +131,11 @@ int kvm_vgic_create(struct kvm *kvm, u32 type)
if (type == KVM_DEV_TYPE_ARM_VGIC_V2)
kvm->max_vcpus = VGIC_V2_MAX_CPUS;
- else
+ else if (type == KVM_DEV_TYPE_ARM_VGIC_V3)
kvm->max_vcpus = VGIC_V3_MAX_CPUS;
+ else if (type == KVM_DEV_TYPE_ARM_VGIC_V5)
+ kvm->max_vcpus = min(VGIC_V5_MAX_CPUS,
+ kvm_vgic_global_state.max_gic_vcpus);
if (atomic_read(&kvm->online_vcpus) > kvm->max_vcpus) {
ret = -E2BIG;
@@ -145,19 +147,11 @@ int kvm_vgic_create(struct kvm *kvm, u32 type)
kvm->arch.vgic.implementation_rev = KVM_VGIC_IMP_REV_LATEST;
kvm->arch.vgic.vgic_dist_base = VGIC_ADDR_UNDEF;
- aa64pfr0 = kvm_read_vm_id_reg(kvm, SYS_ID_AA64PFR0_EL1) & ~ID_AA64PFR0_EL1_GIC;
- pfr1 = kvm_read_vm_id_reg(kvm, SYS_ID_PFR1_EL1) & ~ID_PFR1_EL1_GIC;
-
- if (type == KVM_DEV_TYPE_ARM_VGIC_V2) {
- kvm->arch.vgic.vgic_cpu_base = VGIC_ADDR_UNDEF;
- } else {
- INIT_LIST_HEAD(&kvm->arch.vgic.rd_regions);
- aa64pfr0 |= SYS_FIELD_PREP_ENUM(ID_AA64PFR0_EL1, GIC, IMP);
- pfr1 |= SYS_FIELD_PREP_ENUM(ID_PFR1_EL1, GIC, GICv3);
- }
-
- kvm_set_vm_id_reg(kvm, SYS_ID_AA64PFR0_EL1, aa64pfr0);
- kvm_set_vm_id_reg(kvm, SYS_ID_PFR1_EL1, pfr1);
+ /*
+ * We've now created the GIC. Update the system register state
+ * to accurately reflect what we've created.
+ */
+ kvm_vgic_finalize_idregs(kvm);
kvm_for_each_vcpu(i, vcpu, kvm) {
ret = vgic_allocate_private_irqs_locked(vcpu, type);
@@ -179,6 +173,15 @@ int kvm_vgic_create(struct kvm *kvm, u32 type)
if (type == KVM_DEV_TYPE_ARM_VGIC_V3)
kvm->arch.vgic.nassgicap = system_supports_direct_sgis();
+ /*
+ * We now know that we have a GICv5. The Arch Timer PPI interrupts may
+ * have been initialised at this stage, but will have done so assuming
+ * that we have an older GIC, meaning that the IntIDs won't be
+ * correct. We init them again, and this time they will be correct.
+ */
+ if (type == KVM_DEV_TYPE_ARM_VGIC_V5)
+ kvm_timer_init_vm(kvm);
+
out_unlock:
mutex_unlock(&kvm->arch.config_lock);
kvm_unlock_all_vcpus(kvm);
@@ -259,9 +262,65 @@ int kvm_vgic_vcpu_nv_init(struct kvm_vcpu *vcpu)
return ret;
}
+/*
+ * Initialise a single slot of the vcpu's private_irqs array for a GICv2/v3
+ * guest. For these GIC models the INTID is the array index itself (SGIs and
+ * PPIs occupy INTIDs 0-31).
+ */
+static void vgic_allocate_private_irq(struct kvm_vcpu *vcpu, int i, u32 type)
+{
+	struct vgic_irq *irq = &vcpu->arch.vgic_cpu.private_irqs[i];
+
+	INIT_LIST_HEAD(&irq->ap_list);
+	raw_spin_lock_init(&irq->irq_lock);
+	irq->vcpu = NULL;
+	irq->target_vcpu = vcpu;
+	refcount_set(&irq->refcount, 0);
+
+	irq->intid = i;
+	if (vgic_irq_is_sgi(i)) {
+		/* SGIs: edge-triggered and enabled from the start */
+		irq->enabled = 1;
+		irq->config = VGIC_CONFIG_EDGE;
+	} else {
+		/* PPIs: level-triggered; enabled stays 0 from the zeroed allocation */
+		irq->config = VGIC_CONFIG_LEVEL;
+	}
+
+	/* Model-specific routing: affinity for v3, target bitmap for v2 */
+	switch (type) {
+	case KVM_DEV_TYPE_ARM_VGIC_V3:
+		irq->group = 1;
+		irq->mpidr = kvm_vcpu_get_mpidr_aff(vcpu);
+		break;
+	case KVM_DEV_TYPE_ARM_VGIC_V2:
+		irq->group = 0;
+		irq->targets = BIT(vcpu->vcpu_id);
+		break;
+	}
+}
+
+/*
+ * Initialise a single slot of the vcpu's private_irqs array for a GICv5
+ * guest. Unlike v2/v3, the stored INTID is the full GICv5 PPI encoding of
+ * the slot index (via vgic_v5_make_ppi()), not the index itself.
+ */
+static void vgic_v5_allocate_private_irq(struct kvm_vcpu *vcpu, int i, u32 type)
+{
+	struct vgic_irq *irq = &vcpu->arch.vgic_cpu.private_irqs[i];
+	u32 intid = vgic_v5_make_ppi(i);
+
+	INIT_LIST_HEAD(&irq->ap_list);
+	raw_spin_lock_init(&irq->irq_lock);
+	irq->vcpu = NULL;
+	irq->target_vcpu = vcpu;
+	refcount_set(&irq->refcount, 0);
+
+	irq->intid = intid;
+
+	/* The only Edge architected PPI is the SW_PPI */
+	if (i == GICV5_ARCH_PPI_SW_PPI)
+		irq->config = VGIC_CONFIG_EDGE;
+	else
+		irq->config = VGIC_CONFIG_LEVEL;
+
+	/* Register the GICv5-specific PPI ops */
+	vgic_v5_set_ppi_ops(vcpu, intid);
+}
+
static int vgic_allocate_private_irqs_locked(struct kvm_vcpu *vcpu, u32 type)
{
struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
+ u32 num_private_irqs;
int i;
lockdep_assert_held(&vcpu->kvm->arch.config_lock);
@@ -269,8 +328,13 @@ static int vgic_allocate_private_irqs_locked(struct kvm_vcpu *vcpu, u32 type)
if (vgic_cpu->private_irqs)
return 0;
+ if (vgic_is_v5(vcpu->kvm))
+ num_private_irqs = VGIC_V5_NR_PRIVATE_IRQS;
+ else
+ num_private_irqs = VGIC_NR_PRIVATE_IRQS;
+
vgic_cpu->private_irqs = kzalloc_objs(struct vgic_irq,
- VGIC_NR_PRIVATE_IRQS,
+ num_private_irqs,
GFP_KERNEL_ACCOUNT);
if (!vgic_cpu->private_irqs)
@@ -280,34 +344,11 @@ static int vgic_allocate_private_irqs_locked(struct kvm_vcpu *vcpu, u32 type)
* Enable and configure all SGIs to be edge-triggered and
* configure all PPIs as level-triggered.
*/
- for (i = 0; i < VGIC_NR_PRIVATE_IRQS; i++) {
- struct vgic_irq *irq = &vgic_cpu->private_irqs[i];
-
- INIT_LIST_HEAD(&irq->ap_list);
- raw_spin_lock_init(&irq->irq_lock);
- irq->intid = i;
- irq->vcpu = NULL;
- irq->target_vcpu = vcpu;
- refcount_set(&irq->refcount, 0);
- if (vgic_irq_is_sgi(i)) {
- /* SGIs */
- irq->enabled = 1;
- irq->config = VGIC_CONFIG_EDGE;
- } else {
- /* PPIs */
- irq->config = VGIC_CONFIG_LEVEL;
- }
-
- switch (type) {
- case KVM_DEV_TYPE_ARM_VGIC_V3:
- irq->group = 1;
- irq->mpidr = kvm_vcpu_get_mpidr_aff(vcpu);
- break;
- case KVM_DEV_TYPE_ARM_VGIC_V2:
- irq->group = 0;
- irq->targets = BIT(vcpu->vcpu_id);
- break;
- }
+ for (i = 0; i < num_private_irqs; i++) {
+ if (vgic_is_v5(vcpu->kvm))
+ vgic_v5_allocate_private_irq(vcpu, i, type);
+ else
+ vgic_allocate_private_irq(vcpu, i, type);
}
return 0;
@@ -366,7 +407,11 @@ int kvm_vgic_vcpu_init(struct kvm_vcpu *vcpu)
static void kvm_vgic_vcpu_reset(struct kvm_vcpu *vcpu)
{
- if (kvm_vgic_global_state.type == VGIC_V2)
+ const struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
+
+ if (dist->vgic_model == KVM_DEV_TYPE_ARM_VGIC_V5)
+ vgic_v5_reset(vcpu);
+ else if (kvm_vgic_global_state.type == VGIC_V2)
vgic_v2_reset(vcpu);
else
vgic_v3_reset(vcpu);
@@ -397,22 +442,28 @@ int vgic_init(struct kvm *kvm)
if (kvm->created_vcpus != atomic_read(&kvm->online_vcpus))
return -EBUSY;
- /* freeze the number of spis */
- if (!dist->nr_spis)
- dist->nr_spis = VGIC_NR_IRQS_LEGACY - VGIC_NR_PRIVATE_IRQS;
+ if (!vgic_is_v5(kvm)) {
+ /* freeze the number of spis */
+ if (!dist->nr_spis)
+ dist->nr_spis = VGIC_NR_IRQS_LEGACY - VGIC_NR_PRIVATE_IRQS;
- ret = kvm_vgic_dist_init(kvm, dist->nr_spis);
- if (ret)
- goto out;
+ ret = kvm_vgic_dist_init(kvm, dist->nr_spis);
+ if (ret)
+ return ret;
- /*
- * Ensure vPEs are allocated if direct IRQ injection (e.g. vSGIs,
- * vLPIs) is supported.
- */
- if (vgic_supports_direct_irqs(kvm)) {
- ret = vgic_v4_init(kvm);
+ /*
+ * Ensure vPEs are allocated if direct IRQ injection (e.g. vSGIs,
+ * vLPIs) is supported.
+ */
+ if (vgic_supports_direct_irqs(kvm)) {
+ ret = vgic_v4_init(kvm);
+ if (ret)
+ return ret;
+ }
+ } else {
+ ret = vgic_v5_init(kvm);
if (ret)
- goto out;
+ return ret;
}
kvm_for_each_vcpu(idx, vcpu, kvm)
@@ -420,12 +471,12 @@ int vgic_init(struct kvm *kvm)
ret = kvm_vgic_setup_default_irq_routing(kvm);
if (ret)
- goto out;
+ return ret;
vgic_debug_init(kvm);
dist->initialized = true;
-out:
- return ret;
+
+ return 0;
}
static void kvm_vgic_dist_destroy(struct kvm *kvm)
@@ -569,6 +620,7 @@ int vgic_lazy_init(struct kvm *kvm)
int kvm_vgic_map_resources(struct kvm *kvm)
{
struct vgic_dist *dist = &kvm->arch.vgic;
+ bool needs_dist = true;
enum vgic_type type;
gpa_t dist_base;
int ret = 0;
@@ -587,12 +639,16 @@ int kvm_vgic_map_resources(struct kvm *kvm)
if (dist->vgic_model == KVM_DEV_TYPE_ARM_VGIC_V2) {
ret = vgic_v2_map_resources(kvm);
type = VGIC_V2;
- } else {
+ } else if (dist->vgic_model == KVM_DEV_TYPE_ARM_VGIC_V3) {
ret = vgic_v3_map_resources(kvm);
type = VGIC_V3;
+ } else {
+ ret = vgic_v5_map_resources(kvm);
+ type = VGIC_V5;
+ needs_dist = false;
}
- if (ret)
+ if (ret || !needs_dist)
goto out;
dist_base = dist->vgic_dist_base;
@@ -617,6 +673,36 @@ out_slots:
return ret;
}
+/*
+ * Fix up the VM-wide ID registers so they reflect the GIC model that was
+ * actually created: GIC/GICv3 fields for a vGICv3, the GCIE field for a
+ * vGICv5, and neither for a vGICv2. Called from kvm_vgic_create() and again
+ * before first vcpu run to undo the initial "both IMP" advertisement.
+ */
+void kvm_vgic_finalize_idregs(struct kvm *kvm)
+{
+	u32 type = kvm->arch.vgic.vgic_model;
+	u64 aa64pfr0, aa64pfr2, pfr1;
+
+	/* Start from the current values with all GIC-related fields cleared */
+	aa64pfr0 = kvm_read_vm_id_reg(kvm, SYS_ID_AA64PFR0_EL1) & ~ID_AA64PFR0_EL1_GIC;
+	aa64pfr2 = kvm_read_vm_id_reg(kvm, SYS_ID_AA64PFR2_EL1) & ~ID_AA64PFR2_EL1_GCIE;
+	pfr1 = kvm_read_vm_id_reg(kvm, SYS_ID_PFR1_EL1) & ~ID_PFR1_EL1_GIC;
+
+	switch (type) {
+	case KVM_DEV_TYPE_ARM_VGIC_V2:
+		kvm->arch.vgic.vgic_cpu_base = VGIC_ADDR_UNDEF;
+		break;
+	case KVM_DEV_TYPE_ARM_VGIC_V3:
+		INIT_LIST_HEAD(&kvm->arch.vgic.rd_regions);
+		aa64pfr0 |= SYS_FIELD_PREP_ENUM(ID_AA64PFR0_EL1, GIC, IMP);
+		pfr1 |= SYS_FIELD_PREP_ENUM(ID_PFR1_EL1, GIC, GICv3);
+		break;
+	case KVM_DEV_TYPE_ARM_VGIC_V5:
+		aa64pfr2 |= SYS_FIELD_PREP_ENUM(ID_AA64PFR2_EL1, GCIE, IMP);
+		break;
+	default:
+		WARN_ONCE(1, "Unknown VGIC type!!!\n");
+	}
+
+	kvm_set_vm_id_reg(kvm, SYS_ID_AA64PFR0_EL1, aa64pfr0);
+	kvm_set_vm_id_reg(kvm, SYS_ID_AA64PFR2_EL1, aa64pfr2);
+	kvm_set_vm_id_reg(kvm, SYS_ID_PFR1_EL1, pfr1);
+}
+
/* GENERIC PROBE */
void kvm_vgic_cpu_up(void)
diff --git a/arch/arm64/kvm/vgic/vgic-kvm-device.c b/arch/arm64/kvm/vgic/vgic-kvm-device.c
index 3d1a776b716d..a96c77dccf35 100644
--- a/arch/arm64/kvm/vgic/vgic-kvm-device.c
+++ b/arch/arm64/kvm/vgic/vgic-kvm-device.c
@@ -336,6 +336,10 @@ int kvm_register_vgic_device(unsigned long type)
break;
ret = kvm_vgic_register_its_device();
break;
+ case KVM_DEV_TYPE_ARM_VGIC_V5:
+ ret = kvm_register_device_ops(&kvm_arm_vgic_v5_ops,
+ KVM_DEV_TYPE_ARM_VGIC_V5);
+ break;
}
return ret;
@@ -639,7 +643,7 @@ static int vgic_v3_set_attr(struct kvm_device *dev,
if (vgic_initialized(dev->kvm))
return -EBUSY;
- if (!irq_is_ppi(val))
+ if (!irq_is_ppi(dev->kvm, val))
return -EINVAL;
dev->kvm->arch.vgic.mi_intid = val;
@@ -715,3 +719,104 @@ struct kvm_device_ops kvm_arm_vgic_v3_ops = {
.get_attr = vgic_v3_get_attr,
.has_attr = vgic_v3_has_attr,
};
+
+/*
+ * Copy the bitmap of userspace-driveable PPIs to userspace as two
+ * consecutive u64 words. We either support 64 or 128 PPIs; in the former
+ * case the second word is reported as 0 since there is no backing storage.
+ *
+ * Returns 0 on success or the put_user() failure code.
+ */
+static int vgic_v5_get_userspace_ppis(struct kvm_device *dev,
+				      struct kvm_device_attr *attr)
+{
+	struct vgic_v5_vm *gicv5_vm = &dev->kvm->arch.vgic.gicv5_vm;
+	u64 __user *uaddr = (u64 __user *)(long)attr->addr;
+	int ret;
+
+	guard(mutex)(&dev->kvm->arch.config_lock);
+
+	ret = put_user(bitmap_read(gicv5_vm->userspace_ppis, 0, 64), uaddr);
+	if (ret)
+		return ret;
+	uaddr++;
+
+	/*
+	 * bitmap_read() takes (map, start, nbits) and is undefined for
+	 * nbits > BITS_PER_LONG: read the next *64* bits starting at bit 64,
+	 * not "128 bits from offset 64".
+	 */
+	if (VGIC_V5_NR_PRIVATE_IRQS == 128)
+		ret = put_user(bitmap_read(gicv5_vm->userspace_ppis, 64, 64), uaddr);
+	else
+		ret = put_user(0, uaddr);
+
+	return ret;
+}
+
+/*
+ * KVM_SET_DEVICE_ATTR handler for the vGICv5 device. Only CTRL/INIT is
+ * writable; KVM_DEV_ARM_VGIC_USERSPACE_PPIS is read-only (see
+ * vgic_v5_get_attr()) and deliberately falls through to -ENXIO here.
+ */
+static int vgic_v5_set_attr(struct kvm_device *dev,
+			    struct kvm_device_attr *attr)
+{
+	switch (attr->group) {
+	case KVM_DEV_ARM_VGIC_GRP_ADDR:
+	case KVM_DEV_ARM_VGIC_GRP_CPU_SYSREGS:
+	case KVM_DEV_ARM_VGIC_GRP_NR_IRQS:
+		return -ENXIO;
+	case KVM_DEV_ARM_VGIC_GRP_CTRL:
+		switch (attr->attr) {
+		case KVM_DEV_ARM_VGIC_CTRL_INIT:
+			return vgic_set_common_attr(dev, attr);
+		case KVM_DEV_ARM_VGIC_USERSPACE_PPIS:
+		default:
+			return -ENXIO;
+		}
+	default:
+		return -ENXIO;
+	}
+
+}
+
+/*
+ * KVM_GET_DEVICE_ATTR handler for the vGICv5 device: CTRL/INIT via the
+ * common path, plus the userspace-driveable PPI bitmap.
+ */
+static int vgic_v5_get_attr(struct kvm_device *dev,
+			    struct kvm_device_attr *attr)
+{
+	switch (attr->group) {
+	case KVM_DEV_ARM_VGIC_GRP_ADDR:
+	case KVM_DEV_ARM_VGIC_GRP_CPU_SYSREGS:
+	case KVM_DEV_ARM_VGIC_GRP_NR_IRQS:
+		return -ENXIO;
+	case KVM_DEV_ARM_VGIC_GRP_CTRL:
+		switch (attr->attr) {
+		case KVM_DEV_ARM_VGIC_CTRL_INIT:
+			return vgic_get_common_attr(dev, attr);
+		case KVM_DEV_ARM_VGIC_USERSPACE_PPIS:
+			return vgic_v5_get_userspace_ppis(dev, attr);
+		default:
+			return -ENXIO;
+		}
+	default:
+		return -ENXIO;
+	}
+}
+
+/*
+ * KVM_HAS_DEVICE_ATTR handler: advertise exactly the attributes the
+ * get/set handlers implement (CTRL_INIT and USERSPACE_PPIS).
+ */
+static int vgic_v5_has_attr(struct kvm_device *dev,
+			    struct kvm_device_attr *attr)
+{
+	switch (attr->group) {
+	case KVM_DEV_ARM_VGIC_GRP_ADDR:
+	case KVM_DEV_ARM_VGIC_GRP_CPU_SYSREGS:
+	case KVM_DEV_ARM_VGIC_GRP_NR_IRQS:
+		return -ENXIO;
+	case KVM_DEV_ARM_VGIC_GRP_CTRL:
+		switch (attr->attr) {
+		case KVM_DEV_ARM_VGIC_CTRL_INIT:
+			return 0;
+		case KVM_DEV_ARM_VGIC_USERSPACE_PPIS:
+			return 0;
+		default:
+			return -ENXIO;
+		}
+	default:
+		return -ENXIO;
+	}
+}
+
+/* KVM device ops for the vGICv5, registered via kvm_register_vgic_device() */
+struct kvm_device_ops kvm_arm_vgic_v5_ops = {
+	.name = "kvm-arm-vgic-v5",
+	.create = vgic_create,
+	.destroy = vgic_destroy,
+	.set_attr = vgic_v5_set_attr,
+	.get_attr = vgic_v5_get_attr,
+	.has_attr = vgic_v5_has_attr,
+};
diff --git a/arch/arm64/kvm/vgic/vgic-mmio.c b/arch/arm64/kvm/vgic/vgic-mmio.c
index a573b1f0c6cb..74d76dec9730 100644
--- a/arch/arm64/kvm/vgic/vgic-mmio.c
+++ b/arch/arm64/kvm/vgic/vgic-mmio.c
@@ -842,18 +842,46 @@ vgic_find_mmio_region(const struct vgic_register_region *regions,
void vgic_set_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcr)
{
- if (kvm_vgic_global_state.type == VGIC_V2)
- vgic_v2_set_vmcr(vcpu, vmcr);
- else
+ const struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
+
+ switch (dist->vgic_model) {
+ case KVM_DEV_TYPE_ARM_VGIC_V5:
+ vgic_v5_set_vmcr(vcpu, vmcr);
+ break;
+ case KVM_DEV_TYPE_ARM_VGIC_V3:
vgic_v3_set_vmcr(vcpu, vmcr);
+ break;
+ case KVM_DEV_TYPE_ARM_VGIC_V2:
+ if (static_branch_unlikely(&kvm_vgic_global_state.gicv3_cpuif))
+ vgic_v3_set_vmcr(vcpu, vmcr);
+ else
+ vgic_v2_set_vmcr(vcpu, vmcr);
+ break;
+ default:
+ BUG();
+ }
}
void vgic_get_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcr)
{
- if (kvm_vgic_global_state.type == VGIC_V2)
- vgic_v2_get_vmcr(vcpu, vmcr);
- else
+ const struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
+
+ switch (dist->vgic_model) {
+ case KVM_DEV_TYPE_ARM_VGIC_V5:
+ vgic_v5_get_vmcr(vcpu, vmcr);
+ break;
+ case KVM_DEV_TYPE_ARM_VGIC_V3:
vgic_v3_get_vmcr(vcpu, vmcr);
+ break;
+ case KVM_DEV_TYPE_ARM_VGIC_V2:
+ if (static_branch_unlikely(&kvm_vgic_global_state.gicv3_cpuif))
+ vgic_v3_get_vmcr(vcpu, vmcr);
+ else
+ vgic_v2_get_vmcr(vcpu, vmcr);
+ break;
+ default:
+ BUG();
+ }
}
/*
diff --git a/arch/arm64/kvm/vgic/vgic-v3.c b/arch/arm64/kvm/vgic/vgic-v3.c
index 6a355eca1934..9e841e7afd4a 100644
--- a/arch/arm64/kvm/vgic/vgic-v3.c
+++ b/arch/arm64/kvm/vgic/vgic-v3.c
@@ -499,7 +499,7 @@ void vcpu_set_ich_hcr(struct kvm_vcpu *vcpu)
{
struct vgic_v3_cpu_if *vgic_v3 = &vcpu->arch.vgic_cpu.vgic_v3;
- if (!vgic_is_v3(vcpu->kvm))
+ if (!vgic_host_has_gicv3())
return;
/* Hide GICv3 sysreg if necessary */
diff --git a/arch/arm64/kvm/vgic/vgic-v5.c b/arch/arm64/kvm/vgic/vgic-v5.c
index 331651087e2c..2b6cd5c3f9c2 100644
--- a/arch/arm64/kvm/vgic/vgic-v5.c
+++ b/arch/arm64/kvm/vgic/vgic-v5.c
@@ -1,28 +1,52 @@
// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2025, 2026 Arm Ltd.
+ */
#include <kvm/arm_vgic.h>
+
+#include <linux/bitops.h>
#include <linux/irqchip/arm-vgic-info.h>
#include "vgic.h"
+static struct vgic_v5_ppi_caps ppi_caps;
+
+/*
+ * Not all PPIs are guaranteed to be implemented for GICv5. Determine which
+ * ones are, and generate a mask (ppi_caps.impl_ppi_mask).
+ */
+static void vgic_v5_get_implemented_ppis(void)
+{
+	if (!cpus_have_final_cap(ARM64_HAS_GICV5_CPUIF))
+		return;
+
+	/*
+	 * If we have KVM, we have EL2, which means that we have support for the
+	 * EL1 and EL2 Physical & Virtual timers.
+	 */
+	__assign_bit(GICV5_ARCH_PPI_CNTHP, ppi_caps.impl_ppi_mask, 1);
+	__assign_bit(GICV5_ARCH_PPI_CNTV, ppi_caps.impl_ppi_mask, 1);
+	__assign_bit(GICV5_ARCH_PPI_CNTHV, ppi_caps.impl_ppi_mask, 1);
+	__assign_bit(GICV5_ARCH_PPI_CNTP, ppi_caps.impl_ppi_mask, 1);
+
+	/* The SW_PPI should be available */
+	__assign_bit(GICV5_ARCH_PPI_SW_PPI, ppi_caps.impl_ppi_mask, 1);
+
+	/* The PMUIRQ is available if we have the PMU */
+	__assign_bit(GICV5_ARCH_PPI_PMUIRQ, ppi_caps.impl_ppi_mask, system_supports_pmuv3());
+}
+
/*
* Probe for a vGICv5 compatible interrupt controller, returning 0 on success.
- * Currently only supports GICv3-based VMs on a GICv5 host, and hence only
- * registers a VGIC_V3 device.
*/
int vgic_v5_probe(const struct gic_kvm_info *info)
{
+ bool v5_registered = false;
u64 ich_vtr_el2;
int ret;
- if (!cpus_have_final_cap(ARM64_HAS_GICV5_LEGACY))
- return -ENODEV;
-
kvm_vgic_global_state.type = VGIC_V5;
- kvm_vgic_global_state.has_gcie_v3_compat = true;
-
- /* We only support v3 compat mode - use vGICv3 limits */
- kvm_vgic_global_state.max_gic_vcpus = VGIC_V3_MAX_CPUS;
kvm_vgic_global_state.vcpu_base = 0;
kvm_vgic_global_state.vctrl_base = NULL;
@@ -30,6 +54,38 @@ int vgic_v5_probe(const struct gic_kvm_info *info)
kvm_vgic_global_state.has_gicv4 = false;
kvm_vgic_global_state.has_gicv4_1 = false;
+ /*
+ * GICv5 is currently not supported in Protected mode. Skip the
+ * registration of GICv5 completely to make sure no guests can create a
+ * GICv5-based guest.
+ */
+ if (is_protected_kvm_enabled()) {
+ kvm_info("GICv5-based guests are not supported with pKVM\n");
+ goto skip_v5;
+ }
+
+ kvm_vgic_global_state.max_gic_vcpus = VGIC_V5_MAX_CPUS;
+
+ vgic_v5_get_implemented_ppis();
+
+ ret = kvm_register_vgic_device(KVM_DEV_TYPE_ARM_VGIC_V5);
+ if (ret) {
+ kvm_err("Cannot register GICv5 KVM device.\n");
+ goto skip_v5;
+ }
+
+ v5_registered = true;
+ kvm_info("GCIE system register CPU interface\n");
+
+skip_v5:
+ /* If we don't support the GICv3 compat mode we're done. */
+ if (!cpus_have_final_cap(ARM64_HAS_GICV5_LEGACY)) {
+ if (!v5_registered)
+ return -ENODEV;
+ return 0;
+ }
+
+ kvm_vgic_global_state.has_gcie_v3_compat = true;
ich_vtr_el2 = kvm_call_hyp_ret(__vgic_v3_get_gic_config);
kvm_vgic_global_state.ich_vtr_el2 = (u32)ich_vtr_el2;
@@ -45,6 +101,10 @@ int vgic_v5_probe(const struct gic_kvm_info *info)
return ret;
}
+ /* We potentially limit the max VCPUs further than we need to here */
+ kvm_vgic_global_state.max_gic_vcpus = min(VGIC_V3_MAX_CPUS,
+ VGIC_V5_MAX_CPUS);
+
static_branch_enable(&kvm_vgic_global_state.gicv3_cpuif);
kvm_info("GCIE legacy system register CPU interface\n");
@@ -52,3 +112,428 @@ int vgic_v5_probe(const struct gic_kvm_info *info)
return 0;
}
+
+/* Reset the per-vcpu GICv5 CPU interface parameters to their fixed values. */
+void vgic_v5_reset(struct kvm_vcpu *vcpu)
+{
+	/*
+	 * We always present 16-bits of ID space to the guest, irrespective of
+	 * the host allowing more.
+	 */
+	vcpu->arch.vgic_cpu.num_id_bits = ICC_IDR0_EL1_ID_BITS_16BITS;
+
+	/*
+	 * The GICv5 architecture only supports 5-bits of priority in the
+	 * CPUIF (but potentially fewer in the IRS).
+	 */
+	vcpu->arch.vgic_cpu.num_pri_bits = 5;
+}
+
+/*
+ * VM-level initialisation of the vGICv5: reject NV vcpus (unsupported for
+ * now) and compute the bitmap of PPIs userspace may drive directly.
+ *
+ * Returns 0 on success, -EINVAL if any vcpu has nested virt enabled.
+ */
+int vgic_v5_init(struct kvm *kvm)
+{
+	struct kvm_vcpu *vcpu;
+	unsigned long idx;
+
+	if (vgic_initialized(kvm))
+		return 0;
+
+	kvm_for_each_vcpu(idx, vcpu, kvm) {
+		if (vcpu_has_nv(vcpu)) {
+			kvm_err("Nested GICv5 VMs are currently unsupported\n");
+			return -EINVAL;
+		}
+	}
+
+	/* We only allow userspace to drive the SW_PPI, if it is implemented. */
+	bitmap_zero(kvm->arch.vgic.gicv5_vm.userspace_ppis,
+		    VGIC_V5_NR_PRIVATE_IRQS);
+	/*
+	 * __assign_bit() takes (bit, addr, value): set the SW_PPI bit. The
+	 * previous code passed VGIC_V5_NR_PRIVATE_IRQS as the value, which
+	 * only worked because the constant happens to be non-zero.
+	 */
+	__assign_bit(GICV5_ARCH_PPI_SW_PPI,
+		     kvm->arch.vgic.gicv5_vm.userspace_ppis,
+		     true);
+	/* ...then restrict to PPIs the hardware actually implements */
+	bitmap_and(kvm->arch.vgic.gicv5_vm.userspace_ppis,
+		   kvm->arch.vgic.gicv5_vm.userspace_ppis,
+		   ppi_caps.impl_ppi_mask, VGIC_V5_NR_PRIVATE_IRQS);
+
+	return 0;
+}
+
+/*
+ * GICv5 has no MMIO resources to map at this stage; only sanity-check that
+ * the vgic has been initialised (returns -EBUSY otherwise).
+ */
+int vgic_v5_map_resources(struct kvm *kvm)
+{
+	if (!vgic_initialized(kvm))
+		return -EBUSY;
+
+	return 0;
+}
+
+/*
+ * Build the VM-wide mask of PPIs exposed to the guest (vgic_ppi_mask) and
+ * their handling mode (vgic_ppi_hmr: level vs edge), based on vcpu0's
+ * per-IRQ state. A no-op (returning 0) on non-GICv5 VMs.
+ */
+int vgic_v5_finalize_ppi_state(struct kvm *kvm)
+{
+	struct kvm_vcpu *vcpu0;
+	int i;
+
+	if (!vgic_is_v5(kvm))
+		return 0;
+
+	/* The PPI state for all VCPUs should be the same. Pick the first. */
+	vcpu0 = kvm_get_vcpu(kvm, 0);
+
+	bitmap_zero(kvm->arch.vgic.gicv5_vm.vgic_ppi_mask, VGIC_V5_NR_PRIVATE_IRQS);
+	bitmap_zero(kvm->arch.vgic.gicv5_vm.vgic_ppi_hmr, VGIC_V5_NR_PRIVATE_IRQS);
+
+	for_each_set_bit(i, ppi_caps.impl_ppi_mask, VGIC_V5_NR_PRIVATE_IRQS) {
+		const u32 intid = vgic_v5_make_ppi(i);
+		struct vgic_irq *irq;
+
+		irq = vgic_get_vcpu_irq(vcpu0, intid);
+
+		/* Expose PPIs with an owner or the SW_PPI, only */
+		scoped_guard(raw_spinlock_irqsave, &irq->irq_lock) {
+			if (irq->owner || i == GICV5_ARCH_PPI_SW_PPI) {
+				__assign_bit(i, kvm->arch.vgic.gicv5_vm.vgic_ppi_mask, 1);
+				__assign_bit(i, kvm->arch.vgic.gicv5_vm.vgic_ppi_hmr,
+					     irq->config == VGIC_CONFIG_LEVEL);
+			}
+		}
+
+		vgic_put_irq(vcpu0->kvm, irq);
+	}
+
+	return 0;
+}
+
+/*
+ * Compute the effective priority mask for the vcpu by combining the active
+ * priorities (APR) with the guest's priority mask (VPMR). Lower values are
+ * higher priority; a return of 0 means no interrupt can be delivered.
+ */
+static u32 vgic_v5_get_effective_priority_mask(struct kvm_vcpu *vcpu)
+{
+	struct vgic_v5_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v5;
+	u32 highest_ap, priority_mask;
+
+	/*
+	 * If the guest's CPU has not opted to receive interrupts, then the
+	 * effective running priority is the highest priority. Just return 0
+	 * (the highest priority).
+	 */
+	if (!FIELD_GET(FEAT_GCIE_ICH_VMCR_EL2_EN, cpu_if->vgic_vmcr))
+		return 0;
+
+	/*
+	 * Counting the number of trailing zeros gives the current active
+	 * priority. Explicitly use the 32-bit version here as we have 32
+	 * priorities. 32 then means that there are no active priorities.
+	 */
+	highest_ap = cpu_if->vgic_apr ? __builtin_ctz(cpu_if->vgic_apr) : 32;
+
+	/*
+	 * An interrupt is of sufficient priority if it is equal to or
+	 * greater than the priority mask. Add 1 to the priority mask
+	 * (i.e., lower priority) to match the APR logic before taking
+	 * the min. This gives us the lowest priority that is masked.
+	 */
+	priority_mask = FIELD_GET(FEAT_GCIE_ICH_VMCR_EL2_VPMR, cpu_if->vgic_vmcr);
+
+	return min(highest_ap, priority_mask + 1);
+}
+
+/*
+ * For GICv5, the PPIs are mostly directly managed by the hardware. We (the
+ * hypervisor) handle the pending, active, enable state save/restore, but don't
+ * need the PPIs to be queued on a per-VCPU AP list. Therefore, sanity check the
+ * state, unlock, and return.
+ *
+ * Returns true if the target vcpu was kicked, false on a sanity-check
+ * failure (non-PPI intid or missing target vcpu). The irq_lock is dropped
+ * on all paths, as required by the queue_irq_unlock contract.
+ */
+bool vgic_v5_ppi_queue_irq_unlock(struct kvm *kvm, struct vgic_irq *irq,
+				  unsigned long flags)
+	__releases(&irq->irq_lock)
+{
+	struct kvm_vcpu *vcpu;
+
+	lockdep_assert_held(&irq->irq_lock);
+
+	if (WARN_ON_ONCE(!__irq_is_ppi(KVM_DEV_TYPE_ARM_VGIC_V5, irq->intid)))
+		goto out_unlock_fail;
+
+	vcpu = irq->target_vcpu;
+	if (WARN_ON_ONCE(!vcpu))
+		goto out_unlock_fail;
+
+	raw_spin_unlock_irqrestore(&irq->irq_lock, flags);
+
+	/* Directly kick the target VCPU to make sure it sees the IRQ */
+	kvm_make_request(KVM_REQ_IRQ_PENDING, vcpu);
+	kvm_vcpu_kick(vcpu);
+
+	return true;
+
+out_unlock_fail:
+	raw_spin_unlock_irqrestore(&irq->irq_lock, flags);
+
+	return false;
+}
+
+/*
+ * Sets/clears the corresponding bit in the ICH_PPI_DVIR register shadow,
+ * controlling direct virtual injection for the PPI. Caller must hold
+ * irq->irq_lock.
+ */
+void vgic_v5_set_ppi_dvi(struct kvm_vcpu *vcpu, struct vgic_irq *irq, bool dvi)
+{
+	struct vgic_v5_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v5;
+	u32 ppi;
+
+	lockdep_assert_held(&irq->irq_lock);
+
+	ppi = vgic_v5_get_hwirq_id(irq->intid);
+	__assign_bit(ppi, cpu_if->vgic_ppi_dvir, dvi);
+}
+
+/* Per-IRQ ops installed on every GICv5 PPI at allocation time */
+static struct irq_ops vgic_v5_ppi_irq_ops = {
+	.queue_irq_unlock = vgic_v5_ppi_queue_irq_unlock,
+	.set_direct_injection = vgic_v5_set_ppi_dvi,
+};
+
+/* Attach the GICv5 PPI ops to the given virtual INTID on this vcpu */
+void vgic_v5_set_ppi_ops(struct kvm_vcpu *vcpu, u32 vintid)
+{
+	kvm_vgic_set_irq_ops(vcpu, vintid, &vgic_v5_ppi_irq_ops);
+}
+
+/*
+ * Sync back the PPI priorities to the vgic_irq shadow state for any interrupts
+ * exposed to the guest (skipping all others).
+ */
+static void vgic_v5_sync_ppi_priorities(struct kvm_vcpu *vcpu)
+{
+	struct vgic_v5_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v5;
+	u64 priorityr;
+	int i;
+
+	/*
+	 * We have up to 16 PPI Priority regs, but only have a few interrupts
+	 * that the guest is allowed to use. Limit our sync of PPI priorities to
+	 * those actually exposed to the guest by first iterating over the mask
+	 * of exposed PPIs.
+	 */
+	for_each_set_bit(i, vcpu->kvm->arch.vgic.gicv5_vm.vgic_ppi_mask, VGIC_V5_NR_PRIVATE_IRQS) {
+		u32 intid = vgic_v5_make_ppi(i);
+		struct vgic_irq *irq;
+		int pri_idx, pri_reg, pri_bit;
+		u8 priority;
+
+		/*
+		 * Determine which priority register and the field within it to
+		 * extract: 8 PPIs per 64-bit register, 8 bits per field, of
+		 * which the low 5 bits hold the priority.
+		 */
+		pri_reg = i / 8;
+		pri_idx = i % 8;
+		pri_bit = pri_idx * 8;
+
+		priorityr = cpu_if->vgic_ppi_priorityr[pri_reg];
+		priority = field_get(GENMASK(pri_bit + 4, pri_bit), priorityr);
+
+		irq = vgic_get_vcpu_irq(vcpu, intid);
+
+		scoped_guard(raw_spinlock_irqsave, &irq->irq_lock)
+			irq->priority = priority;
+
+		vgic_put_irq(vcpu->kvm, irq);
+	}
+}
+
+/*
+ * Return true if any exposed PPI on this vcpu is enabled, pending, and of
+ * sufficient priority to be delivered given the current effective priority
+ * mask.
+ */
+bool vgic_v5_has_pending_ppi(struct kvm_vcpu *vcpu)
+{
+	unsigned int priority_mask;
+	int i;
+
+	priority_mask = vgic_v5_get_effective_priority_mask(vcpu);
+
+	/*
+	 * If the combined priority mask is 0, nothing can be signalled! In the
+	 * case where the guest has disabled interrupt delivery for the vcpu
+	 * (via ICV_CR0_EL1.EN->ICH_VMCR_EL2.EN), we calculate the priority mask
+	 * as 0 too (the highest possible priority).
+	 */
+	if (!priority_mask)
+		return false;
+
+	for_each_set_bit(i, vcpu->kvm->arch.vgic.gicv5_vm.vgic_ppi_mask, VGIC_V5_NR_PRIVATE_IRQS) {
+		u32 intid = vgic_v5_make_ppi(i);
+		bool has_pending = false;
+		struct vgic_irq *irq;
+
+		irq = vgic_get_vcpu_irq(vcpu, intid);
+
+		/*
+		 * NOTE(review): the inclusive <= against the +1-adjusted mask
+		 * from vgic_v5_get_effective_priority_mask() makes the
+		 * boundary priority deliverable — confirm against the GICv5
+		 * prioritisation rules.
+		 */
+		scoped_guard(raw_spinlock_irqsave, &irq->irq_lock)
+			has_pending = (irq->enabled && irq_is_pending(irq) &&
+				       irq->priority <= priority_mask);
+
+		vgic_put_irq(vcpu->kvm, irq);
+
+		if (has_pending)
+			return true;
+	}
+
+	return false;
+}
+
+/*
+ * Detect any PPI state changes across a guest run (active and pending state
+ * at entry vs exit) and propagate them into KVM's shadow structures.
+ */
+void vgic_v5_fold_ppi_state(struct kvm_vcpu *vcpu)
+{
+	struct vgic_v5_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v5;
+	DECLARE_BITMAP(changed_active, VGIC_V5_NR_PRIVATE_IRQS);
+	DECLARE_BITMAP(changed_pending, VGIC_V5_NR_PRIVATE_IRQS);
+	DECLARE_BITMAP(changed_bits, VGIC_V5_NR_PRIVATE_IRQS);
+	unsigned long *activer, *pendr_entry, *pendr;
+	int i;
+
+	activer = host_data_ptr(vgic_v5_ppi_state)->activer_exit;
+	pendr_entry = host_data_ptr(vgic_v5_ppi_state)->pendr_entry;
+	pendr = host_data_ptr(vgic_v5_ppi_state)->pendr_exit;
+
+	/* XOR entry/exit snapshots to find the PPIs whose state changed */
+	bitmap_xor(changed_active, cpu_if->vgic_ppi_activer, activer,
+		   VGIC_V5_NR_PRIVATE_IRQS);
+	bitmap_xor(changed_pending, pendr_entry, pendr,
+		   VGIC_V5_NR_PRIVATE_IRQS);
+	bitmap_or(changed_bits, changed_active, changed_pending,
+		  VGIC_V5_NR_PRIVATE_IRQS);
+
+	for_each_set_bit(i, changed_bits, VGIC_V5_NR_PRIVATE_IRQS) {
+		u32 intid = vgic_v5_make_ppi(i);
+		struct vgic_irq *irq;
+
+		irq = vgic_get_vcpu_irq(vcpu, intid);
+
+		scoped_guard(raw_spinlock_irqsave, &irq->irq_lock) {
+			irq->active = test_bit(i, activer);
+
+			/* This is an OR to avoid losing incoming edges! */
+			if (irq->config == VGIC_CONFIG_EDGE)
+				irq->pending_latch |= test_bit(i, pendr);
+		}
+
+		vgic_put_irq(vcpu->kvm, irq);
+	}
+
+	/*
+	 * Re-inject the exit state as entry state next time!
+	 *
+	 * Note that the write of the Enable state is trapped, and hence there
+	 * is nothing to explicitly sync back here as we already have the latest
+	 * copy by definition.
+	 */
+	bitmap_copy(cpu_if->vgic_ppi_activer, activer, VGIC_V5_NR_PRIVATE_IRQS);
+}
+
+/*
+ * Prepare the PPI pending state to be written into the guest's
+ * ICC_PPI_PENDRx_EL1 registers ahead of guest entry.
+ */
+void vgic_v5_flush_ppi_state(struct kvm_vcpu *vcpu)
+{
+	DECLARE_BITMAP(pendr, VGIC_V5_NR_PRIVATE_IRQS);
+	int i;
+
+	/*
+	 * Time to enter the guest - we first need to build the guest's
+	 * ICC_PPI_PENDRx_EL1, however.
+	 */
+	bitmap_zero(pendr, VGIC_V5_NR_PRIVATE_IRQS);
+	for_each_set_bit(i, vcpu->kvm->arch.vgic.gicv5_vm.vgic_ppi_mask,
+			 VGIC_V5_NR_PRIVATE_IRQS) {
+		u32 intid = vgic_v5_make_ppi(i);
+		struct vgic_irq *irq;
+
+		irq = vgic_get_vcpu_irq(vcpu, intid);
+
+		scoped_guard(raw_spinlock_irqsave, &irq->irq_lock)
+			__assign_bit(i, pendr, irq_is_pending(irq));
+
+		vgic_put_irq(vcpu->kvm, irq);
+	}
+
+	/*
+	 * Copy the shadow state to the pending reg that will be written to the
+	 * ICH_PPI_PENDRx_EL2 regs. While the guest is running we track any
+	 * incoming changes to the pending state in the vgic_irq structures. The
+	 * incoming changes are merged with the outgoing changes on the return
+	 * path.
+	 */
+	bitmap_copy(host_data_ptr(vgic_v5_ppi_state)->pendr_entry, pendr,
+		    VGIC_V5_NR_PRIVATE_IRQS);
+
+	/*
+	 * Make sure that we can correctly detect "edges" in the PPI
+	 * state. There's a path where we never actually enter the guest, and
+	 * failure to do this risks losing pending state
+	 */
+	bitmap_copy(host_data_ptr(vgic_v5_ppi_state)->pendr_exit, pendr,
+		    VGIC_V5_NR_PRIVATE_IRQS);
+}
+
+/*
+ * Make the vcpu's GICv5 CPU interface state resident on this CPU: restore
+ * VMCR/APR via the hyp call. Idempotent while already resident.
+ */
+void vgic_v5_load(struct kvm_vcpu *vcpu)
+{
+	struct vgic_v5_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v5;
+
+	/*
+	 * On the WFI path, vgic_load is called a second time. The first is when
+	 * scheduling in the vcpu thread again, and the second is when leaving
+	 * WFI. Skip the second instance as it serves no purpose and just
+	 * restores the same state again.
+	 */
+	if (cpu_if->gicv5_vpe.resident)
+		return;
+
+	kvm_call_hyp(__vgic_v5_restore_vmcr_apr, cpu_if);
+
+	cpu_if->gicv5_vpe.resident = true;
+}
+
+/*
+ * Counterpart to vgic_v5_load(): save APR via the hyp call and mark the
+ * vcpu's GICv5 state non-resident. Idempotent while not resident.
+ */
+void vgic_v5_put(struct kvm_vcpu *vcpu)
+{
+	struct vgic_v5_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v5;
+
+	/*
+	 * Do nothing if we're not resident. This can happen in the WFI path
+	 * where we do a vgic_put in the WFI path and again later when
+	 * descheduling the thread. We risk losing VMCR state if we sync it
+	 * twice, so instead return early in this case.
+	 */
+	if (!cpu_if->gicv5_vpe.resident)
+		return;
+
+	kvm_call_hyp(__vgic_v5_save_apr, cpu_if);
+
+	cpu_if->gicv5_vpe.resident = false;
+
+	/* The shadow priority is only updated on entering WFI */
+	if (vcpu_get_flag(vcpu, IN_WFI))
+		vgic_v5_sync_ppi_priorities(vcpu);
+}
+
+/* Extract the EN and VPMR fields from the shadow ICH_VMCR_EL2 value. */
+void vgic_v5_get_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcrp)
+{
+	struct vgic_v5_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v5;
+	u64 vmcr = cpu_if->vgic_vmcr;
+
+	vmcrp->en = FIELD_GET(FEAT_GCIE_ICH_VMCR_EL2_EN, vmcr);
+	vmcrp->pmr = FIELD_GET(FEAT_GCIE_ICH_VMCR_EL2_VPMR, vmcr);
+}
+
+/* Rebuild the shadow ICH_VMCR_EL2 value from the generic vmcr fields. */
+void vgic_v5_set_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcrp)
+{
+	struct vgic_v5_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v5;
+	u64 vmcr;
+
+	vmcr = FIELD_PREP(FEAT_GCIE_ICH_VMCR_EL2_VPMR, vmcrp->pmr) |
+	       FIELD_PREP(FEAT_GCIE_ICH_VMCR_EL2_EN, vmcrp->en);
+
+	cpu_if->vgic_vmcr = vmcr;
+}
+
+/*
+ * Restore the GICv5 CPU interface and PPI state on guest entry. The dsb(sy)
+ * orders the register writes before subsequent execution — NOTE(review):
+ * confirm the full-system scope is required here.
+ */
+void vgic_v5_restore_state(struct kvm_vcpu *vcpu)
+{
+	struct vgic_v5_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v5;
+
+	__vgic_v5_restore_state(cpu_if);
+	__vgic_v5_restore_ppi_state(cpu_if);
+	dsb(sy);
+}
+
+/*
+ * Save the GICv5 CPU interface and PPI state on guest exit; mirrors
+ * vgic_v5_restore_state(), including the trailing barrier.
+ */
+void vgic_v5_save_state(struct kvm_vcpu *vcpu)
+{
+	struct vgic_v5_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v5;
+
+	__vgic_v5_save_state(cpu_if);
+	__vgic_v5_save_ppi_state(cpu_if);
+	dsb(sy);
+}
diff --git a/arch/arm64/kvm/vgic/vgic.c b/arch/arm64/kvm/vgic/vgic.c
index e22b79cfff96..1e9fe8764584 100644
--- a/arch/arm64/kvm/vgic/vgic.c
+++ b/arch/arm64/kvm/vgic/vgic.c
@@ -86,6 +86,10 @@ static struct vgic_irq *vgic_get_lpi(struct kvm *kvm, u32 intid)
*/
struct vgic_irq *vgic_get_irq(struct kvm *kvm, u32 intid)
{
+ /* Non-private IRQs are not yet implemented for GICv5 */
+ if (vgic_is_v5(kvm))
+ return NULL;
+
/* SPIs */
if (intid >= VGIC_NR_PRIVATE_IRQS &&
intid < (kvm->arch.vgic.nr_spis + VGIC_NR_PRIVATE_IRQS)) {
@@ -94,7 +98,7 @@ struct vgic_irq *vgic_get_irq(struct kvm *kvm, u32 intid)
}
/* LPIs */
- if (intid >= VGIC_MIN_LPI)
+ if (irq_is_lpi(kvm, intid))
return vgic_get_lpi(kvm, intid);
return NULL;
@@ -105,6 +109,18 @@ struct vgic_irq *vgic_get_vcpu_irq(struct kvm_vcpu *vcpu, u32 intid)
if (WARN_ON(!vcpu))
return NULL;
+ if (vgic_is_v5(vcpu->kvm)) {
+ u32 int_num, hwirq_id;
+
+ if (!__irq_is_ppi(KVM_DEV_TYPE_ARM_VGIC_V5, intid))
+ return NULL;
+
+ hwirq_id = FIELD_GET(GICV5_HWIRQ_ID, intid);
+ int_num = array_index_nospec(hwirq_id, VGIC_V5_NR_PRIVATE_IRQS);
+
+ return &vcpu->arch.vgic_cpu.private_irqs[int_num];
+ }
+
/* SGIs and PPIs */
if (intid < VGIC_NR_PRIVATE_IRQS) {
intid = array_index_nospec(intid, VGIC_NR_PRIVATE_IRQS);
@@ -123,7 +139,7 @@ static void vgic_release_lpi_locked(struct vgic_dist *dist, struct vgic_irq *irq
static __must_check bool __vgic_put_irq(struct kvm *kvm, struct vgic_irq *irq)
{
- if (irq->intid < VGIC_MIN_LPI)
+ if (!irq_is_lpi(kvm, irq->intid))
return false;
return refcount_dec_and_test(&irq->refcount);
@@ -148,7 +164,7 @@ void vgic_put_irq(struct kvm *kvm, struct vgic_irq *irq)
* Acquire/release it early on lockdep kernels to make locking issues
* in rare release paths a bit more obvious.
*/
- if (IS_ENABLED(CONFIG_LOCKDEP) && irq->intid >= VGIC_MIN_LPI) {
+ if (IS_ENABLED(CONFIG_LOCKDEP) && irq_is_lpi(kvm, irq->intid)) {
guard(spinlock_irqsave)(&dist->lpi_xa.xa_lock);
}
@@ -186,7 +202,7 @@ void vgic_flush_pending_lpis(struct kvm_vcpu *vcpu)
raw_spin_lock_irqsave(&vgic_cpu->ap_list_lock, flags);
list_for_each_entry_safe(irq, tmp, &vgic_cpu->ap_list_head, ap_list) {
- if (irq->intid >= VGIC_MIN_LPI) {
+ if (irq_is_lpi(vcpu->kvm, irq->intid)) {
raw_spin_lock(&irq->irq_lock);
list_del(&irq->ap_list);
irq->vcpu = NULL;
@@ -404,6 +420,9 @@ bool vgic_queue_irq_unlock(struct kvm *kvm, struct vgic_irq *irq,
lockdep_assert_held(&irq->irq_lock);
+ if (irq->ops && irq->ops->queue_irq_unlock)
+ return irq->ops->queue_irq_unlock(kvm, irq, flags);
+
retry:
vcpu = vgic_target_oracle(irq);
if (irq->vcpu || !vcpu) {
@@ -521,12 +540,12 @@ int kvm_vgic_inject_irq(struct kvm *kvm, struct kvm_vcpu *vcpu,
if (ret)
return ret;
- if (!vcpu && intid < VGIC_NR_PRIVATE_IRQS)
+ if (!vcpu && irq_is_private(kvm, intid))
return -EINVAL;
trace_vgic_update_irq_pending(vcpu ? vcpu->vcpu_idx : 0, intid, level);
- if (intid < VGIC_NR_PRIVATE_IRQS)
+ if (irq_is_private(kvm, intid))
irq = vgic_get_vcpu_irq(vcpu, intid);
else
irq = vgic_get_irq(kvm, intid);
@@ -553,10 +572,27 @@ int kvm_vgic_inject_irq(struct kvm *kvm, struct kvm_vcpu *vcpu,
return 0;
}
+void kvm_vgic_set_irq_ops(struct kvm_vcpu *vcpu, u32 vintid,
+ struct irq_ops *ops)
+{
+ struct vgic_irq *irq = vgic_get_vcpu_irq(vcpu, vintid);
+
+ BUG_ON(!irq);
+
+ scoped_guard(raw_spinlock_irqsave, &irq->irq_lock)
+ irq->ops = ops;
+
+ vgic_put_irq(vcpu->kvm, irq);
+}
+
+void kvm_vgic_clear_irq_ops(struct kvm_vcpu *vcpu, u32 vintid)
+{
+ kvm_vgic_set_irq_ops(vcpu, vintid, NULL);
+}
+
/* @irq->irq_lock must be held */
static int kvm_vgic_map_irq(struct kvm_vcpu *vcpu, struct vgic_irq *irq,
- unsigned int host_irq,
- struct irq_ops *ops)
+ unsigned int host_irq)
{
struct irq_desc *desc;
struct irq_data *data;
@@ -576,20 +612,25 @@ static int kvm_vgic_map_irq(struct kvm_vcpu *vcpu, struct vgic_irq *irq,
irq->hw = true;
irq->host_irq = host_irq;
irq->hwintid = data->hwirq;
- irq->ops = ops;
+
+ if (irq->ops && irq->ops->set_direct_injection)
+ irq->ops->set_direct_injection(vcpu, irq, true);
+
return 0;
}
/* @irq->irq_lock must be held */
static inline void kvm_vgic_unmap_irq(struct vgic_irq *irq)
{
+ if (irq->ops && irq->ops->set_direct_injection)
+ irq->ops->set_direct_injection(irq->target_vcpu, irq, false);
+
irq->hw = false;
irq->hwintid = 0;
- irq->ops = NULL;
}
int kvm_vgic_map_phys_irq(struct kvm_vcpu *vcpu, unsigned int host_irq,
- u32 vintid, struct irq_ops *ops)
+ u32 vintid)
{
struct vgic_irq *irq = vgic_get_vcpu_irq(vcpu, vintid);
unsigned long flags;
@@ -598,7 +639,7 @@ int kvm_vgic_map_phys_irq(struct kvm_vcpu *vcpu, unsigned int host_irq,
BUG_ON(!irq);
raw_spin_lock_irqsave(&irq->irq_lock, flags);
- ret = kvm_vgic_map_irq(vcpu, irq, host_irq, ops);
+ ret = kvm_vgic_map_irq(vcpu, irq, host_irq);
raw_spin_unlock_irqrestore(&irq->irq_lock, flags);
vgic_put_irq(vcpu->kvm, irq);
@@ -685,7 +726,7 @@ int kvm_vgic_set_owner(struct kvm_vcpu *vcpu, unsigned int intid, void *owner)
return -EAGAIN;
/* SGIs and LPIs cannot be wired up to any device */
- if (!irq_is_ppi(intid) && !vgic_valid_spi(vcpu->kvm, intid))
+ if (!irq_is_ppi(vcpu->kvm, intid) && !vgic_valid_spi(vcpu->kvm, intid))
return -EINVAL;
irq = vgic_get_vcpu_irq(vcpu, intid);
@@ -812,8 +853,13 @@ retry:
vgic_release_deleted_lpis(vcpu->kvm);
}
-static inline void vgic_fold_lr_state(struct kvm_vcpu *vcpu)
+static void vgic_fold_state(struct kvm_vcpu *vcpu)
{
+ if (vgic_is_v5(vcpu->kvm)) {
+ vgic_v5_fold_ppi_state(vcpu);
+ return;
+ }
+
if (!*host_data_ptr(last_lr_irq))
return;
@@ -1002,7 +1048,10 @@ static inline bool can_access_vgic_from_kernel(void)
static inline void vgic_save_state(struct kvm_vcpu *vcpu)
{
- if (!static_branch_unlikely(&kvm_vgic_global_state.gicv3_cpuif))
+ /* No switch statement here. See comment in vgic_restore_state() */
+ if (vgic_is_v5(vcpu->kvm))
+ vgic_v5_save_state(vcpu);
+ else if (!static_branch_unlikely(&kvm_vgic_global_state.gicv3_cpuif))
vgic_v2_save_state(vcpu);
else
__vgic_v3_save_state(&vcpu->arch.vgic_cpu.vgic_v3);
@@ -1011,20 +1060,24 @@ static inline void vgic_save_state(struct kvm_vcpu *vcpu)
/* Sync back the hardware VGIC state into our emulation after a guest's run. */
void kvm_vgic_sync_hwstate(struct kvm_vcpu *vcpu)
{
- /* If nesting, emulate the HW effect from L0 to L1 */
- if (vgic_state_is_nested(vcpu)) {
- vgic_v3_sync_nested(vcpu);
- return;
- }
+ if (vgic_is_v3(vcpu->kvm)) {
+ /* If nesting, emulate the HW effect from L0 to L1 */
+ if (vgic_state_is_nested(vcpu)) {
+ vgic_v3_sync_nested(vcpu);
+ return;
+ }
- if (vcpu_has_nv(vcpu))
- vgic_v3_nested_update_mi(vcpu);
+ if (vcpu_has_nv(vcpu))
+ vgic_v3_nested_update_mi(vcpu);
+ }
if (can_access_vgic_from_kernel())
vgic_save_state(vcpu);
- vgic_fold_lr_state(vcpu);
- vgic_prune_ap_list(vcpu);
+ vgic_fold_state(vcpu);
+
+ if (!vgic_is_v5(vcpu->kvm))
+ vgic_prune_ap_list(vcpu);
}
/* Sync interrupts that were deactivated through a DIR trap */
@@ -1040,12 +1093,34 @@ void kvm_vgic_process_async_update(struct kvm_vcpu *vcpu)
static inline void vgic_restore_state(struct kvm_vcpu *vcpu)
{
- if (!static_branch_unlikely(&kvm_vgic_global_state.gicv3_cpuif))
+ /*
+ * As nice as it would be to restructure this code into a switch
+ * statement as can be found elsewhere, the logic quickly gets ugly.
+ *
+ * __vgic_v3_restore_state() is doing a lot of heavy lifting here. It is
+ * required for GICv3-on-GICv3, GICv2-on-GICv3, GICv3-on-GICv5, and the
+ * no-in-kernel-irqchip case on GICv3 hardware. Hence, adding a switch
+ * here results in much more complex code.
+ */
+ if (vgic_is_v5(vcpu->kvm))
+ vgic_v5_restore_state(vcpu);
+ else if (!static_branch_unlikely(&kvm_vgic_global_state.gicv3_cpuif))
vgic_v2_restore_state(vcpu);
else
__vgic_v3_restore_state(&vcpu->arch.vgic_cpu.vgic_v3);
}
+static void vgic_flush_state(struct kvm_vcpu *vcpu)
+{
+ if (vgic_is_v5(vcpu->kvm)) {
+ vgic_v5_flush_ppi_state(vcpu);
+ return;
+ }
+
+ scoped_guard(raw_spinlock, &vcpu->arch.vgic_cpu.ap_list_lock)
+ vgic_flush_lr_state(vcpu);
+}
+
/* Flush our emulation state into the GIC hardware before entering the guest. */
void kvm_vgic_flush_hwstate(struct kvm_vcpu *vcpu)
{
@@ -1082,42 +1157,69 @@ void kvm_vgic_flush_hwstate(struct kvm_vcpu *vcpu)
DEBUG_SPINLOCK_BUG_ON(!irqs_disabled());
- scoped_guard(raw_spinlock, &vcpu->arch.vgic_cpu.ap_list_lock)
- vgic_flush_lr_state(vcpu);
+ vgic_flush_state(vcpu);
if (can_access_vgic_from_kernel())
vgic_restore_state(vcpu);
- if (vgic_supports_direct_irqs(vcpu->kvm))
+ if (vgic_supports_direct_irqs(vcpu->kvm) && kvm_vgic_global_state.has_gicv4)
vgic_v4_commit(vcpu);
}
void kvm_vgic_load(struct kvm_vcpu *vcpu)
{
+ const struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
+
if (unlikely(!irqchip_in_kernel(vcpu->kvm) || !vgic_initialized(vcpu->kvm))) {
if (has_vhe() && static_branch_unlikely(&kvm_vgic_global_state.gicv3_cpuif))
__vgic_v3_activate_traps(&vcpu->arch.vgic_cpu.vgic_v3);
return;
}
- if (!static_branch_unlikely(&kvm_vgic_global_state.gicv3_cpuif))
- vgic_v2_load(vcpu);
- else
+ switch (dist->vgic_model) {
+ case KVM_DEV_TYPE_ARM_VGIC_V5:
+ vgic_v5_load(vcpu);
+ break;
+ case KVM_DEV_TYPE_ARM_VGIC_V3:
vgic_v3_load(vcpu);
+ break;
+ case KVM_DEV_TYPE_ARM_VGIC_V2:
+ if (static_branch_unlikely(&kvm_vgic_global_state.gicv3_cpuif))
+ vgic_v3_load(vcpu);
+ else
+ vgic_v2_load(vcpu);
+ break;
+ default:
+ BUG();
+ }
}
void kvm_vgic_put(struct kvm_vcpu *vcpu)
{
+ const struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
+
if (unlikely(!irqchip_in_kernel(vcpu->kvm) || !vgic_initialized(vcpu->kvm))) {
if (has_vhe() && static_branch_unlikely(&kvm_vgic_global_state.gicv3_cpuif))
__vgic_v3_deactivate_traps(&vcpu->arch.vgic_cpu.vgic_v3);
return;
}
- if (!static_branch_unlikely(&kvm_vgic_global_state.gicv3_cpuif))
- vgic_v2_put(vcpu);
- else
+ switch (dist->vgic_model) {
+ case KVM_DEV_TYPE_ARM_VGIC_V5:
+ vgic_v5_put(vcpu);
+ break;
+ case KVM_DEV_TYPE_ARM_VGIC_V3:
vgic_v3_put(vcpu);
+ break;
+ case KVM_DEV_TYPE_ARM_VGIC_V2:
+ if (static_branch_unlikely(&kvm_vgic_global_state.gicv3_cpuif))
+ vgic_v3_put(vcpu);
+ else
+ vgic_v2_put(vcpu);
+ break;
+ default:
+ BUG();
+ }
}
int kvm_vgic_vcpu_pending_irq(struct kvm_vcpu *vcpu)
@@ -1128,6 +1230,9 @@ int kvm_vgic_vcpu_pending_irq(struct kvm_vcpu *vcpu)
unsigned long flags;
struct vgic_vmcr vmcr;
+ if (vgic_is_v5(vcpu->kvm))
+ return vgic_v5_has_pending_ppi(vcpu);
+
if (!vcpu->kvm->arch.vgic.enabled)
return false;
diff --git a/arch/arm64/kvm/vgic/vgic.h b/arch/arm64/kvm/vgic/vgic.h
index c9b3bb07e483..9d941241c8a2 100644
--- a/arch/arm64/kvm/vgic/vgic.h
+++ b/arch/arm64/kvm/vgic/vgic.h
@@ -187,6 +187,7 @@ static inline u64 vgic_ich_hcr_trap_bits(void)
* registers regardless of the hardware backed GIC used.
*/
struct vgic_vmcr {
+ u32 en; /* GICv5-specific */
u32 grpen0;
u32 grpen1;
@@ -363,6 +364,19 @@ void vgic_debug_init(struct kvm *kvm);
void vgic_debug_destroy(struct kvm *kvm);
int vgic_v5_probe(const struct gic_kvm_info *info);
+void vgic_v5_reset(struct kvm_vcpu *vcpu);
+int vgic_v5_init(struct kvm *kvm);
+int vgic_v5_map_resources(struct kvm *kvm);
+void vgic_v5_set_ppi_ops(struct kvm_vcpu *vcpu, u32 vintid);
+bool vgic_v5_has_pending_ppi(struct kvm_vcpu *vcpu);
+void vgic_v5_flush_ppi_state(struct kvm_vcpu *vcpu);
+void vgic_v5_fold_ppi_state(struct kvm_vcpu *vcpu);
+void vgic_v5_load(struct kvm_vcpu *vcpu);
+void vgic_v5_put(struct kvm_vcpu *vcpu);
+void vgic_v5_set_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcr);
+void vgic_v5_get_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcr);
+void vgic_v5_restore_state(struct kvm_vcpu *vcpu);
+void vgic_v5_save_state(struct kvm_vcpu *vcpu);
static inline int vgic_v3_max_apr_idx(struct kvm_vcpu *vcpu)
{
@@ -425,15 +439,6 @@ void vgic_its_invalidate_all_caches(struct kvm *kvm);
int vgic_its_inv_lpi(struct kvm *kvm, struct vgic_irq *irq);
int vgic_its_invall(struct kvm_vcpu *vcpu);
-bool system_supports_direct_sgis(void);
-bool vgic_supports_direct_msis(struct kvm *kvm);
-bool vgic_supports_direct_sgis(struct kvm *kvm);
-
-static inline bool vgic_supports_direct_irqs(struct kvm *kvm)
-{
- return vgic_supports_direct_msis(kvm) || vgic_supports_direct_sgis(kvm);
-}
-
int vgic_v4_init(struct kvm *kvm);
void vgic_v4_teardown(struct kvm *kvm);
void vgic_v4_configure_vsgis(struct kvm *kvm);
@@ -447,6 +452,11 @@ static inline bool kvm_has_gicv3(struct kvm *kvm)
return kvm_has_feat(kvm, ID_AA64PFR0_EL1, GIC, IMP);
}
+static inline bool kvm_has_gicv5(struct kvm *kvm)
+{
+ return kvm_has_feat(kvm, ID_AA64PFR2_EL1, GCIE, IMP);
+}
+
void vgic_v3_flush_nested(struct kvm_vcpu *vcpu);
void vgic_v3_sync_nested(struct kvm_vcpu *vcpu);
void vgic_v3_load_nested(struct kvm_vcpu *vcpu);
@@ -454,15 +464,32 @@ void vgic_v3_put_nested(struct kvm_vcpu *vcpu);
void vgic_v3_handle_nested_maint_irq(struct kvm_vcpu *vcpu);
void vgic_v3_nested_update_mi(struct kvm_vcpu *vcpu);
-static inline bool vgic_is_v3_compat(struct kvm *kvm)
+static inline bool vgic_host_has_gicv3(void)
{
- return cpus_have_final_cap(ARM64_HAS_GICV5_CPUIF) &&
+ /*
+ * Either the host is a native GICv3, or it is GICv5 with
+ * FEAT_GCIE_LEGACY.
+ */
+ return kvm_vgic_global_state.type == VGIC_V3 ||
kvm_vgic_global_state.has_gcie_v3_compat;
}
-static inline bool vgic_is_v3(struct kvm *kvm)
+static inline bool vgic_host_has_gicv5(void)
+{
+ return kvm_vgic_global_state.type == VGIC_V5;
+}
+
+bool system_supports_direct_sgis(void);
+bool vgic_supports_direct_msis(struct kvm *kvm);
+bool vgic_supports_direct_sgis(struct kvm *kvm);
+
+static inline bool vgic_supports_direct_irqs(struct kvm *kvm)
{
- return kvm_vgic_global_state.type == VGIC_V3 || vgic_is_v3_compat(kvm);
+ /* GICv5 always supports direct IRQs */
+ if (vgic_is_v5(kvm))
+ return true;
+
+ return vgic_supports_direct_msis(kvm) || vgic_supports_direct_sgis(kvm);
}
int vgic_its_debug_init(struct kvm_device *dev);
diff --git a/arch/arm64/tools/sysreg b/arch/arm64/tools/sysreg
index 9d1c21108057..51dcca5b2fa6 100644
--- a/arch/arm64/tools/sysreg
+++ b/arch/arm64/tools/sysreg
@@ -3243,6 +3243,14 @@ UnsignedEnum 3:0 ID_BITS
EndEnum
EndSysreg
+Sysreg ICC_HPPIR_EL1 3 0 12 10 3
+Res0 63:33
+Field 32 HPPIV
+Field 31:29 TYPE
+Res0 28:24
+Field 23:0 ID
+EndSysreg
+
Sysreg ICC_ICSR_EL1 3 0 12 10 4
Res0 63:48
Field 47:32 IAFFID
@@ -3257,6 +3265,11 @@ Field 1 Enabled
Field 0 F
EndSysreg
+Sysreg ICC_IAFFIDR_EL1 3 0 12 10 5
+Res0 63:16
+Field 15:0 IAFFID
+EndSysreg
+
SysregFields ICC_PPI_ENABLERx_EL1
Field 63 EN63
Field 62 EN62
@@ -3663,6 +3676,42 @@ Res0 14:12
Field 11:0 AFFINITY
EndSysreg
+Sysreg ICC_APR_EL1 3 1 12 0 0
+Res0 63:32
+Field 31 P31
+Field 30 P30
+Field 29 P29
+Field 28 P28
+Field 27 P27
+Field 26 P26
+Field 25 P25
+Field 24 P24
+Field 23 P23
+Field 22 P22
+Field 21 P21
+Field 20 P20
+Field 19 P19
+Field 18 P18
+Field 17 P17
+Field 16 P16
+Field 15 P15
+Field 14 P14
+Field 13 P13
+Field 12 P12
+Field 11 P11
+Field 10 P10
+Field 9 P9
+Field 8 P8
+Field 7 P7
+Field 6 P6
+Field 5 P5
+Field 4 P4
+Field 3 P3
+Field 2 P2
+Field 1 P1
+Field 0 P0
+EndSysreg
+
Sysreg ICC_CR0_EL1 3 1 12 0 1
Res0 63:39
Field 38 PID
@@ -4687,6 +4736,42 @@ Field 31:16 PhyPARTID29
Field 15:0 PhyPARTID28
EndSysreg
+Sysreg ICH_APR_EL2 3 4 12 8 4
+Res0 63:32
+Field 31 P31
+Field 30 P30
+Field 29 P29
+Field 28 P28
+Field 27 P27
+Field 26 P26
+Field 25 P25
+Field 24 P24
+Field 23 P23
+Field 22 P22
+Field 21 P21
+Field 20 P20
+Field 19 P19
+Field 18 P18
+Field 17 P17
+Field 16 P16
+Field 15 P15
+Field 14 P14
+Field 13 P13
+Field 12 P12
+Field 11 P11
+Field 10 P10
+Field 9 P9
+Field 8 P8
+Field 7 P7
+Field 6 P6
+Field 5 P5
+Field 4 P4
+Field 3 P3
+Field 2 P2
+Field 1 P1
+Field 0 P0
+EndSysreg
+
Sysreg ICH_HFGRTR_EL2 3 4 12 9 4
Res0 63:21
Field 20 ICC_PPI_ACTIVERn_EL1
@@ -4735,6 +4820,306 @@ Field 1 GICCDDIS
Field 0 GICCDEN
EndSysreg
+SysregFields ICH_PPI_DVIRx_EL2
+Field 63 DVI63
+Field 62 DVI62
+Field 61 DVI61
+Field 60 DVI60
+Field 59 DVI59
+Field 58 DVI58
+Field 57 DVI57
+Field 56 DVI56
+Field 55 DVI55
+Field 54 DVI54
+Field 53 DVI53
+Field 52 DVI52
+Field 51 DVI51
+Field 50 DVI50
+Field 49 DVI49
+Field 48 DVI48
+Field 47 DVI47
+Field 46 DVI46
+Field 45 DVI45
+Field 44 DVI44
+Field 43 DVI43
+Field 42 DVI42
+Field 41 DVI41
+Field 40 DVI40
+Field 39 DVI39
+Field 38 DVI38
+Field 37 DVI37
+Field 36 DVI36
+Field 35 DVI35
+Field 34 DVI34
+Field 33 DVI33
+Field 32 DVI32
+Field 31 DVI31
+Field 30 DVI30
+Field 29 DVI29
+Field 28 DVI28
+Field 27 DVI27
+Field 26 DVI26
+Field 25 DVI25
+Field 24 DVI24
+Field 23 DVI23
+Field 22 DVI22
+Field 21 DVI21
+Field 20 DVI20
+Field 19 DVI19
+Field 18 DVI18
+Field 17 DVI17
+Field 16 DVI16
+Field 15 DVI15
+Field 14 DVI14
+Field 13 DVI13
+Field 12 DVI12
+Field 11 DVI11
+Field 10 DVI10
+Field 9 DVI9
+Field 8 DVI8
+Field 7 DVI7
+Field 6 DVI6
+Field 5 DVI5
+Field 4 DVI4
+Field 3 DVI3
+Field 2 DVI2
+Field 1 DVI1
+Field 0 DVI0
+EndSysregFields
+
+Sysreg ICH_PPI_DVIR0_EL2 3 4 12 10 0
+Fields ICH_PPI_DVIRx_EL2
+EndSysreg
+
+Sysreg ICH_PPI_DVIR1_EL2 3 4 12 10 1
+Fields ICH_PPI_DVIRx_EL2
+EndSysreg
+
+SysregFields ICH_PPI_ENABLERx_EL2
+Field 63 EN63
+Field 62 EN62
+Field 61 EN61
+Field 60 EN60
+Field 59 EN59
+Field 58 EN58
+Field 57 EN57
+Field 56 EN56
+Field 55 EN55
+Field 54 EN54
+Field 53 EN53
+Field 52 EN52
+Field 51 EN51
+Field 50 EN50
+Field 49 EN49
+Field 48 EN48
+Field 47 EN47
+Field 46 EN46
+Field 45 EN45
+Field 44 EN44
+Field 43 EN43
+Field 42 EN42
+Field 41 EN41
+Field 40 EN40
+Field 39 EN39
+Field 38 EN38
+Field 37 EN37
+Field 36 EN36
+Field 35 EN35
+Field 34 EN34
+Field 33 EN33
+Field 32 EN32
+Field 31 EN31
+Field 30 EN30
+Field 29 EN29
+Field 28 EN28
+Field 27 EN27
+Field 26 EN26
+Field 25 EN25
+Field 24 EN24
+Field 23 EN23
+Field 22 EN22
+Field 21 EN21
+Field 20 EN20
+Field 19 EN19
+Field 18 EN18
+Field 17 EN17
+Field 16 EN16
+Field 15 EN15
+Field 14 EN14
+Field 13 EN13
+Field 12 EN12
+Field 11 EN11
+Field 10 EN10
+Field 9 EN9
+Field 8 EN8
+Field 7 EN7
+Field 6 EN6
+Field 5 EN5
+Field 4 EN4
+Field 3 EN3
+Field 2 EN2
+Field 1 EN1
+Field 0 EN0
+EndSysregFields
+
+Sysreg ICH_PPI_ENABLER0_EL2 3 4 12 10 2
+Fields ICH_PPI_ENABLERx_EL2
+EndSysreg
+
+Sysreg ICH_PPI_ENABLER1_EL2 3 4 12 10 3
+Fields ICH_PPI_ENABLERx_EL2
+EndSysreg
+
+SysregFields ICH_PPI_PENDRx_EL2
+Field 63 PEND63
+Field 62 PEND62
+Field 61 PEND61
+Field 60 PEND60
+Field 59 PEND59
+Field 58 PEND58
+Field 57 PEND57
+Field 56 PEND56
+Field 55 PEND55
+Field 54 PEND54
+Field 53 PEND53
+Field 52 PEND52
+Field 51 PEND51
+Field 50 PEND50
+Field 49 PEND49
+Field 48 PEND48
+Field 47 PEND47
+Field 46 PEND46
+Field 45 PEND45
+Field 44 PEND44
+Field 43 PEND43
+Field 42 PEND42
+Field 41 PEND41
+Field 40 PEND40
+Field 39 PEND39
+Field 38 PEND38
+Field 37 PEND37
+Field 36 PEND36
+Field 35 PEND35
+Field 34 PEND34
+Field 33 PEND33
+Field 32 PEND32
+Field 31 PEND31
+Field 30 PEND30
+Field 29 PEND29
+Field 28 PEND28
+Field 27 PEND27
+Field 26 PEND26
+Field 25 PEND25
+Field 24 PEND24
+Field 23 PEND23
+Field 22 PEND22
+Field 21 PEND21
+Field 20 PEND20
+Field 19 PEND19
+Field 18 PEND18
+Field 17 PEND17
+Field 16 PEND16
+Field 15 PEND15
+Field 14 PEND14
+Field 13 PEND13
+Field 12 PEND12
+Field 11 PEND11
+Field 10 PEND10
+Field 9 PEND9
+Field 8 PEND8
+Field 7 PEND7
+Field 6 PEND6
+Field 5 PEND5
+Field 4 PEND4
+Field 3 PEND3
+Field 2 PEND2
+Field 1 PEND1
+Field 0 PEND0
+EndSysregFields
+
+Sysreg ICH_PPI_PENDR0_EL2 3 4 12 10 4
+Fields ICH_PPI_PENDRx_EL2
+EndSysreg
+
+Sysreg ICH_PPI_PENDR1_EL2 3 4 12 10 5
+Fields ICH_PPI_PENDRx_EL2
+EndSysreg
+
+SysregFields ICH_PPI_ACTIVERx_EL2
+Field 63 ACTIVE63
+Field 62 ACTIVE62
+Field 61 ACTIVE61
+Field 60 ACTIVE60
+Field 59 ACTIVE59
+Field 58 ACTIVE58
+Field 57 ACTIVE57
+Field 56 ACTIVE56
+Field 55 ACTIVE55
+Field 54 ACTIVE54
+Field 53 ACTIVE53
+Field 52 ACTIVE52
+Field 51 ACTIVE51
+Field 50 ACTIVE50
+Field 49 ACTIVE49
+Field 48 ACTIVE48
+Field 47 ACTIVE47
+Field 46 ACTIVE46
+Field 45 ACTIVE45
+Field 44 ACTIVE44
+Field 43 ACTIVE43
+Field 42 ACTIVE42
+Field 41 ACTIVE41
+Field 40 ACTIVE40
+Field 39 ACTIVE39
+Field 38 ACTIVE38
+Field 37 ACTIVE37
+Field 36 ACTIVE36
+Field 35 ACTIVE35
+Field 34 ACTIVE34
+Field 33 ACTIVE33
+Field 32 ACTIVE32
+Field 31 ACTIVE31
+Field 30 ACTIVE30
+Field 29 ACTIVE29
+Field 28 ACTIVE28
+Field 27 ACTIVE27
+Field 26 ACTIVE26
+Field 25 ACTIVE25
+Field 24 ACTIVE24
+Field 23 ACTIVE23
+Field 22 ACTIVE22
+Field 21 ACTIVE21
+Field 20 ACTIVE20
+Field 19 ACTIVE19
+Field 18 ACTIVE18
+Field 17 ACTIVE17
+Field 16 ACTIVE16
+Field 15 ACTIVE15
+Field 14 ACTIVE14
+Field 13 ACTIVE13
+Field 12 ACTIVE12
+Field 11 ACTIVE11
+Field 10 ACTIVE10
+Field 9 ACTIVE9
+Field 8 ACTIVE8
+Field 7 ACTIVE7
+Field 6 ACTIVE6
+Field 5 ACTIVE5
+Field 4 ACTIVE4
+Field 3 ACTIVE3
+Field 2 ACTIVE2
+Field 1 ACTIVE1
+Field 0 ACTIVE0
+EndSysregFields
+
+Sysreg ICH_PPI_ACTIVER0_EL2 3 4 12 10 6
+Fields ICH_PPI_ACTIVERx_EL2
+EndSysreg
+
+Sysreg ICH_PPI_ACTIVER1_EL2 3 4 12 10 7
+Fields ICH_PPI_ACTIVERx_EL2
+EndSysreg
+
Sysreg ICH_HCR_EL2 3 4 12 11 0
Res0 63:32
Field 31:27 EOIcount
@@ -4789,6 +5174,18 @@ Field 1 V3
Field 0 En
EndSysreg
+Sysreg ICH_CONTEXTR_EL2 3 4 12 11 6
+Field 63 V
+Field 62 F
+Field 61 IRICHPPIDIS
+Field 60 DB
+Field 59:55 DBPM
+Res0 54:48
+Field 47:32 VPE
+Res0 31:16
+Field 15:0 VM
+EndSysreg
+
Sysreg ICH_VMCR_EL2 3 4 12 11 7
Prefix FEAT_GCIE
Res0 63:32
@@ -4810,6 +5207,89 @@ Field 1 VENG1
Field 0 VENG0
EndSysreg
+SysregFields ICH_PPI_PRIORITYRx_EL2
+Res0 63:61
+Field 60:56 Priority7
+Res0 55:53
+Field 52:48 Priority6
+Res0 47:45
+Field 44:40 Priority5
+Res0 39:37
+Field 36:32 Priority4
+Res0 31:29
+Field 28:24 Priority3
+Res0 23:21
+Field 20:16 Priority2
+Res0 15:13
+Field 12:8 Priority1
+Res0 7:5
+Field 4:0 Priority0
+EndSysregFields
+
+Sysreg ICH_PPI_PRIORITYR0_EL2 3 4 12 14 0
+Fields ICH_PPI_PRIORITYRx_EL2
+EndSysreg
+
+Sysreg ICH_PPI_PRIORITYR1_EL2 3 4 12 14 1
+Fields ICH_PPI_PRIORITYRx_EL2
+EndSysreg
+
+Sysreg ICH_PPI_PRIORITYR2_EL2 3 4 12 14 2
+Fields ICH_PPI_PRIORITYRx_EL2
+EndSysreg
+
+Sysreg ICH_PPI_PRIORITYR3_EL2 3 4 12 14 3
+Fields ICH_PPI_PRIORITYRx_EL2
+EndSysreg
+
+Sysreg ICH_PPI_PRIORITYR4_EL2 3 4 12 14 4
+Fields ICH_PPI_PRIORITYRx_EL2
+EndSysreg
+
+Sysreg ICH_PPI_PRIORITYR5_EL2 3 4 12 14 5
+Fields ICH_PPI_PRIORITYRx_EL2
+EndSysreg
+
+Sysreg ICH_PPI_PRIORITYR6_EL2 3 4 12 14 6
+Fields ICH_PPI_PRIORITYRx_EL2
+EndSysreg
+
+Sysreg ICH_PPI_PRIORITYR7_EL2 3 4 12 14 7
+Fields ICH_PPI_PRIORITYRx_EL2
+EndSysreg
+
+Sysreg ICH_PPI_PRIORITYR8_EL2 3 4 12 15 0
+Fields ICH_PPI_PRIORITYRx_EL2
+EndSysreg
+
+Sysreg ICH_PPI_PRIORITYR9_EL2 3 4 12 15 1
+Fields ICH_PPI_PRIORITYRx_EL2
+EndSysreg
+
+Sysreg ICH_PPI_PRIORITYR10_EL2 3 4 12 15 2
+Fields ICH_PPI_PRIORITYRx_EL2
+EndSysreg
+
+Sysreg ICH_PPI_PRIORITYR11_EL2 3 4 12 15 3
+Fields ICH_PPI_PRIORITYRx_EL2
+EndSysreg
+
+Sysreg ICH_PPI_PRIORITYR12_EL2 3 4 12 15 4
+Fields ICH_PPI_PRIORITYRx_EL2
+EndSysreg
+
+Sysreg ICH_PPI_PRIORITYR13_EL2 3 4 12 15 5
+Fields ICH_PPI_PRIORITYRx_EL2
+EndSysreg
+
+Sysreg ICH_PPI_PRIORITYR14_EL2 3 4 12 15 6
+Fields ICH_PPI_PRIORITYRx_EL2
+EndSysreg
+
+Sysreg ICH_PPI_PRIORITYR15_EL2 3 4 12 15 7
+Fields ICH_PPI_PRIORITYRx_EL2
+EndSysreg
+
Sysreg CONTEXTIDR_EL2 3 4 13 0 1
Fields CONTEXTIDR_ELx
EndSysreg
diff --git a/drivers/irqchip/irq-gic-v5.c b/drivers/irqchip/irq-gic-v5.c
index 405a5eee847b..6b0903be8ebf 100644
--- a/drivers/irqchip/irq-gic-v5.c
+++ b/drivers/irqchip/irq-gic-v5.c
@@ -511,6 +511,23 @@ static bool gicv5_ppi_irq_is_level(irq_hw_number_t hwirq)
return !!(read_ppi_sysreg_s(hwirq, PPI_HM) & bit);
}
+static int gicv5_ppi_irq_set_type(struct irq_data *d, unsigned int type)
+{
+ /*
+ * GICv5's PPIs do not have a configurable trigger or handling
+ * mode. Check that the attempt to set a type matches what the
+ * hardware reports in the HMR, and error on a mismatch.
+ */
+
+ if (type & IRQ_TYPE_EDGE_BOTH && gicv5_ppi_irq_is_level(d->hwirq))
+ return -EINVAL;
+
+ if (type & IRQ_TYPE_LEVEL_MASK && !gicv5_ppi_irq_is_level(d->hwirq))
+ return -EINVAL;
+
+ return 0;
+}
+
static int gicv5_ppi_irq_set_vcpu_affinity(struct irq_data *d, void *vcpu)
{
if (vcpu)
@@ -526,6 +543,7 @@ static const struct irq_chip gicv5_ppi_irq_chip = {
.irq_mask = gicv5_ppi_irq_mask,
.irq_unmask = gicv5_ppi_irq_unmask,
.irq_eoi = gicv5_ppi_irq_eoi,
+ .irq_set_type = gicv5_ppi_irq_set_type,
.irq_get_irqchip_state = gicv5_ppi_irq_get_irqchip_state,
.irq_set_irqchip_state = gicv5_ppi_irq_set_irqchip_state,
.irq_set_vcpu_affinity = gicv5_ppi_irq_set_vcpu_affinity,
diff --git a/include/kvm/arm_arch_timer.h b/include/kvm/arm_arch_timer.h
index 7310841f4512..a7754e0a2ef4 100644
--- a/include/kvm/arm_arch_timer.h
+++ b/include/kvm/arm_arch_timer.h
@@ -10,6 +10,8 @@
#include <linux/clocksource.h>
#include <linux/hrtimer.h>
+#include <linux/irqchip/arm-gic-v5.h>
+
enum kvm_arch_timers {
TIMER_PTIMER,
TIMER_VTIMER,
@@ -47,7 +49,7 @@ struct arch_timer_vm_data {
u64 poffset;
/* The PPI for each timer, global to the VM */
- u8 ppi[NR_KVM_TIMERS];
+ u32 ppi[NR_KVM_TIMERS];
};
struct arch_timer_context {
@@ -74,6 +76,9 @@ struct arch_timer_context {
/* Duplicated state from arch_timer.c for convenience */
u32 host_timer_irq;
+
+ /* Is this a direct timer? */
+ bool direct;
};
struct timer_map {
@@ -130,6 +135,10 @@ void kvm_timer_init_vhe(void);
#define timer_vm_data(ctx) (&(timer_context_to_vcpu(ctx)->kvm->arch.timer_data))
#define timer_irq(ctx) (timer_vm_data(ctx)->ppi[arch_timer_ctx_index(ctx)])
+#define get_vgic_ppi(k, i) (((k)->arch.vgic.vgic_model != KVM_DEV_TYPE_ARM_VGIC_V5) ? \
+ (i) : (FIELD_PREP(GICV5_HWIRQ_ID, i) | \
+ FIELD_PREP(GICV5_HWIRQ_TYPE, GICV5_HWIRQ_TYPE_PPI)))
+
u64 kvm_arm_timer_read_sysreg(struct kvm_vcpu *vcpu,
enum kvm_arch_timers tmr,
enum kvm_arch_timer_regs treg);
diff --git a/include/kvm/arm_pmu.h b/include/kvm/arm_pmu.h
index 96754b51b411..0a36a3d5c894 100644
--- a/include/kvm/arm_pmu.h
+++ b/include/kvm/arm_pmu.h
@@ -12,6 +12,9 @@
#define KVM_ARMV8_PMU_MAX_COUNTERS 32
+/* PPI #23 - architecturally specified for GICv5 */
+#define KVM_ARMV8_PMU_GICV5_IRQ 0x20000017
+
#if IS_ENABLED(CONFIG_HW_PERF_EVENTS) && IS_ENABLED(CONFIG_KVM)
struct kvm_pmc {
u8 idx; /* index into the pmu->pmc array */
@@ -38,7 +41,7 @@ struct arm_pmu_entry {
};
bool kvm_supports_guest_pmuv3(void);
-#define kvm_arm_pmu_irq_initialized(v) ((v)->arch.pmu.irq_num >= VGIC_NR_SGIS)
+#define kvm_arm_pmu_irq_initialized(v) ((v)->arch.pmu.irq_num != 0)
u64 kvm_pmu_get_counter_value(struct kvm_vcpu *vcpu, u64 select_idx);
void kvm_pmu_set_counter_value(struct kvm_vcpu *vcpu, u64 select_idx, u64 val);
void kvm_pmu_set_counter_value_user(struct kvm_vcpu *vcpu, u64 select_idx, u64 val);
diff --git a/include/kvm/arm_vgic.h b/include/kvm/arm_vgic.h
index f2eafc65bbf4..1388dc6028a9 100644
--- a/include/kvm/arm_vgic.h
+++ b/include/kvm/arm_vgic.h
@@ -19,7 +19,9 @@
#include <linux/jump_label.h>
#include <linux/irqchip/arm-gic-v4.h>
+#include <linux/irqchip/arm-gic-v5.h>
+#define VGIC_V5_MAX_CPUS 512
#define VGIC_V3_MAX_CPUS 512
#define VGIC_V2_MAX_CPUS 8
#define VGIC_NR_IRQS_LEGACY 256
@@ -31,9 +33,96 @@
#define VGIC_MIN_LPI 8192
#define KVM_IRQCHIP_NUM_PINS (1020 - 32)
-#define irq_is_ppi(irq) ((irq) >= VGIC_NR_SGIS && (irq) < VGIC_NR_PRIVATE_IRQS)
-#define irq_is_spi(irq) ((irq) >= VGIC_NR_PRIVATE_IRQS && \
- (irq) <= VGIC_MAX_SPI)
+/*
+ * GICv5 supports 128 PPIs, but only the first 64 are architected. We only
+ * support the timers and PMU in KVM, both of which are architected. Rather than
+ * handling twice the state, we instead opt to only support the architected set
+ * in KVM for now. At a future stage, this can be bumped up to 128, if required.
+ */
+#define VGIC_V5_NR_PRIVATE_IRQS 64
+
+#define is_v5_type(t, i) (FIELD_GET(GICV5_HWIRQ_TYPE, (i)) == (t))
+
+#define __irq_is_sgi(t, i) \
+ ({ \
+ bool __ret; \
+ \
+ switch (t) { \
+ case KVM_DEV_TYPE_ARM_VGIC_V5: \
+ __ret = false; \
+ break; \
+ default: \
+ __ret = (i) < VGIC_NR_SGIS; \
+ } \
+ \
+ __ret; \
+ })
+
+#define __irq_is_ppi(t, i) \
+ ({ \
+ bool __ret; \
+ \
+ switch (t) { \
+ case KVM_DEV_TYPE_ARM_VGIC_V5: \
+ __ret = is_v5_type(GICV5_HWIRQ_TYPE_PPI, (i)); \
+ break; \
+ default: \
+ __ret = (i) >= VGIC_NR_SGIS; \
+ __ret &= (i) < VGIC_NR_PRIVATE_IRQS; \
+ } \
+ \
+ __ret; \
+ })
+
+#define __irq_is_spi(t, i) \
+ ({ \
+ bool __ret; \
+ \
+ switch (t) { \
+ case KVM_DEV_TYPE_ARM_VGIC_V5: \
+ __ret = is_v5_type(GICV5_HWIRQ_TYPE_SPI, (i)); \
+ break; \
+ default: \
+ __ret = (i) <= VGIC_MAX_SPI; \
+ __ret &= (i) >= VGIC_NR_PRIVATE_IRQS; \
+ } \
+ \
+ __ret; \
+ })
+
+#define __irq_is_lpi(t, i) \
+ ({ \
+ bool __ret; \
+ \
+ switch (t) { \
+ case KVM_DEV_TYPE_ARM_VGIC_V5: \
+ __ret = is_v5_type(GICV5_HWIRQ_TYPE_LPI, (i)); \
+ break; \
+ default: \
+ __ret = (i) >= 8192; \
+ } \
+ \
+ __ret; \
+ })
+
+#define irq_is_sgi(k, i) __irq_is_sgi((k)->arch.vgic.vgic_model, i)
+#define irq_is_ppi(k, i) __irq_is_ppi((k)->arch.vgic.vgic_model, i)
+#define irq_is_spi(k, i) __irq_is_spi((k)->arch.vgic.vgic_model, i)
+#define irq_is_lpi(k, i) __irq_is_lpi((k)->arch.vgic.vgic_model, i)
+
+#define irq_is_private(k, i) (irq_is_ppi(k, i) || irq_is_sgi(k, i))
+
+#define vgic_v5_get_hwirq_id(x) FIELD_GET(GICV5_HWIRQ_ID, (x))
+#define vgic_v5_set_hwirq_id(x) FIELD_PREP(GICV5_HWIRQ_ID, (x))
+
+#define __vgic_v5_set_type(t) (FIELD_PREP(GICV5_HWIRQ_TYPE, GICV5_HWIRQ_TYPE_##t))
+#define vgic_v5_make_ppi(x) (__vgic_v5_set_type(PPI) | vgic_v5_set_hwirq_id(x))
+#define vgic_v5_make_spi(x) (__vgic_v5_set_type(SPI) | vgic_v5_set_hwirq_id(x))
+#define vgic_v5_make_lpi(x) (__vgic_v5_set_type(LPI) | vgic_v5_set_hwirq_id(x))
+
+#define __vgic_is_v(k, v) ((k)->arch.vgic.vgic_model == KVM_DEV_TYPE_ARM_VGIC_V##v)
+#define vgic_is_v3(k) (__vgic_is_v(k, 3))
+#define vgic_is_v5(k) (__vgic_is_v(k, 5))
enum vgic_type {
VGIC_V2, /* Good ol' GICv2 */
@@ -101,6 +190,8 @@ enum vgic_irq_config {
VGIC_CONFIG_LEVEL
};
+struct vgic_irq;
+
/*
* Per-irq ops overriding some common behavious.
*
@@ -119,6 +210,19 @@ struct irq_ops {
* peaking into the physical GIC.
*/
bool (*get_input_level)(int vintid);
+
+ /*
+ * Function pointer to override the queuing of an IRQ.
+ */
+ bool (*queue_irq_unlock)(struct kvm *kvm, struct vgic_irq *irq,
+ unsigned long flags) __releases(&irq->irq_lock);
+
+ /*
+ * Callback function pointer to either enable or disable direct
+ * injection for a mapped interrupt.
+ */
+ void (*set_direct_injection)(struct kvm_vcpu *vcpu,
+ struct vgic_irq *irq, bool direct);
};
struct vgic_irq {
@@ -238,6 +342,26 @@ struct vgic_redist_region {
struct list_head list;
};
+struct vgic_v5_vm {
+ /*
+ * We only expose a subset of PPIs to the guest. This subset is a
+ * combination of the PPIs that are actually implemented and what we
+ * actually choose to expose.
+ */
+ DECLARE_BITMAP(vgic_ppi_mask, VGIC_V5_NR_PRIVATE_IRQS);
+
+ /* A mask of the PPIs that are exposed for userspace to drive. */
+ DECLARE_BITMAP(userspace_ppis, VGIC_V5_NR_PRIVATE_IRQS);
+
+ /*
+ * The HMR itself is handled by the hardware, but we still need to have
+ * a mask that we can use when merging in pending state (only the state
+	 * of Edge PPIs is merged back in from the guest and the HMR provides a
+ * convenient way to do that).
+ */
+ DECLARE_BITMAP(vgic_ppi_hmr, VGIC_V5_NR_PRIVATE_IRQS);
+};
+
struct vgic_dist {
bool in_kernel;
bool ready;
@@ -310,6 +434,11 @@ struct vgic_dist {
* else.
*/
struct its_vm its_vm;
+
+ /*
+ * GICv5 per-VM data.
+ */
+ struct vgic_v5_vm gicv5_vm;
};
struct vgic_v2_cpu_if {
@@ -340,11 +469,40 @@ struct vgic_v3_cpu_if {
unsigned int used_lrs;
};
+struct vgic_v5_cpu_if {
+ u64 vgic_apr;
+ u64 vgic_vmcr;
+
+ /* PPI register state */
+ DECLARE_BITMAP(vgic_ppi_dvir, VGIC_V5_NR_PRIVATE_IRQS);
+ DECLARE_BITMAP(vgic_ppi_activer, VGIC_V5_NR_PRIVATE_IRQS);
+ DECLARE_BITMAP(vgic_ppi_enabler, VGIC_V5_NR_PRIVATE_IRQS);
+ /* We have one byte (of which 5 bits are used) per PPI for priority */
+ u64 vgic_ppi_priorityr[VGIC_V5_NR_PRIVATE_IRQS / 8];
+
+ /*
+ * The ICSR is re-used across host and guest, and hence it needs to be
+ * saved/restored. Only one copy is required as the host should block
+ * preemption between executing GIC CDRCFG and acccessing the
+ * ICC_ICSR_EL1. A guest, of course, can never guarantee this, and hence
+ * it is the hyp's responsibility to keep the state constistent.
+ */
+ u64 vgic_icsr;
+
+ struct gicv5_vpe gicv5_vpe;
+};
+
+/* What PPI capabilities does a GICv5 host have */
+struct vgic_v5_ppi_caps {
+ DECLARE_BITMAP(impl_ppi_mask, VGIC_V5_NR_PRIVATE_IRQS);
+};
+
struct vgic_cpu {
/* CPU vif control registers for world switch */
union {
struct vgic_v2_cpu_if vgic_v2;
struct vgic_v3_cpu_if vgic_v3;
+ struct vgic_v5_cpu_if vgic_v5;
};
struct vgic_irq *private_irqs;
@@ -392,13 +550,17 @@ int kvm_vgic_create(struct kvm *kvm, u32 type);
void kvm_vgic_destroy(struct kvm *kvm);
void kvm_vgic_vcpu_destroy(struct kvm_vcpu *vcpu);
int kvm_vgic_map_resources(struct kvm *kvm);
+void kvm_vgic_finalize_idregs(struct kvm *kvm);
int kvm_vgic_hyp_init(void);
void kvm_vgic_init_cpu_hardware(void);
int kvm_vgic_inject_irq(struct kvm *kvm, struct kvm_vcpu *vcpu,
unsigned int intid, bool level, void *owner);
+void kvm_vgic_set_irq_ops(struct kvm_vcpu *vcpu, u32 vintid,
+ struct irq_ops *ops);
+void kvm_vgic_clear_irq_ops(struct kvm_vcpu *vcpu, u32 vintid);
int kvm_vgic_map_phys_irq(struct kvm_vcpu *vcpu, unsigned int host_irq,
- u32 vintid, struct irq_ops *ops);
+ u32 vintid);
int kvm_vgic_unmap_phys_irq(struct kvm_vcpu *vcpu, unsigned int vintid);
int kvm_vgic_get_map(struct kvm_vcpu *vcpu, unsigned int vintid);
bool kvm_vgic_map_is_active(struct kvm_vcpu *vcpu, unsigned int vintid);
@@ -414,8 +576,20 @@ u64 vgic_v3_get_misr(struct kvm_vcpu *vcpu);
#define irqchip_in_kernel(k) (!!((k)->arch.vgic.in_kernel))
#define vgic_initialized(k) ((k)->arch.vgic.initialized)
-#define vgic_valid_spi(k, i) (((i) >= VGIC_NR_PRIVATE_IRQS) && \
- ((i) < (k)->arch.vgic.nr_spis + VGIC_NR_PRIVATE_IRQS))
+#define vgic_valid_spi(k, i) \
+ ({ \
+ bool __ret = irq_is_spi(k, i); \
+ \
+ switch ((k)->arch.vgic.vgic_model) { \
+ case KVM_DEV_TYPE_ARM_VGIC_V5: \
+ __ret &= FIELD_GET(GICV5_HWIRQ_ID, i) < (k)->arch.vgic.nr_spis; \
+ break; \
+ default: \
+ __ret &= (i) < ((k)->arch.vgic.nr_spis + VGIC_NR_PRIVATE_IRQS); \
+ } \
+ \
+ __ret; \
+ })
bool kvm_vcpu_has_pending_irqs(struct kvm_vcpu *vcpu);
void kvm_vgic_sync_hwstate(struct kvm_vcpu *vcpu);
@@ -455,6 +629,11 @@ int vgic_v4_load(struct kvm_vcpu *vcpu);
void vgic_v4_commit(struct kvm_vcpu *vcpu);
int vgic_v4_put(struct kvm_vcpu *vcpu);
+int vgic_v5_finalize_ppi_state(struct kvm *kvm);
+bool vgic_v5_ppi_queue_irq_unlock(struct kvm *kvm, struct vgic_irq *irq,
+ unsigned long flags);
+void vgic_v5_set_ppi_dvi(struct kvm_vcpu *vcpu, struct vgic_irq *irq, bool dvi);
+
bool vgic_state_is_nested(struct kvm_vcpu *vcpu);
/* CPU HP callbacks */
diff --git a/include/linux/irqchip/arm-gic-v5.h b/include/linux/irqchip/arm-gic-v5.h
index b78488df6c98..40d2fce68294 100644
--- a/include/linux/irqchip/arm-gic-v5.h
+++ b/include/linux/irqchip/arm-gic-v5.h
@@ -25,6 +25,28 @@
#define GICV5_HWIRQ_TYPE_SPI UL(0x3)
/*
+ * Architected PPIs
+ */
+#define GICV5_ARCH_PPI_S_DB_PPI 0x0
+#define GICV5_ARCH_PPI_RL_DB_PPI 0x1
+#define GICV5_ARCH_PPI_NS_DB_PPI 0x2
+#define GICV5_ARCH_PPI_SW_PPI 0x3
+#define GICV5_ARCH_PPI_HACDBSIRQ 0xf
+#define GICV5_ARCH_PPI_CNTHVS 0x13
+#define GICV5_ARCH_PPI_CNTHPS 0x14
+#define GICV5_ARCH_PPI_PMBIRQ 0x15
+#define GICV5_ARCH_PPI_COMMIRQ 0x16
+#define GICV5_ARCH_PPI_PMUIRQ 0x17
+#define GICV5_ARCH_PPI_CTIIRQ 0x18
+#define GICV5_ARCH_PPI_GICMNT 0x19
+#define GICV5_ARCH_PPI_CNTHP 0x1a
+#define GICV5_ARCH_PPI_CNTV 0x1b
+#define GICV5_ARCH_PPI_CNTHV 0x1c
+#define GICV5_ARCH_PPI_CNTPS 0x1d
+#define GICV5_ARCH_PPI_CNTP 0x1e
+#define GICV5_ARCH_PPI_TRBIRQ 0x1f
+
+/*
* Tables attributes
*/
#define GICV5_NO_READ_ALLOC 0b0
@@ -365,6 +387,11 @@ int gicv5_spi_irq_set_type(struct irq_data *d, unsigned int type);
int gicv5_irs_iste_alloc(u32 lpi);
void gicv5_irs_syncr(void);
+/* Embedded in kvm.arch */
+struct gicv5_vpe {
+ bool resident;
+};
+
struct gicv5_its_devtab_cfg {
union {
struct {
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 6b76e7a6f4c2..779d9ed85cbf 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -2366,6 +2366,7 @@ void kvm_unregister_device_ops(u32 type);
extern struct kvm_device_ops kvm_mpic_ops;
extern struct kvm_device_ops kvm_arm_vgic_v2_ops;
extern struct kvm_device_ops kvm_arm_vgic_v3_ops;
+extern struct kvm_device_ops kvm_arm_vgic_v5_ops;
#ifdef CONFIG_HAVE_KVM_CPU_RELAX_INTERCEPT
diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
index 80364d4dbebb..d0c0c8605976 100644
--- a/include/uapi/linux/kvm.h
+++ b/include/uapi/linux/kvm.h
@@ -1224,6 +1224,8 @@ enum kvm_device_type {
#define KVM_DEV_TYPE_LOONGARCH_EIOINTC KVM_DEV_TYPE_LOONGARCH_EIOINTC
KVM_DEV_TYPE_LOONGARCH_PCHPIC,
#define KVM_DEV_TYPE_LOONGARCH_PCHPIC KVM_DEV_TYPE_LOONGARCH_PCHPIC
+ KVM_DEV_TYPE_ARM_VGIC_V5,
+#define KVM_DEV_TYPE_ARM_VGIC_V5 KVM_DEV_TYPE_ARM_VGIC_V5
KVM_DEV_TYPE_MAX,
diff --git a/tools/arch/arm64/include/uapi/asm/kvm.h b/tools/arch/arm64/include/uapi/asm/kvm.h
index a792a599b9d6..1c13bfa2d38a 100644
--- a/tools/arch/arm64/include/uapi/asm/kvm.h
+++ b/tools/arch/arm64/include/uapi/asm/kvm.h
@@ -428,6 +428,7 @@ enum {
#define KVM_DEV_ARM_ITS_RESTORE_TABLES 2
#define KVM_DEV_ARM_VGIC_SAVE_PENDING_TABLES 3
#define KVM_DEV_ARM_ITS_CTRL_RESET 4
+#define KVM_DEV_ARM_VGIC_USERSPACE_PPIS 5
/* Device Control API on vcpu fd */
#define KVM_ARM_VCPU_PMU_V3_CTRL 0
diff --git a/tools/include/uapi/linux/kvm.h b/tools/include/uapi/linux/kvm.h
index 65500f5db379..713e4360eca0 100644
--- a/tools/include/uapi/linux/kvm.h
+++ b/tools/include/uapi/linux/kvm.h
@@ -1220,6 +1220,8 @@ enum kvm_device_type {
#define KVM_DEV_TYPE_LOONGARCH_EIOINTC KVM_DEV_TYPE_LOONGARCH_EIOINTC
KVM_DEV_TYPE_LOONGARCH_PCHPIC,
#define KVM_DEV_TYPE_LOONGARCH_PCHPIC KVM_DEV_TYPE_LOONGARCH_PCHPIC
+ KVM_DEV_TYPE_ARM_VGIC_V5,
+#define KVM_DEV_TYPE_ARM_VGIC_V5 KVM_DEV_TYPE_ARM_VGIC_V5
KVM_DEV_TYPE_MAX,
diff --git a/tools/testing/selftests/kvm/Makefile.kvm b/tools/testing/selftests/kvm/Makefile.kvm
index dc68371f76a3..98da9fa4b8b7 100644
--- a/tools/testing/selftests/kvm/Makefile.kvm
+++ b/tools/testing/selftests/kvm/Makefile.kvm
@@ -177,8 +177,9 @@ TEST_GEN_PROGS_arm64 += arm64/vcpu_width_config
TEST_GEN_PROGS_arm64 += arm64/vgic_init
TEST_GEN_PROGS_arm64 += arm64/vgic_irq
TEST_GEN_PROGS_arm64 += arm64/vgic_lpi_stress
+TEST_GEN_PROGS_arm64 += arm64/vgic_v5
TEST_GEN_PROGS_arm64 += arm64/vpmu_counter_access
-TEST_GEN_PROGS_arm64 += arm64/no-vgic-v3
+TEST_GEN_PROGS_arm64 += arm64/no-vgic
TEST_GEN_PROGS_arm64 += arm64/idreg-idst
TEST_GEN_PROGS_arm64 += arm64/kvm-uuid
TEST_GEN_PROGS_arm64 += access_tracking_perf_test
diff --git a/tools/testing/selftests/kvm/arm64/no-vgic-v3.c b/tools/testing/selftests/kvm/arm64/no-vgic-v3.c
deleted file mode 100644
index 152c34776981..000000000000
--- a/tools/testing/selftests/kvm/arm64/no-vgic-v3.c
+++ /dev/null
@@ -1,177 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-
-// Check that, on a GICv3 system, not configuring GICv3 correctly
-// results in all of the sysregs generating an UNDEF exception.
-
-#include <test_util.h>
-#include <kvm_util.h>
-#include <processor.h>
-
-static volatile bool handled;
-
-#define __check_sr_read(r) \
- ({ \
- uint64_t val; \
- \
- handled = false; \
- dsb(sy); \
- val = read_sysreg_s(SYS_ ## r); \
- val; \
- })
-
-#define __check_sr_write(r) \
- do { \
- handled = false; \
- dsb(sy); \
- write_sysreg_s(0, SYS_ ## r); \
- isb(); \
- } while(0)
-
-/* Fatal checks */
-#define check_sr_read(r) \
- do { \
- __check_sr_read(r); \
- __GUEST_ASSERT(handled, #r " no read trap"); \
- } while(0)
-
-#define check_sr_write(r) \
- do { \
- __check_sr_write(r); \
- __GUEST_ASSERT(handled, #r " no write trap"); \
- } while(0)
-
-#define check_sr_rw(r) \
- do { \
- check_sr_read(r); \
- check_sr_write(r); \
- } while(0)
-
-static void guest_code(void)
-{
- uint64_t val;
-
- /*
- * Check that we advertise that ID_AA64PFR0_EL1.GIC == 0, having
- * hidden the feature at runtime without any other userspace action.
- */
- __GUEST_ASSERT(FIELD_GET(ID_AA64PFR0_EL1_GIC,
- read_sysreg(id_aa64pfr0_el1)) == 0,
- "GICv3 wrongly advertised");
-
- /*
- * Access all GICv3 registers, and fail if we don't get an UNDEF.
- * Note that we happily access all the APxRn registers without
- * checking their existance, as all we want to see is a failure.
- */
- check_sr_rw(ICC_PMR_EL1);
- check_sr_read(ICC_IAR0_EL1);
- check_sr_write(ICC_EOIR0_EL1);
- check_sr_rw(ICC_HPPIR0_EL1);
- check_sr_rw(ICC_BPR0_EL1);
- check_sr_rw(ICC_AP0R0_EL1);
- check_sr_rw(ICC_AP0R1_EL1);
- check_sr_rw(ICC_AP0R2_EL1);
- check_sr_rw(ICC_AP0R3_EL1);
- check_sr_rw(ICC_AP1R0_EL1);
- check_sr_rw(ICC_AP1R1_EL1);
- check_sr_rw(ICC_AP1R2_EL1);
- check_sr_rw(ICC_AP1R3_EL1);
- check_sr_write(ICC_DIR_EL1);
- check_sr_read(ICC_RPR_EL1);
- check_sr_write(ICC_SGI1R_EL1);
- check_sr_write(ICC_ASGI1R_EL1);
- check_sr_write(ICC_SGI0R_EL1);
- check_sr_read(ICC_IAR1_EL1);
- check_sr_write(ICC_EOIR1_EL1);
- check_sr_rw(ICC_HPPIR1_EL1);
- check_sr_rw(ICC_BPR1_EL1);
- check_sr_rw(ICC_CTLR_EL1);
- check_sr_rw(ICC_IGRPEN0_EL1);
- check_sr_rw(ICC_IGRPEN1_EL1);
-
- /*
- * ICC_SRE_EL1 may not be trappable, as ICC_SRE_EL2.Enable can
- * be RAO/WI. Engage in non-fatal accesses, starting with a
- * write of 0 to try and disable SRE, and let's see if it
- * sticks.
- */
- __check_sr_write(ICC_SRE_EL1);
- if (!handled)
- GUEST_PRINTF("ICC_SRE_EL1 write not trapping (OK)\n");
-
- val = __check_sr_read(ICC_SRE_EL1);
- if (!handled) {
- __GUEST_ASSERT((val & BIT(0)),
- "ICC_SRE_EL1 not trapped but ICC_SRE_EL1.SRE not set\n");
- GUEST_PRINTF("ICC_SRE_EL1 read not trapping (OK)\n");
- }
-
- GUEST_DONE();
-}
-
-static void guest_undef_handler(struct ex_regs *regs)
-{
- /* Success, we've gracefully exploded! */
- handled = true;
- regs->pc += 4;
-}
-
-static void test_run_vcpu(struct kvm_vcpu *vcpu)
-{
- struct ucall uc;
-
- do {
- vcpu_run(vcpu);
-
- switch (get_ucall(vcpu, &uc)) {
- case UCALL_ABORT:
- REPORT_GUEST_ASSERT(uc);
- break;
- case UCALL_PRINTF:
- printf("%s", uc.buffer);
- break;
- case UCALL_DONE:
- break;
- default:
- TEST_FAIL("Unknown ucall %lu", uc.cmd);
- }
- } while (uc.cmd != UCALL_DONE);
-}
-
-static void test_guest_no_gicv3(void)
-{
- struct kvm_vcpu *vcpu;
- struct kvm_vm *vm;
-
- /* Create a VM without a GICv3 */
- vm = vm_create_with_one_vcpu(&vcpu, guest_code);
-
- vm_init_descriptor_tables(vm);
- vcpu_init_descriptor_tables(vcpu);
-
- vm_install_sync_handler(vm, VECTOR_SYNC_CURRENT,
- ESR_ELx_EC_UNKNOWN, guest_undef_handler);
-
- test_run_vcpu(vcpu);
-
- kvm_vm_free(vm);
-}
-
-int main(int argc, char *argv[])
-{
- struct kvm_vcpu *vcpu;
- struct kvm_vm *vm;
- uint64_t pfr0;
-
- test_disable_default_vgic();
-
- vm = vm_create_with_one_vcpu(&vcpu, NULL);
- pfr0 = vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(SYS_ID_AA64PFR0_EL1));
- __TEST_REQUIRE(FIELD_GET(ID_AA64PFR0_EL1_GIC, pfr0),
- "GICv3 not supported.");
- kvm_vm_free(vm);
-
- test_guest_no_gicv3();
-
- return 0;
-}
diff --git a/tools/testing/selftests/kvm/arm64/no-vgic.c b/tools/testing/selftests/kvm/arm64/no-vgic.c
new file mode 100644
index 000000000000..b14686ef17d1
--- /dev/null
+++ b/tools/testing/selftests/kvm/arm64/no-vgic.c
@@ -0,0 +1,297 @@
+// SPDX-License-Identifier: GPL-2.0
+
+// Check that, on a GICv3-capable system (GICv3 native, or GICv5 with
+// FEAT_GCIE_LEGACY), not configuring GICv3 correctly results in all
+// of the sysregs generating an UNDEF exception. Do the same for GICv5
+// on a GICv5 host.
+
+#include <test_util.h>
+#include <kvm_util.h>
+#include <processor.h>
+
+#include <arm64/gic_v5.h>
+
+static volatile bool handled;
+
+#define __check_sr_read(r) \
+ ({ \
+ uint64_t val; \
+ \
+ handled = false; \
+ dsb(sy); \
+ val = read_sysreg_s(SYS_ ## r); \
+ val; \
+ })
+
+#define __check_sr_write(r) \
+ do { \
+ handled = false; \
+ dsb(sy); \
+ write_sysreg_s(0, SYS_ ## r); \
+ isb(); \
+ } while (0)
+
+#define __check_gicv5_gicr_op(r) \
+ ({ \
+ uint64_t val; \
+ \
+ handled = false; \
+ dsb(sy); \
+ val = read_sysreg_s(GICV5_OP_GICR_ ## r); \
+ val; \
+ })
+
+#define __check_gicv5_gic_op(r) \
+ do { \
+ handled = false; \
+ dsb(sy); \
+ write_sysreg_s(0, GICV5_OP_GIC_ ## r); \
+ isb(); \
+ } while (0)
+
+/* Fatal checks */
+#define check_sr_read(r) \
+ do { \
+ __check_sr_read(r); \
+ __GUEST_ASSERT(handled, #r " no read trap"); \
+ } while (0)
+
+#define check_sr_write(r) \
+ do { \
+ __check_sr_write(r); \
+ __GUEST_ASSERT(handled, #r " no write trap"); \
+ } while (0)
+
+#define check_sr_rw(r) \
+ do { \
+ check_sr_read(r); \
+ check_sr_write(r); \
+ } while (0)
+
+#define check_gicv5_gicr_op(r) \
+ do { \
+ __check_gicv5_gicr_op(r); \
+ __GUEST_ASSERT(handled, #r " no read trap"); \
+ } while (0)
+
+#define check_gicv5_gic_op(r) \
+ do { \
+ __check_gicv5_gic_op(r); \
+ __GUEST_ASSERT(handled, #r " no write trap"); \
+ } while (0)
+
+static void guest_code_gicv3(void)
+{
+ uint64_t val;
+
+ /*
+ * Check that we advertise that ID_AA64PFR0_EL1.GIC == 0, having
+ * hidden the feature at runtime without any other userspace action.
+ */
+ __GUEST_ASSERT(FIELD_GET(ID_AA64PFR0_EL1_GIC,
+ read_sysreg(id_aa64pfr0_el1)) == 0,
+ "GICv3 wrongly advertised");
+
+ /*
+ * Access all GICv3 registers, and fail if we don't get an UNDEF.
+ * Note that we happily access all the APxRn registers without
+ * checking their existence, as all we want to see is a failure.
+ */
+ check_sr_rw(ICC_PMR_EL1);
+ check_sr_read(ICC_IAR0_EL1);
+ check_sr_write(ICC_EOIR0_EL1);
+ check_sr_rw(ICC_HPPIR0_EL1);
+ check_sr_rw(ICC_BPR0_EL1);
+ check_sr_rw(ICC_AP0R0_EL1);
+ check_sr_rw(ICC_AP0R1_EL1);
+ check_sr_rw(ICC_AP0R2_EL1);
+ check_sr_rw(ICC_AP0R3_EL1);
+ check_sr_rw(ICC_AP1R0_EL1);
+ check_sr_rw(ICC_AP1R1_EL1);
+ check_sr_rw(ICC_AP1R2_EL1);
+ check_sr_rw(ICC_AP1R3_EL1);
+ check_sr_write(ICC_DIR_EL1);
+ check_sr_read(ICC_RPR_EL1);
+ check_sr_write(ICC_SGI1R_EL1);
+ check_sr_write(ICC_ASGI1R_EL1);
+ check_sr_write(ICC_SGI0R_EL1);
+ check_sr_read(ICC_IAR1_EL1);
+ check_sr_write(ICC_EOIR1_EL1);
+ check_sr_rw(ICC_HPPIR1_EL1);
+ check_sr_rw(ICC_BPR1_EL1);
+ check_sr_rw(ICC_CTLR_EL1);
+ check_sr_rw(ICC_IGRPEN0_EL1);
+ check_sr_rw(ICC_IGRPEN1_EL1);
+
+ /*
+ * ICC_SRE_EL1 may not be trappable, as ICC_SRE_EL2.Enable can
+ * be RAO/WI. Engage in non-fatal accesses, starting with a
+ * write of 0 to try and disable SRE, and let's see if it
+ * sticks.
+ */
+ __check_sr_write(ICC_SRE_EL1);
+ if (!handled)
+ GUEST_PRINTF("ICC_SRE_EL1 write not trapping (OK)\n");
+
+ val = __check_sr_read(ICC_SRE_EL1);
+ if (!handled) {
+ __GUEST_ASSERT((val & BIT(0)),
+ "ICC_SRE_EL1 not trapped but ICC_SRE_EL1.SRE not set\n");
+ GUEST_PRINTF("ICC_SRE_EL1 read not trapping (OK)\n");
+ }
+
+ GUEST_DONE();
+}
+
+static void guest_code_gicv5(void)
+{
+ /*
+ * Check that we advertise that ID_AA64PFR2_EL1.GCIE == 0, having
+ * hidden the feature at runtime without any other userspace action.
+ */
+ __GUEST_ASSERT(FIELD_GET(ID_AA64PFR2_EL1_GCIE,
+ read_sysreg_s(SYS_ID_AA64PFR2_EL1)) == 0,
+ "GICv5 wrongly advertised");
+
+ /*
+ * Try all GICv5 instructions, and fail if we don't get an UNDEF.
+ */
+ check_gicv5_gic_op(CDAFF);
+ check_gicv5_gic_op(CDDI);
+ check_gicv5_gic_op(CDDIS);
+ check_gicv5_gic_op(CDEOI);
+ check_gicv5_gic_op(CDHM);
+ check_gicv5_gic_op(CDPEND);
+ check_gicv5_gic_op(CDPRI);
+ check_gicv5_gic_op(CDRCFG);
+ check_gicv5_gicr_op(CDIA);
+ check_gicv5_gicr_op(CDNMIA);
+
+	/* Check General System Register accesses */
+ check_sr_rw(ICC_APR_EL1);
+ check_sr_rw(ICC_CR0_EL1);
+ check_sr_read(ICC_HPPIR_EL1);
+ check_sr_read(ICC_IAFFIDR_EL1);
+ check_sr_rw(ICC_ICSR_EL1);
+ check_sr_read(ICC_IDR0_EL1);
+ check_sr_rw(ICC_PCR_EL1);
+
+	/* Check PPI System Register accesses */
+ check_sr_rw(ICC_PPI_CACTIVER0_EL1);
+ check_sr_rw(ICC_PPI_CACTIVER1_EL1);
+ check_sr_rw(ICC_PPI_SACTIVER0_EL1);
+ check_sr_rw(ICC_PPI_SACTIVER1_EL1);
+ check_sr_rw(ICC_PPI_CPENDR0_EL1);
+ check_sr_rw(ICC_PPI_CPENDR1_EL1);
+ check_sr_rw(ICC_PPI_SPENDR0_EL1);
+ check_sr_rw(ICC_PPI_SPENDR1_EL1);
+ check_sr_rw(ICC_PPI_ENABLER0_EL1);
+ check_sr_rw(ICC_PPI_ENABLER1_EL1);
+ check_sr_read(ICC_PPI_HMR0_EL1);
+ check_sr_read(ICC_PPI_HMR1_EL1);
+ check_sr_rw(ICC_PPI_PRIORITYR0_EL1);
+ check_sr_rw(ICC_PPI_PRIORITYR1_EL1);
+ check_sr_rw(ICC_PPI_PRIORITYR2_EL1);
+ check_sr_rw(ICC_PPI_PRIORITYR3_EL1);
+ check_sr_rw(ICC_PPI_PRIORITYR4_EL1);
+ check_sr_rw(ICC_PPI_PRIORITYR5_EL1);
+ check_sr_rw(ICC_PPI_PRIORITYR6_EL1);
+ check_sr_rw(ICC_PPI_PRIORITYR7_EL1);
+ check_sr_rw(ICC_PPI_PRIORITYR8_EL1);
+ check_sr_rw(ICC_PPI_PRIORITYR9_EL1);
+ check_sr_rw(ICC_PPI_PRIORITYR10_EL1);
+ check_sr_rw(ICC_PPI_PRIORITYR11_EL1);
+ check_sr_rw(ICC_PPI_PRIORITYR12_EL1);
+ check_sr_rw(ICC_PPI_PRIORITYR13_EL1);
+ check_sr_rw(ICC_PPI_PRIORITYR14_EL1);
+ check_sr_rw(ICC_PPI_PRIORITYR15_EL1);
+
+ GUEST_DONE();
+}
+
+static void guest_undef_handler(struct ex_regs *regs)
+{
+ /* Success, we've gracefully exploded! */
+ handled = true;
+ regs->pc += 4;
+}
+
+static void test_run_vcpu(struct kvm_vcpu *vcpu)
+{
+ struct ucall uc;
+
+ do {
+ vcpu_run(vcpu);
+
+ switch (get_ucall(vcpu, &uc)) {
+ case UCALL_ABORT:
+ REPORT_GUEST_ASSERT(uc);
+ break;
+ case UCALL_PRINTF:
+ printf("%s", uc.buffer);
+ break;
+ case UCALL_DONE:
+ break;
+ default:
+ TEST_FAIL("Unknown ucall %lu", uc.cmd);
+ }
+ } while (uc.cmd != UCALL_DONE);
+}
+
+static void test_guest_no_vgic(void *guest_code)
+{
+ struct kvm_vcpu *vcpu;
+ struct kvm_vm *vm;
+
+ /* Create a VM without a GIC */
+ vm = vm_create_with_one_vcpu(&vcpu, guest_code);
+
+ vm_init_descriptor_tables(vm);
+ vcpu_init_descriptor_tables(vcpu);
+
+ vm_install_sync_handler(vm, VECTOR_SYNC_CURRENT,
+ ESR_ELx_EC_UNKNOWN, guest_undef_handler);
+
+ test_run_vcpu(vcpu);
+
+ kvm_vm_free(vm);
+}
+
+int main(int argc, char *argv[])
+{
+ struct kvm_vcpu *vcpu;
+ struct kvm_vm *vm;
+ bool has_v3, has_v5;
+ uint64_t pfr;
+
+ test_disable_default_vgic();
+
+ vm = vm_create_with_one_vcpu(&vcpu, NULL);
+
+ pfr = vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(SYS_ID_AA64PFR0_EL1));
+ has_v3 = !!FIELD_GET(ID_AA64PFR0_EL1_GIC, pfr);
+
+ pfr = vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(SYS_ID_AA64PFR2_EL1));
+ has_v5 = !!FIELD_GET(ID_AA64PFR2_EL1_GCIE, pfr);
+
+ kvm_vm_free(vm);
+
+ __TEST_REQUIRE(has_v3 || has_v5,
+ "Neither GICv3 nor GICv5 supported.");
+
+ if (has_v3) {
+ pr_info("Testing no-vgic-v3\n");
+ test_guest_no_vgic(guest_code_gicv3);
+ } else {
+ pr_info("No GICv3 support: skipping no-vgic-v3 test\n");
+ }
+
+ if (has_v5) {
+ pr_info("Testing no-vgic-v5\n");
+ test_guest_no_vgic(guest_code_gicv5);
+ } else {
+ pr_info("No GICv5 support: skipping no-vgic-v5 test\n");
+ }
+
+ return 0;
+}
diff --git a/tools/testing/selftests/kvm/arm64/vgic_v5.c b/tools/testing/selftests/kvm/arm64/vgic_v5.c
new file mode 100644
index 000000000000..3ce6cf37a629
--- /dev/null
+++ b/tools/testing/selftests/kvm/arm64/vgic_v5.c
@@ -0,0 +1,228 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <linux/kernel.h>
+#include <sys/syscall.h>
+#include <asm/kvm.h>
+#include <asm/kvm_para.h>
+
+#include <arm64/gic_v5.h>
+
+#include "test_util.h"
+#include "kvm_util.h"
+#include "processor.h"
+#include "vgic.h"
+
+#define NR_VCPUS 1
+
+struct vm_gic {
+ struct kvm_vm *vm;
+ int gic_fd;
+ uint32_t gic_dev_type;
+};
+
+static uint64_t max_phys_size;
+
+#define GUEST_CMD_IRQ_CDIA 10
+#define GUEST_CMD_IRQ_DIEOI 11
+#define GUEST_CMD_IS_AWAKE 12
+#define GUEST_CMD_IS_READY 13
+
+static void guest_irq_handler(struct ex_regs *regs)
+{
+ bool valid;
+ u32 hwirq;
+ u64 ia;
+ static int count;
+
+ /*
+ * We have pending interrupts. Should never actually enter WFI
+ * here!
+ */
+ wfi();
+ GUEST_SYNC(GUEST_CMD_IS_AWAKE);
+
+ ia = gicr_insn(CDIA);
+ valid = GICV5_GICR_CDIA_VALID(ia);
+
+ GUEST_SYNC(GUEST_CMD_IRQ_CDIA);
+
+ if (!valid)
+ return;
+
+ gsb_ack();
+ isb();
+
+ hwirq = FIELD_GET(GICV5_GICR_CDIA_INTID, ia);
+
+ gic_insn(hwirq, CDDI);
+ gic_insn(0, CDEOI);
+
+ GUEST_SYNC(GUEST_CMD_IRQ_DIEOI);
+
+ if (++count >= 2)
+ GUEST_DONE();
+
+ /* Ask for the next interrupt to be injected */
+ GUEST_SYNC(GUEST_CMD_IS_READY);
+}
+
+static void guest_code(void)
+{
+ local_irq_disable();
+
+ gicv5_cpu_enable_interrupts();
+ local_irq_enable();
+
+ /* Enable the SW_PPI (3) */
+ write_sysreg_s(BIT_ULL(3), SYS_ICC_PPI_ENABLER0_EL1);
+
+ /* Ask for the first interrupt to be injected */
+ GUEST_SYNC(GUEST_CMD_IS_READY);
+
+ /* Loop forever waiting for interrupts */
+ while (1);
+}
+
+
+/* We don't want to assert on run execution, hence this helper. */
+static int run_vcpu(struct kvm_vcpu *vcpu)
+{
+ return __vcpu_run(vcpu) ? -errno : 0;
+}
+
+static void vm_gic_destroy(struct vm_gic *v)
+{
+ close(v->gic_fd);
+ kvm_vm_free(v->vm);
+}
+
+static void test_vgic_v5_ppis(uint32_t gic_dev_type)
+{
+ struct kvm_vcpu *vcpus[NR_VCPUS];
+ struct ucall uc;
+ u64 user_ppis[2];
+ struct vm_gic v;
+ int ret, i;
+
+ v.gic_dev_type = gic_dev_type;
+ v.vm = __vm_create(VM_SHAPE_DEFAULT, NR_VCPUS, 0);
+
+ v.gic_fd = kvm_create_device(v.vm, gic_dev_type);
+
+ for (i = 0; i < NR_VCPUS; i++)
+ vcpus[i] = vm_vcpu_add(v.vm, i, guest_code);
+
+ vm_init_descriptor_tables(v.vm);
+ vm_install_exception_handler(v.vm, VECTOR_IRQ_CURRENT, guest_irq_handler);
+
+ for (i = 0; i < NR_VCPUS; i++)
+ vcpu_init_descriptor_tables(vcpus[i]);
+
+ kvm_device_attr_set(v.gic_fd, KVM_DEV_ARM_VGIC_GRP_CTRL,
+ KVM_DEV_ARM_VGIC_CTRL_INIT, NULL);
+
+ /* Read out the PPIs that user space is allowed to drive. */
+ kvm_device_attr_get(v.gic_fd, KVM_DEV_ARM_VGIC_GRP_CTRL,
+ KVM_DEV_ARM_VGIC_USERSPACE_PPIS, &user_ppis);
+
+ /* We should always be able to drive the SW_PPI. */
+ TEST_ASSERT(user_ppis[0] & BIT(GICV5_ARCH_PPI_SW_PPI),
+ "SW_PPI is not drivable by userspace");
+
+ while (1) {
+ ret = run_vcpu(vcpus[0]);
+
+ switch (get_ucall(vcpus[0], &uc)) {
+ case UCALL_SYNC:
+ /*
+ * The guest is ready for the next level change. Set
+ * high if ready, and lower if it has been consumed.
+ */
+ if (uc.args[1] == GUEST_CMD_IS_READY ||
+ uc.args[1] == GUEST_CMD_IRQ_DIEOI) {
+ u64 irq;
+ bool level = uc.args[1] == GUEST_CMD_IRQ_DIEOI ? 0 : 1;
+
+ irq = FIELD_PREP(KVM_ARM_IRQ_NUM_MASK, 3);
+ irq |= KVM_ARM_IRQ_TYPE_PPI << KVM_ARM_IRQ_TYPE_SHIFT;
+
+ _kvm_irq_line(v.vm, irq, level);
+ } else if (uc.args[1] == GUEST_CMD_IS_AWAKE) {
+ pr_info("Guest skipping WFI due to pending IRQ\n");
+ } else if (uc.args[1] == GUEST_CMD_IRQ_CDIA) {
+ pr_info("Guest acknowledged IRQ\n");
+ }
+
+ continue;
+ case UCALL_ABORT:
+ REPORT_GUEST_ASSERT(uc);
+ break;
+ case UCALL_DONE:
+ goto done;
+ default:
+ TEST_FAIL("Unknown ucall %lu", uc.cmd);
+ }
+ }
+
+done:
+ TEST_ASSERT(ret == 0, "Failed to test GICv5 PPIs");
+
+ vm_gic_destroy(&v);
+}
+
+/*
+ * Returns 0 if it's possible to create GIC device of a given type (V5).
+ */
+int test_kvm_device(uint32_t gic_dev_type)
+{
+ struct kvm_vcpu *vcpus[NR_VCPUS];
+ struct vm_gic v;
+ int ret;
+
+ v.vm = vm_create_with_vcpus(NR_VCPUS, guest_code, vcpus);
+
+ /* try to create a non existing KVM device */
+ ret = __kvm_test_create_device(v.vm, 0);
+ TEST_ASSERT(ret && errno == ENODEV, "unsupported device");
+
+ /* trial mode */
+ ret = __kvm_test_create_device(v.vm, gic_dev_type);
+ if (ret)
+ return ret;
+ v.gic_fd = kvm_create_device(v.vm, gic_dev_type);
+
+ ret = __kvm_create_device(v.vm, gic_dev_type);
+ TEST_ASSERT(ret < 0 && errno == EEXIST, "create GIC device twice");
+
+ vm_gic_destroy(&v);
+
+ return 0;
+}
+
+void run_tests(uint32_t gic_dev_type)
+{
+ pr_info("Test VGICv5 PPIs\n");
+ test_vgic_v5_ppis(gic_dev_type);
+}
+
+int main(int ac, char **av)
+{
+ int ret;
+ int pa_bits;
+
+ test_disable_default_vgic();
+
+ pa_bits = vm_guest_mode_params[VM_MODE_DEFAULT].pa_bits;
+ max_phys_size = 1ULL << pa_bits;
+
+ ret = test_kvm_device(KVM_DEV_TYPE_ARM_VGIC_V5);
+ if (ret) {
+ pr_info("No GICv5 support; Not running GIC_v5 tests.\n");
+ exit(KSFT_SKIP);
+ }
+
+ pr_info("Running VGIC_V5 tests.\n");
+ run_tests(KVM_DEV_TYPE_ARM_VGIC_V5);
+
+ return 0;
+}
diff --git a/tools/testing/selftests/kvm/include/arm64/gic_v5.h b/tools/testing/selftests/kvm/include/arm64/gic_v5.h
new file mode 100644
index 000000000000..eb523d9277cf
--- /dev/null
+++ b/tools/testing/selftests/kvm/include/arm64/gic_v5.h
@@ -0,0 +1,150 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+
+#ifndef __SELFTESTS_GIC_V5_H
+#define __SELFTESTS_GIC_V5_H
+
+#include <asm/barrier.h>
+#include <asm/sysreg.h>
+
+#include <linux/bitfield.h>
+
+#include "processor.h"
+
+/*
+ * Definitions for GICv5 instructions for the Current Domain
+ */
+#define GICV5_OP_GIC_CDAFF sys_insn(1, 0, 12, 1, 3)
+#define GICV5_OP_GIC_CDDI sys_insn(1, 0, 12, 2, 0)
+#define GICV5_OP_GIC_CDDIS sys_insn(1, 0, 12, 1, 0)
+#define GICV5_OP_GIC_CDHM sys_insn(1, 0, 12, 2, 1)
+#define GICV5_OP_GIC_CDEN sys_insn(1, 0, 12, 1, 1)
+#define GICV5_OP_GIC_CDEOI sys_insn(1, 0, 12, 1, 7)
+#define GICV5_OP_GIC_CDPEND sys_insn(1, 0, 12, 1, 4)
+#define GICV5_OP_GIC_CDPRI sys_insn(1, 0, 12, 1, 2)
+#define GICV5_OP_GIC_CDRCFG sys_insn(1, 0, 12, 1, 5)
+#define GICV5_OP_GICR_CDIA sys_insn(1, 0, 12, 3, 0)
+#define GICV5_OP_GICR_CDNMIA sys_insn(1, 0, 12, 3, 1)
+
+/* Definitions for GIC CDAFF */
+#define GICV5_GIC_CDAFF_IAFFID_MASK GENMASK_ULL(47, 32)
+#define GICV5_GIC_CDAFF_TYPE_MASK GENMASK_ULL(31, 29)
+#define GICV5_GIC_CDAFF_IRM_MASK BIT_ULL(28)
+#define GICV5_GIC_CDAFF_ID_MASK GENMASK_ULL(23, 0)
+
+/* Definitions for GIC CDDI */
+#define GICV5_GIC_CDDI_TYPE_MASK GENMASK_ULL(31, 29)
+#define GICV5_GIC_CDDI_ID_MASK GENMASK_ULL(23, 0)
+
+/* Definitions for GIC CDDIS */
+#define GICV5_GIC_CDDIS_TYPE_MASK GENMASK_ULL(31, 29)
+#define GICV5_GIC_CDDIS_TYPE(r) FIELD_GET(GICV5_GIC_CDDIS_TYPE_MASK, r)
+#define GICV5_GIC_CDDIS_ID_MASK GENMASK_ULL(23, 0)
+#define GICV5_GIC_CDDIS_ID(r) FIELD_GET(GICV5_GIC_CDDIS_ID_MASK, r)
+
+/* Definitions for GIC CDEN */
+#define GICV5_GIC_CDEN_TYPE_MASK GENMASK_ULL(31, 29)
+#define GICV5_GIC_CDEN_ID_MASK GENMASK_ULL(23, 0)
+
+/* Definitions for GIC CDHM */
+#define GICV5_GIC_CDHM_HM_MASK BIT_ULL(32)
+#define GICV5_GIC_CDHM_TYPE_MASK GENMASK_ULL(31, 29)
+#define GICV5_GIC_CDHM_ID_MASK GENMASK_ULL(23, 0)
+
+/* Definitions for GIC CDPEND */
+#define GICV5_GIC_CDPEND_PENDING_MASK BIT_ULL(32)
+#define GICV5_GIC_CDPEND_TYPE_MASK GENMASK_ULL(31, 29)
+#define GICV5_GIC_CDPEND_ID_MASK GENMASK_ULL(23, 0)
+
+/* Definitions for GIC CDPRI */
+#define GICV5_GIC_CDPRI_PRIORITY_MASK GENMASK_ULL(39, 35)
+#define GICV5_GIC_CDPRI_TYPE_MASK GENMASK_ULL(31, 29)
+#define GICV5_GIC_CDPRI_ID_MASK GENMASK_ULL(23, 0)
+
+/* Definitions for GIC CDRCFG */
+#define GICV5_GIC_CDRCFG_TYPE_MASK GENMASK_ULL(31, 29)
+#define GICV5_GIC_CDRCFG_ID_MASK GENMASK_ULL(23, 0)
+
+/* Definitions for GICR CDIA */
+#define GICV5_GICR_CDIA_VALID_MASK BIT_ULL(32)
+#define GICV5_GICR_CDIA_VALID(r) FIELD_GET(GICV5_GICR_CDIA_VALID_MASK, r)
+#define GICV5_GICR_CDIA_TYPE_MASK GENMASK_ULL(31, 29)
+#define GICV5_GICR_CDIA_ID_MASK GENMASK_ULL(23, 0)
+#define GICV5_GICR_CDIA_INTID GENMASK_ULL(31, 0)
+
+/* Definitions for GICR CDNMIA */
+#define GICV5_GICR_CDNMIA_VALID_MASK BIT_ULL(32)
+#define GICV5_GICR_CDNMIA_VALID(r) FIELD_GET(GICV5_GICR_CDNMIA_VALID_MASK, r)
+#define GICV5_GICR_CDNMIA_TYPE_MASK GENMASK_ULL(31, 29)
+#define GICV5_GICR_CDNMIA_ID_MASK GENMASK_ULL(23, 0)
+
+#define gicr_insn(insn) read_sysreg_s(GICV5_OP_GICR_##insn)
+#define gic_insn(v, insn) write_sysreg_s(v, GICV5_OP_GIC_##insn)
+
+#define __GIC_BARRIER_INSN(op0, op1, CRn, CRm, op2, Rt) \
+ __emit_inst(0xd5000000 | \
+ sys_insn((op0), (op1), (CRn), (CRm), (op2)) | \
+ ((Rt) & 0x1f))
+
+#define GSB_SYS_BARRIER_INSN __GIC_BARRIER_INSN(1, 0, 12, 0, 0, 31)
+#define GSB_ACK_BARRIER_INSN __GIC_BARRIER_INSN(1, 0, 12, 0, 1, 31)
+
+#define gsb_ack() asm volatile(GSB_ACK_BARRIER_INSN : : : "memory")
+#define gsb_sys() asm volatile(GSB_SYS_BARRIER_INSN : : : "memory")
+
+#define REPEAT_BYTE(x) ((~0ul / 0xff) * (x))
+
+#define GICV5_IRQ_DEFAULT_PRI 0b10000
+
+#define GICV5_ARCH_PPI_SW_PPI 0x3
+
+void gicv5_ppi_priority_init(void)
+{
+ write_sysreg_s(REPEAT_BYTE(GICV5_IRQ_DEFAULT_PRI), SYS_ICC_PPI_PRIORITYR0_EL1);
+ write_sysreg_s(REPEAT_BYTE(GICV5_IRQ_DEFAULT_PRI), SYS_ICC_PPI_PRIORITYR1_EL1);
+ write_sysreg_s(REPEAT_BYTE(GICV5_IRQ_DEFAULT_PRI), SYS_ICC_PPI_PRIORITYR2_EL1);
+ write_sysreg_s(REPEAT_BYTE(GICV5_IRQ_DEFAULT_PRI), SYS_ICC_PPI_PRIORITYR3_EL1);
+ write_sysreg_s(REPEAT_BYTE(GICV5_IRQ_DEFAULT_PRI), SYS_ICC_PPI_PRIORITYR4_EL1);
+ write_sysreg_s(REPEAT_BYTE(GICV5_IRQ_DEFAULT_PRI), SYS_ICC_PPI_PRIORITYR5_EL1);
+ write_sysreg_s(REPEAT_BYTE(GICV5_IRQ_DEFAULT_PRI), SYS_ICC_PPI_PRIORITYR6_EL1);
+ write_sysreg_s(REPEAT_BYTE(GICV5_IRQ_DEFAULT_PRI), SYS_ICC_PPI_PRIORITYR7_EL1);
+ write_sysreg_s(REPEAT_BYTE(GICV5_IRQ_DEFAULT_PRI), SYS_ICC_PPI_PRIORITYR8_EL1);
+ write_sysreg_s(REPEAT_BYTE(GICV5_IRQ_DEFAULT_PRI), SYS_ICC_PPI_PRIORITYR9_EL1);
+ write_sysreg_s(REPEAT_BYTE(GICV5_IRQ_DEFAULT_PRI), SYS_ICC_PPI_PRIORITYR10_EL1);
+ write_sysreg_s(REPEAT_BYTE(GICV5_IRQ_DEFAULT_PRI), SYS_ICC_PPI_PRIORITYR11_EL1);
+ write_sysreg_s(REPEAT_BYTE(GICV5_IRQ_DEFAULT_PRI), SYS_ICC_PPI_PRIORITYR12_EL1);
+ write_sysreg_s(REPEAT_BYTE(GICV5_IRQ_DEFAULT_PRI), SYS_ICC_PPI_PRIORITYR13_EL1);
+ write_sysreg_s(REPEAT_BYTE(GICV5_IRQ_DEFAULT_PRI), SYS_ICC_PPI_PRIORITYR14_EL1);
+ write_sysreg_s(REPEAT_BYTE(GICV5_IRQ_DEFAULT_PRI), SYS_ICC_PPI_PRIORITYR15_EL1);
+
+ /*
+	 * Context synchronization is required to make sure the effects of the
+	 * system register writes are synchronised.
+ */
+ isb();
+}
+
+void gicv5_cpu_disable_interrupts(void)
+{
+ u64 cr0;
+
+ cr0 = FIELD_PREP(ICC_CR0_EL1_EN, 0);
+ write_sysreg_s(cr0, SYS_ICC_CR0_EL1);
+}
+
+void gicv5_cpu_enable_interrupts(void)
+{
+ u64 cr0, pcr;
+
+ write_sysreg_s(0, SYS_ICC_PPI_ENABLER0_EL1);
+ write_sysreg_s(0, SYS_ICC_PPI_ENABLER1_EL1);
+
+ gicv5_ppi_priority_init();
+
+ pcr = FIELD_PREP(ICC_PCR_EL1_PRIORITY, GICV5_IRQ_DEFAULT_PRI);
+ write_sysreg_s(pcr, SYS_ICC_PCR_EL1);
+
+ cr0 = FIELD_PREP(ICC_CR0_EL1_EN, 1);
+ write_sysreg_s(cr0, SYS_ICC_CR0_EL1);
+}
+
+#endif