KVM/arm64 updates for Linux 5.8:

- Move the arch-specific code into arch/arm64/kvm
- Start the post-32bit cleanup
- Cherry-pick a few non-invasive pre-NV patches
 -----BEGIN PGP SIGNATURE-----
 
 iQJDBAABCgAtFiEEn9UcU+C1Yxj9lZw9I9DQutE9ekMFAl7RLp8PHG1hekBrZXJu
 ZWwub3JnAAoJECPQ0LrRPXpD/iAQAJOHsS1PT9y/Gefam5os9FqKpogj68e3rx9k
 XfPcweexBVqmDWSI4vmL9xHW2F7z4EwAE4dIDsTCKHpihK30+jH8l12tOJBz35yp
 MR1hYjv43F54xzKkkuP4F4wo3Ygg4ipjHZPReGkaGj1QOQs6N/YKa1aSSYfzkzCz
 VLCSqPQz45CkGPYEGwuPn13AjHqGQAwPhteJNAoCxViw1KAldmoqDk6kbKB+b+7a
 2oIvxiTZejICsgSX6UvqQYNG52AyZ/5Daq8iraaigQ8sGyKr+/2Yi+3RUUH6p7ns
 aCsictk+RS3BzMAKDw6MPYc7OhJBhxQEV1pdiPpt0tpS4L9LNmBagKzlaBKZhwdr
 dYDAjOlbgZZUJpKnlBAipuVlQySHdm2WjXr4msdY69D7OGxmkzU/zkSIokqdA2hr
 MuL5W1v2Z1UpxyVltb+c/4lPcFZNnRI0Mz1WcvliEojlf2zzKYMcBAl3bTiAuil5
 aTT2+1G0OSCfUfr8Zart4LoAHeczw4zG/Pern+hl92eMXUlX3pIcqzQaLtVmmEE/
 ecPShMowKsXOOGGp/T8Q04N1fr6KzmufP5+kgJDFZfo6iJ6r5uQ9G8nuLmp3wQOX
 c9mNCwdSxrFBTJ10KfLHquKqwfl18VXzKDx1pzO5nSupmKWfWZ5YFO8j2709e83x
 R42MqKEG
 =aD+9
 -----END PGP SIGNATURE-----

Merge tag 'kvmarm-5.8' of git://git.kernel.org/pub/scm/linux/kernel/git/kvmarm/kvmarm into HEAD

KVM/arm64 updates for Linux 5.8:

- Move the arch-specific code into arch/arm64/kvm
- Start the post-32bit cleanup
- Cherry-pick a few non-invasive pre-NV patches
Paolo Bonzini 2020-06-01 04:26:27 -04:00
commit 380609445c
56 changed files with 629 additions and 639 deletions


@@ -5799,7 +5799,7 @@ will be initialized to 1 when created. This also improves performance because
 dirty logging can be enabled gradually in small chunks on the first call
 to KVM_CLEAR_DIRTY_LOG. KVM_DIRTY_LOG_INITIALLY_SET depends on
 KVM_DIRTY_LOG_MANUAL_PROTECT_ENABLE (it is also only available on
-x86 for now).
+x86 and arm64 for now).
 KVM_CAP_MANUAL_DIRTY_LOG_PROTECT2 was previously available under the name
 KVM_CAP_MANUAL_DIRTY_LOG_PROTECT, but the implementation had bugs that make
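A note on usage: userspace opts into this behaviour with KVM_ENABLE_CAP on the VM file descriptor, passing the desired KVM_DIRTY_LOG_* bits in args[0]. A minimal sketch follows; the vm_fd variable and the helper name are illustrative only (not part of this series), and error handling is omitted:

#include <linux/kvm.h>
#include <sys/ioctl.h>

/* Sketch: ask KVM for manual dirty-log protection with "initially set". */
static int enable_dirty_log_initially_set(int vm_fd)
{
	int supported;
	struct kvm_enable_cap cap = {
		.cap = KVM_CAP_MANUAL_DIRTY_LOG_PROTECT2,
		/* args[0] carries the KVM_DIRTY_LOG_* bits to enable. */
		.args[0] = KVM_DIRTY_LOG_MANUAL_PROTECT_ENABLE |
			   KVM_DIRTY_LOG_INITIALLY_SET,
	};

	/* KVM_CHECK_EXTENSION reports the dirty-log modes the host supports. */
	supported = ioctl(vm_fd, KVM_CHECK_EXTENSION,
			  KVM_CAP_MANUAL_DIRTY_LOG_PROTECT2);
	if (supported < 0 || !(supported & KVM_DIRTY_LOG_INITIALLY_SET))
		return -1;

	return ioctl(vm_fd, KVM_ENABLE_CAP, &cap);
}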


@@ -9295,7 +9295,6 @@ F: arch/arm64/include/asm/kvm*
 F: arch/arm64/include/uapi/asm/kvm*
 F: arch/arm64/kvm/
 F: include/kvm/arm_*
-F: virt/kvm/arm/
 KERNEL VIRTUAL MACHINE FOR MIPS (KVM/mips)
 L: linux-mips@vger.kernel.org


@@ -64,12 +64,14 @@ extern void __kvm_tlb_flush_vmid_ipa(struct kvm *kvm, phys_addr_t ipa);
 extern void __kvm_tlb_flush_vmid(struct kvm *kvm);
 extern void __kvm_tlb_flush_local_vmid(struct kvm_vcpu *vcpu);
-extern void __kvm_timer_set_cntvoff(u32 cntvoff_low, u32 cntvoff_high);
+extern void __kvm_timer_set_cntvoff(u64 cntvoff);
 extern int kvm_vcpu_run_vhe(struct kvm_vcpu *vcpu);
 extern int __kvm_vcpu_run_nvhe(struct kvm_vcpu *vcpu);
+extern void __kvm_enable_ssbs(void);
 extern u64 __vgic_v3_get_ich_vtr_el2(void);
 extern u64 __vgic_v3_read_vmcr(void);
 extern void __vgic_v3_write_vmcr(u32 vmcr);


@@ -46,6 +46,9 @@
 #define KVM_REQ_RECORD_STEAL KVM_ARCH_REQ(3)
 #define KVM_REQ_RELOAD_GICv4 KVM_ARCH_REQ(4)
+#define KVM_DIRTY_LOG_MANUAL_CAPS (KVM_DIRTY_LOG_MANUAL_PROTECT_ENABLE | \
+ KVM_DIRTY_LOG_INITIALLY_SET)
 DECLARE_STATIC_KEY_FALSE(userspace_irqchip_in_use);
 extern unsigned int kvm_sve_max_vl;
@@ -112,12 +115,8 @@ struct kvm_vcpu_fault_info {
 u64 disr_el1; /* Deferred [SError] Status Register */
 };
-/*
-* 0 is reserved as an invalid value.
-* Order should be kept in sync with the save/restore code.
-*/
 enum vcpu_sysreg {
-__INVALID_SYSREG__,
+__INVALID_SYSREG__, /* 0 is reserved as an invalid value */
 MPIDR_EL1, /* MultiProcessor Affinity Register */
 CSSELR_EL1, /* Cache Size Selection Register */
 SCTLR_EL1, /* System Control Register */
@@ -532,39 +531,6 @@ static inline void kvm_init_host_cpu_context(struct kvm_cpu_context *cpu_ctxt)
 cpu_ctxt->sys_regs[MPIDR_EL1] = read_cpuid_mpidr();
 }
-void __kvm_enable_ssbs(void);
-static inline void __cpu_init_hyp_mode(phys_addr_t pgd_ptr,
-unsigned long hyp_stack_ptr,
-unsigned long vector_ptr)
-{
-/*
-* Calculate the raw per-cpu offset without a translation from the
-* kernel's mapping to the linear mapping, and store it in tpidr_el2
-* so that we can use adr_l to access per-cpu variables in EL2.
-*/
-u64 tpidr_el2 = ((u64)this_cpu_ptr(&kvm_host_data) -
-(u64)kvm_ksym_ref(kvm_host_data));
-/*
-* Call initialization code, and switch to the full blown HYP code.
-* If the cpucaps haven't been finalized yet, something has gone very
-* wrong, and hyp will crash and burn when it uses any
-* cpus_have_const_cap() wrapper.
-*/
-BUG_ON(!system_capabilities_finalized());
-__kvm_call_hyp((void *)pgd_ptr, hyp_stack_ptr, vector_ptr, tpidr_el2);
-/*
-* Disabling SSBD on a non-VHE system requires us to enable SSBS
-* at EL2.
-*/
-if (!has_vhe() && this_cpu_has_cap(ARM64_SSBS) &&
-arm64_get_ssbd_state() == ARM64_SSBD_FORCE_DISABLE) {
-kvm_call_hyp(__kvm_enable_ssbs);
-}
-}
 static inline bool kvm_arch_requires_vhe(void)
 {
 /*
@@ -600,8 +566,6 @@ int kvm_arm_vcpu_arch_get_attr(struct kvm_vcpu *vcpu,
 int kvm_arm_vcpu_arch_has_attr(struct kvm_vcpu *vcpu,
 struct kvm_device_attr *attr);
-static inline void __cpu_init_stage2(void) {}
 /* Guest/host FPSIMD coordination helpers */
 int kvm_arch_vcpu_run_map_fp(struct kvm_vcpu *vcpu);
 void kvm_arch_vcpu_load_fp(struct kvm_vcpu *vcpu);


@@ -56,12 +56,12 @@
 int __vgic_v2_perform_cpuif_access(struct kvm_vcpu *vcpu);
-void __vgic_v3_save_state(struct kvm_vcpu *vcpu);
-void __vgic_v3_restore_state(struct kvm_vcpu *vcpu);
-void __vgic_v3_activate_traps(struct kvm_vcpu *vcpu);
-void __vgic_v3_deactivate_traps(struct kvm_vcpu *vcpu);
-void __vgic_v3_save_aprs(struct kvm_vcpu *vcpu);
-void __vgic_v3_restore_aprs(struct kvm_vcpu *vcpu);
+void __vgic_v3_save_state(struct vgic_v3_cpu_if *cpu_if);
+void __vgic_v3_restore_state(struct vgic_v3_cpu_if *cpu_if);
+void __vgic_v3_activate_traps(struct vgic_v3_cpu_if *cpu_if);
+void __vgic_v3_deactivate_traps(struct vgic_v3_cpu_if *cpu_if);
+void __vgic_v3_save_aprs(struct vgic_v3_cpu_if *cpu_if);
+void __vgic_v3_restore_aprs(struct vgic_v3_cpu_if *cpu_if);
 int __vgic_v3_perform_cpuif_access(struct kvm_vcpu *vcpu);
 void __timer_enable_traps(struct kvm_vcpu *vcpu);


@@ -363,8 +363,6 @@ static inline void __kvm_flush_dcache_pud(pud_t pud)
 }
 }
-#define kvm_virt_to_phys(x) __pa_symbol(x)
 void kvm_set_way_flush(struct kvm_vcpu *vcpu);
 void kvm_toggle_cache(struct kvm_vcpu *vcpu, bool was_enabled);
@@ -473,7 +471,7 @@ static inline int kvm_write_guest_lock(struct kvm *kvm, gpa_t gpa,
 extern void *__kvm_bp_vect_base;
 extern int __kvm_harden_el2_vector_slot;
-/* This is only called on a VHE system */
+/* This is called on both VHE and !VHE systems */
 static inline void *kvm_get_hyp_vector(void)
 {
 struct bp_hardening_data *data = arm64_get_bp_hardening_data();


@@ -35,6 +35,7 @@
 #define GIC_PRIO_PSR_I_SET (1 << 4)
 /* Additional SPSR bits not exposed in the UABI */
+#define PSR_MODE_THREAD_BIT (1 << 0)
 #define PSR_IL_BIT (1 << 20)
 /* AArch32-specific ptrace requests */


@@ -85,7 +85,7 @@ static inline bool is_kernel_in_hyp_mode(void)
 static __always_inline bool has_vhe(void)
 {
-if (cpus_have_const_cap(ARM64_HAS_VIRT_HOST_EXTN))
+if (cpus_have_final_cap(ARM64_HAS_VIRT_HOST_EXTN))
 return true;
 return false;


@@ -96,7 +96,7 @@ int main(void)
 DEFINE(CPU_BOOT_PTRAUTH_KEY, offsetof(struct secondary_data, ptrauth_key));
 #endif
 BLANK();
-#ifdef CONFIG_KVM_ARM_HOST
+#ifdef CONFIG_KVM
 DEFINE(VCPU_CONTEXT, offsetof(struct kvm_vcpu, arch.ctxt));
 DEFINE(VCPU_FAULT_DISR, offsetof(struct kvm_vcpu, arch.fault.disr_el1));
 DEFINE(VCPU_WORKAROUND_FLAGS, offsetof(struct kvm_vcpu, arch.workaround_flags));


@@ -234,7 +234,7 @@ static int detect_harden_bp_fw(void)
 smccc_end = NULL;
 break;
-#if IS_ENABLED(CONFIG_KVM_ARM_HOST)
+#if IS_ENABLED(CONFIG_KVM)
 case SMCCC_CONDUIT_SMC:
 cb = call_smc_arch_workaround_1;
 smccc_start = __smccc_workaround_1_smc;


@@ -430,7 +430,7 @@ static void __init hyp_mode_check(void)
 "CPU: CPUs started in inconsistent modes");
 else
 pr_info("CPU: All CPU(s) started at EL1\n");
-if (IS_ENABLED(CONFIG_KVM_ARM_HOST))
+if (IS_ENABLED(CONFIG_KVM))
 kvm_compute_layout();
 }


@@ -3,7 +3,6 @@
 # KVM configuration
 #
-source "virt/kvm/Kconfig"
 source "virt/lib/Kconfig"
 menuconfig VIRTUALIZATION
@@ -18,7 +17,7 @@ menuconfig VIRTUALIZATION
 if VIRTUALIZATION
-config KVM
+menuconfig KVM
 bool "Kernel-based Virtual Machine (KVM) support"
 depends on OF
 # for TASKSTATS/TASK_DELAY_ACCT:
@@ -28,13 +27,11 @@ config KVM
 select HAVE_KVM_CPU_RELAX_INTERCEPT
 select HAVE_KVM_ARCH_TLB_FLUSH_ALL
 select KVM_MMIO
-select KVM_ARM_HOST
 select KVM_GENERIC_DIRTYLOG_READ_PROTECT
 select SRCU
 select KVM_VFIO
 select HAVE_KVM_EVENTFD
 select HAVE_KVM_IRQFD
-select KVM_ARM_PMU if HW_PERF_EVENTS
 select HAVE_KVM_MSI
 select HAVE_KVM_IRQCHIP
 select HAVE_KVM_IRQ_ROUTING
@@ -45,23 +42,24 @@ config KVM
 select TASK_DELAY_ACCT
 ---help---
 Support hosting virtualized guest machines.
-We don't support KVM with 16K page tables yet, due to the multiple
-levels of fake page tables.
 If unsure, say N.
-config KVM_ARM_HOST
-bool
----help---
-Provides host support for ARM processors.
+if KVM
+source "virt/kvm/Kconfig"
 config KVM_ARM_PMU
-bool
+bool "Virtual Performance Monitoring Unit (PMU) support"
+depends on HW_PERF_EVENTS
+default y
 ---help---
 Adds support for a virtual Performance Monitoring Unit (PMU) in
 virtual machines.
 config KVM_INDIRECT_VECTORS
-def_bool KVM && (HARDEN_BRANCH_PREDICTOR || HARDEN_EL2_VECTORS)
+def_bool HARDEN_BRANCH_PREDICTOR || HARDEN_EL2_VECTORS
+endif # KVM
 endif # VIRTUALIZATION


@@ -3,37 +3,25 @@
 # Makefile for Kernel-based Virtual Machine module
 #
-ccflags-y += -I $(srctree)/$(src) -I $(srctree)/virt/kvm/arm/vgic
+ccflags-y += -I $(srctree)/$(src)
 KVM=../../../virt/kvm
-obj-$(CONFIG_KVM_ARM_HOST) += kvm.o
-obj-$(CONFIG_KVM_ARM_HOST) += hyp/
-kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/kvm_main.o $(KVM)/coalesced_mmio.o $(KVM)/eventfd.o $(KVM)/vfio.o
-kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/arm.o $(KVM)/arm/mmu.o $(KVM)/arm/mmio.o
-kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/psci.o $(KVM)/arm/perf.o
-kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/hypercalls.o
-kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/pvtime.o
-kvm-$(CONFIG_KVM_ARM_HOST) += inject_fault.o regmap.o va_layout.o
-kvm-$(CONFIG_KVM_ARM_HOST) += hyp.o hyp-init.o handle_exit.o
-kvm-$(CONFIG_KVM_ARM_HOST) += guest.o debug.o reset.o sys_regs.o sys_regs_generic_v8.o
-kvm-$(CONFIG_KVM_ARM_HOST) += vgic-sys-reg-v3.o fpsimd.o pmu.o
-kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/aarch32.o
-kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/vgic/vgic.o
-kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/vgic/vgic-init.o
-kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/vgic/vgic-irqfd.o
-kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/vgic/vgic-v2.o
-kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/vgic/vgic-v3.o
-kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/vgic/vgic-v4.o
-kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/vgic/vgic-mmio.o
-kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/vgic/vgic-mmio-v2.o
-kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/vgic/vgic-mmio-v3.o
-kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/vgic/vgic-kvm-device.o
-kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/vgic/vgic-its.o
-kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/vgic/vgic-debug.o
-kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/irqchip.o
-kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/arch_timer.o
-kvm-$(CONFIG_KVM_ARM_PMU) += $(KVM)/arm/pmu.o
+obj-$(CONFIG_KVM) += kvm.o
+obj-$(CONFIG_KVM) += hyp/
+kvm-y := $(KVM)/kvm_main.o $(KVM)/coalesced_mmio.o $(KVM)/eventfd.o \
+ $(KVM)/vfio.o $(KVM)/irqchip.o \
+ arm.o mmu.o mmio.o psci.o perf.o hypercalls.o pvtime.o \
+ inject_fault.o regmap.o va_layout.o hyp.o hyp-init.o handle_exit.o \
+ guest.o debug.o reset.o sys_regs.o sys_regs_generic_v8.o \
+ vgic-sys-reg-v3.o fpsimd.o pmu.o \
+ aarch32.o arch_timer.o \
+ vgic/vgic.o vgic/vgic-init.o \
+ vgic/vgic-irqfd.o vgic/vgic-v2.o \
+ vgic/vgic-v3.o vgic/vgic-v4.o \
+ vgic/vgic-mmio.o vgic/vgic-mmio-v2.o \
+ vgic/vgic-mmio-v3.o vgic/vgic-kvm-device.o \
+ vgic/vgic-its.o vgic/vgic-debug.o
+kvm-$(CONFIG_KVM_ARM_PMU) += pmu-emul.o


@@ -451,17 +451,7 @@ static void timer_restore_state(struct arch_timer_context *ctx)
 static void set_cntvoff(u64 cntvoff)
 {
-u32 low = lower_32_bits(cntvoff);
-u32 high = upper_32_bits(cntvoff);
-/*
-* Since kvm_call_hyp doesn't fully support the ARM PCS especially on
-* 32-bit systems, but rather passes register by register shifted one
-* place (we put the function address in r0/x0), we cannot simply pass
-* a 64-bit value as an argument, but have to split the value in two
-* 32-bit halves.
-*/
-kvm_call_hyp(__kvm_timer_set_cntvoff, low, high);
+kvm_call_hyp(__kvm_timer_set_cntvoff, cntvoff);
 }
 static inline void set_timer_irq_phys_active(struct arch_timer_context *ctx, bool active)


@@ -22,7 +22,7 @@
 #include <trace/events/kvm.h>
 #define CREATE_TRACE_POINTS
-#include "trace.h"
+#include "trace_arm.h"
 #include <linux/uaccess.h>
 #include <asm/ptrace.h>
@@ -95,6 +95,11 @@ int kvm_vm_ioctl_enable_cap(struct kvm *kvm,
 return r;
 }
+static int kvm_arm_default_max_vcpus(void)
+{
+return vgic_present ? kvm_vgic_get_max_vcpus() : KVM_MAX_VCPUS;
+}
 /**
 * kvm_arch_init_vm - initializes a VM data structure
 * @kvm: pointer to the KVM struct
@@ -128,8 +133,7 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
 kvm->arch.vmid.vmid_gen = 0;
 /* The maximum number of VCPUs is limited by the host's GIC model */
-kvm->arch.max_vcpus = vgic_present ?
-kvm_vgic_get_max_vcpus() : KVM_MAX_VCPUS;
+kvm->arch.max_vcpus = kvm_arm_default_max_vcpus();
 return ret;
 out_free_stage2_pgd:
@@ -204,10 +208,11 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
 r = num_online_cpus();
 break;
 case KVM_CAP_MAX_VCPUS:
-r = KVM_MAX_VCPUS;
-break;
 case KVM_CAP_MAX_VCPU_ID:
-r = KVM_MAX_VCPU_ID;
+if (kvm)
+r = kvm->arch.max_vcpus;
+else
+r = kvm_arm_default_max_vcpus();
 break;
 case KVM_CAP_MSI_DEVID:
 if (!kvm)
@@ -455,9 +460,9 @@ void force_vm_exit(const cpumask_t *mask)
 *
 * The hardware supports a limited set of values with the value zero reserved
 * for the host, so we check if an assigned value belongs to a previous
-* generation, which which requires us to assign a new value. If we're the
-* first to use a VMID for the new generation, we must flush necessary caches
-* and TLBs on all CPUs.
+* generation, which requires us to assign a new value. If we're the first to
+* use a VMID for the new generation, we must flush necessary caches and TLBs
+* on all CPUs.
 */
 static bool need_new_vmid_gen(struct kvm_vmid *vmid)
 {
@@ -984,8 +989,11 @@ static int kvm_arch_vcpu_ioctl_vcpu_init(struct kvm_vcpu *vcpu,
 /*
 * Ensure a rebooted VM will fault in RAM pages and detect if the
 * guest MMU is turned off and flush the caches as needed.
+*
+* S2FWB enforces all memory accesses to RAM being cacheable, we
+* ensure that the cache is always coherent.
 */
-if (vcpu->arch.has_run_once)
+if (vcpu->arch.has_run_once && !cpus_have_const_cap(ARM64_HAS_STAGE2_FWB))
 stage2_unmap_vm(vcpu->kvm);
 vcpu_reset_hcr(vcpu);
@@ -1266,19 +1274,41 @@ static void cpu_init_hyp_mode(void)
 {
 phys_addr_t pgd_ptr;
 unsigned long hyp_stack_ptr;
-unsigned long stack_page;
 unsigned long vector_ptr;
+unsigned long tpidr_el2;
 /* Switch from the HYP stub to our own HYP init vector */
 __hyp_set_vectors(kvm_get_idmap_vector());
+/*
+* Calculate the raw per-cpu offset without a translation from the
+* kernel's mapping to the linear mapping, and store it in tpidr_el2
+* so that we can use adr_l to access per-cpu variables in EL2.
+*/
+tpidr_el2 = ((unsigned long)this_cpu_ptr(&kvm_host_data) -
+(unsigned long)kvm_ksym_ref(kvm_host_data));
 pgd_ptr = kvm_mmu_get_httbr();
-stack_page = __this_cpu_read(kvm_arm_hyp_stack_page);
-hyp_stack_ptr = stack_page + PAGE_SIZE;
+hyp_stack_ptr = __this_cpu_read(kvm_arm_hyp_stack_page) + PAGE_SIZE;
 vector_ptr = (unsigned long)kvm_get_hyp_vector();
-__cpu_init_hyp_mode(pgd_ptr, hyp_stack_ptr, vector_ptr);
-__cpu_init_stage2();
+/*
+* Call initialization code, and switch to the full blown HYP code.
+* If the cpucaps haven't been finalized yet, something has gone very
+* wrong, and hyp will crash and burn when it uses any
+* cpus_have_const_cap() wrapper.
+*/
+BUG_ON(!system_capabilities_finalized());
+__kvm_call_hyp((void *)pgd_ptr, hyp_stack_ptr, vector_ptr, tpidr_el2);
+/*
+* Disabling SSBD on a non-VHE system requires us to enable SSBS
+* at EL2.
+*/
+if (this_cpu_has_cap(ARM64_SSBS) &&
+arm64_get_ssbd_state() == ARM64_SSBD_FORCE_DISABLE) {
+kvm_call_hyp(__kvm_enable_ssbs);
+}
 }
 static void cpu_hyp_reset(void)
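One user-visible effect of the KVM_CAP_MAX_VCPUS / KVM_CAP_MAX_VCPU_ID change above is that KVM_CHECK_EXTENSION now reports a per-VM limit (derived from the in-kernel vGIC model) when issued on a VM file descriptor. A rough userspace sketch, assuming kvm_fd and vm_fd come from opening /dev/kvm and KVM_CREATE_VM respectively; the helper name is illustrative and not part of this series:

#include <linux/kvm.h>
#include <sys/ioctl.h>

/* Query the effective VCPU limit for this VM; fall back to the global cap. */
static int query_max_vcpus(int kvm_fd, int vm_fd)
{
	int max = ioctl(vm_fd, KVM_CHECK_EXTENSION, KVM_CAP_MAX_VCPUS);

	if (max <= 0)	/* older kernels: use the system-wide value */
		max = ioctl(kvm_fd, KVM_CHECK_EXTENSION, KVM_CAP_MAX_VCPUS);

	return max;
}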


@@ -266,7 +266,7 @@ static int set_sve_vls(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg)
 /*
 * Vector lengths supported by the host can't currently be
 * hidden from the guest individually: instead we can only set a
-* maxmium via ZCR_EL2.LEN. So, make sure the available vector
+* maximum via ZCR_EL2.LEN. So, make sure the available vector
 * lengths match the set requested exactly up to the requested
 * maximum:
 */
@@ -336,7 +336,7 @@ static int sve_reg_to_region(struct sve_state_reg_region *region,
 unsigned int reg_num;
 unsigned int reqoffset, reqlen; /* User-requested offset and length */
-unsigned int maxlen; /* Maxmimum permitted length */
+unsigned int maxlen; /* Maximum permitted length */
 size_t sve_state_size;


@@ -23,7 +23,7 @@
 #include <kvm/arm_hypercalls.h>
 #define CREATE_TRACE_POINTS
-#include "trace.h"
+#include "trace_handle_exit.h"
 typedef int (*exit_handle_fn)(struct kvm_vcpu *, struct kvm_run *);


@@ -6,20 +6,10 @@
 ccflags-y += -fno-stack-protector -DDISABLE_BRANCH_PROFILING \
 $(DISABLE_STACKLEAK_PLUGIN)
-KVM=../../../../virt/kvm
-obj-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/hyp/vgic-v3-sr.o
-obj-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/hyp/timer-sr.o
-obj-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/hyp/aarch32.o
-obj-$(CONFIG_KVM_ARM_HOST) += vgic-v2-cpuif-proxy.o
-obj-$(CONFIG_KVM_ARM_HOST) += sysreg-sr.o
-obj-$(CONFIG_KVM_ARM_HOST) += debug-sr.o
-obj-$(CONFIG_KVM_ARM_HOST) += entry.o
-obj-$(CONFIG_KVM_ARM_HOST) += switch.o
-obj-$(CONFIG_KVM_ARM_HOST) += fpsimd.o
-obj-$(CONFIG_KVM_ARM_HOST) += tlb.o
-obj-$(CONFIG_KVM_ARM_HOST) += hyp-entry.o
+obj-$(CONFIG_KVM) += hyp.o
+hyp-y := vgic-v3-sr.o timer-sr.o aarch32.o vgic-v2-cpuif-proxy.o sysreg-sr.o \
+ debug-sr.o entry.o switch.o fpsimd.o tlb.o hyp-entry.o
 # KVM code is run at a different exception code with a different map, so
 # compiler instrumentation that inserts callbacks or checks into the code may


@@ -270,8 +270,8 @@ static void __hyp_text __deactivate_vm(struct kvm_vcpu *vcpu)
 static void __hyp_text __hyp_vgic_save_state(struct kvm_vcpu *vcpu)
 {
 if (static_branch_unlikely(&kvm_vgic_global_state.gicv3_cpuif)) {
-__vgic_v3_save_state(vcpu);
-__vgic_v3_deactivate_traps(vcpu);
+__vgic_v3_save_state(&vcpu->arch.vgic_cpu.vgic_v3);
+__vgic_v3_deactivate_traps(&vcpu->arch.vgic_cpu.vgic_v3);
 }
 }
@@ -279,8 +279,8 @@ static void __hyp_text __hyp_vgic_save_state(struct kvm_vcpu *vcpu)
 static void __hyp_text __hyp_vgic_restore_state(struct kvm_vcpu *vcpu)
 {
 if (static_branch_unlikely(&kvm_vgic_global_state.gicv3_cpuif)) {
-__vgic_v3_activate_traps(vcpu);
-__vgic_v3_restore_state(vcpu);
+__vgic_v3_activate_traps(&vcpu->arch.vgic_cpu.vgic_v3);
+__vgic_v3_restore_state(&vcpu->arch.vgic_cpu.vgic_v3);
 }
 }


@@ -10,9 +10,8 @@
 #include <asm/kvm_hyp.h>
-void __hyp_text __kvm_timer_set_cntvoff(u32 cntvoff_low, u32 cntvoff_high)
+void __hyp_text __kvm_timer_set_cntvoff(u64 cntvoff)
 {
-u64 cntvoff = (u64)cntvoff_high << 32 | cntvoff_low;
 write_sysreg(cntvoff, cntvoff_el2);
 }


@@ -194,10 +194,9 @@ static u32 __hyp_text __vgic_v3_read_ap1rn(int n)
 return val;
 }
-void __hyp_text __vgic_v3_save_state(struct kvm_vcpu *vcpu)
+void __hyp_text __vgic_v3_save_state(struct vgic_v3_cpu_if *cpu_if)
 {
-struct vgic_v3_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v3;
-u64 used_lrs = vcpu->arch.vgic_cpu.used_lrs;
+u64 used_lrs = cpu_if->used_lrs;
 /*
 * Make sure stores to the GIC via the memory mapped interface
@@ -230,10 +229,9 @@ void __hyp_text __vgic_v3_save_state(struct kvm_vcpu *vcpu)
 }
 }
-void __hyp_text __vgic_v3_restore_state(struct kvm_vcpu *vcpu)
+void __hyp_text __vgic_v3_restore_state(struct vgic_v3_cpu_if *cpu_if)
 {
-struct vgic_v3_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v3;
-u64 used_lrs = vcpu->arch.vgic_cpu.used_lrs;
+u64 used_lrs = cpu_if->used_lrs;
 int i;
 if (used_lrs || cpu_if->its_vpe.its_vm) {
@@ -257,10 +255,8 @@ void __hyp_text __vgic_v3_restore_state(struct kvm_vcpu *vcpu)
 }
 }
-void __hyp_text __vgic_v3_activate_traps(struct kvm_vcpu *vcpu)
+void __hyp_text __vgic_v3_activate_traps(struct vgic_v3_cpu_if *cpu_if)
 {
-struct vgic_v3_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v3;
 /*
 * VFIQEn is RES1 if ICC_SRE_EL1.SRE is 1. This causes a
 * Group0 interrupt (as generated in GICv2 mode) to be
@@ -306,9 +302,8 @@ void __hyp_text __vgic_v3_activate_traps(struct kvm_vcpu *vcpu)
 write_gicreg(cpu_if->vgic_hcr, ICH_HCR_EL2);
 }
-void __hyp_text __vgic_v3_deactivate_traps(struct kvm_vcpu *vcpu)
+void __hyp_text __vgic_v3_deactivate_traps(struct vgic_v3_cpu_if *cpu_if)
 {
-struct vgic_v3_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v3;
 u64 val;
 if (!cpu_if->vgic_sre) {
@@ -333,15 +328,11 @@ void __hyp_text __vgic_v3_deactivate_traps(struct kvm_vcpu *vcpu)
 write_gicreg(0, ICH_HCR_EL2);
 }
-void __hyp_text __vgic_v3_save_aprs(struct kvm_vcpu *vcpu)
+void __hyp_text __vgic_v3_save_aprs(struct vgic_v3_cpu_if *cpu_if)
 {
-struct vgic_v3_cpu_if *cpu_if;
 u64 val;
 u32 nr_pre_bits;
-vcpu = kern_hyp_va(vcpu);
-cpu_if = &vcpu->arch.vgic_cpu.vgic_v3;
 val = read_gicreg(ICH_VTR_EL2);
 nr_pre_bits = vtr_to_nr_pre_bits(val);
@@ -370,15 +361,11 @@ void __hyp_text __vgic_v3_save_aprs(struct kvm_vcpu *vcpu)
 }
 }
-void __hyp_text __vgic_v3_restore_aprs(struct kvm_vcpu *vcpu)
+void __hyp_text __vgic_v3_restore_aprs(struct vgic_v3_cpu_if *cpu_if)
 {
-struct vgic_v3_cpu_if *cpu_if;
 u64 val;
 u32 nr_pre_bits;
-vcpu = kern_hyp_va(vcpu);
-cpu_if = &vcpu->arch.vgic_cpu.vgic_v3;
 val = read_gicreg(ICH_VTR_EL2);
 nr_pre_bits = vtr_to_nr_pre_bits(val);
@@ -431,8 +418,6 @@ void __hyp_text __vgic_v3_write_vmcr(u32 vmcr)
 write_gicreg(vmcr, ICH_VMCR_EL2);
 }
-#ifdef CONFIG_ARM64
 static int __hyp_text __vgic_v3_bpr_min(void)
 {
 /* See Pseudocode for VPriorityGroup */
@@ -453,7 +438,7 @@ static int __hyp_text __vgic_v3_highest_priority_lr(struct kvm_vcpu *vcpu,
 u32 vmcr,
 u64 *lr_val)
 {
-unsigned int used_lrs = vcpu->arch.vgic_cpu.used_lrs;
+unsigned int used_lrs = vcpu->arch.vgic_cpu.vgic_v3.used_lrs;
 u8 priority = GICv3_IDLE_PRIORITY;
 int i, lr = -1;
@@ -492,7 +477,7 @@ static int __hyp_text __vgic_v3_highest_priority_lr(struct kvm_vcpu *vcpu,
 static int __hyp_text __vgic_v3_find_active_lr(struct kvm_vcpu *vcpu,
 int intid, u64 *lr_val)
 {
-unsigned int used_lrs = vcpu->arch.vgic_cpu.used_lrs;
+unsigned int used_lrs = vcpu->arch.vgic_cpu.vgic_v3.used_lrs;
 int i;
 for (i = 0; i < used_lrs; i++) {
@@ -579,7 +564,7 @@ static u8 __hyp_text __vgic_v3_pri_to_pre(u8 pri, u32 vmcr, int grp)
 /*
 * The priority value is independent of any of the BPR values, so we
-* normalize it using the minumal BPR value. This guarantees that no
+* normalize it using the minimal BPR value. This guarantees that no
 * matter what the guest does with its BPR, we can always set/get the
 * same value of a priority.
 */
@@ -1126,5 +1111,3 @@ int __hyp_text __vgic_v3_perform_cpuif_access(struct kvm_vcpu *vcpu)
 return 1;
 }
-#endif


@@ -26,28 +26,12 @@ enum exception_type {
 except_type_serror = 0x180,
 };
-static u64 get_except_vector(struct kvm_vcpu *vcpu, enum exception_type type)
-{
-u64 exc_offset;
-switch (*vcpu_cpsr(vcpu) & (PSR_MODE_MASK | PSR_MODE32_BIT)) {
-case PSR_MODE_EL1t:
-exc_offset = CURRENT_EL_SP_EL0_VECTOR;
-break;
-case PSR_MODE_EL1h:
-exc_offset = CURRENT_EL_SP_ELx_VECTOR;
-break;
-case PSR_MODE_EL0t:
-exc_offset = LOWER_EL_AArch64_VECTOR;
-break;
-default:
-exc_offset = LOWER_EL_AArch32_VECTOR;
-}
-return vcpu_read_sys_reg(vcpu, VBAR_EL1) + exc_offset + type;
-}
 /*
+* This performs the exception entry at a given EL (@target_mode), stashing PC
+* and PSTATE into ELR and SPSR respectively, and compute the new PC/PSTATE.
+* The EL passed to this function *must* be a non-secure, privileged mode with
+* bit 0 being set (PSTATE.SP == 1).
+*
 * When an exception is taken, most PSTATE fields are left unchanged in the
 * handler. However, some are explicitly overridden (e.g. M[4:0]). Luckily all
 * of the inherited bits have the same position in the AArch64/AArch32 SPSR_ELx
@@ -59,10 +43,35 @@ static u64 get_except_vector(struct kvm_vcpu *vcpu, enum exception_type type)
 * Here we manipulate the fields in order of the AArch64 SPSR_ELx layout, from
 * MSB to LSB.
 */
-static unsigned long get_except64_pstate(struct kvm_vcpu *vcpu)
+static void enter_exception64(struct kvm_vcpu *vcpu, unsigned long target_mode,
+enum exception_type type)
 {
-unsigned long sctlr = vcpu_read_sys_reg(vcpu, SCTLR_EL1);
-unsigned long old, new;
+unsigned long sctlr, vbar, old, new, mode;
+u64 exc_offset;
+mode = *vcpu_cpsr(vcpu) & (PSR_MODE_MASK | PSR_MODE32_BIT);
+if (mode == target_mode)
+exc_offset = CURRENT_EL_SP_ELx_VECTOR;
+else if ((mode | PSR_MODE_THREAD_BIT) == target_mode)
+exc_offset = CURRENT_EL_SP_EL0_VECTOR;
+else if (!(mode & PSR_MODE32_BIT))
+exc_offset = LOWER_EL_AArch64_VECTOR;
+else
+exc_offset = LOWER_EL_AArch32_VECTOR;
+switch (target_mode) {
+case PSR_MODE_EL1h:
+vbar = vcpu_read_sys_reg(vcpu, VBAR_EL1);
+sctlr = vcpu_read_sys_reg(vcpu, SCTLR_EL1);
+vcpu_write_elr_el1(vcpu, *vcpu_pc(vcpu));
+break;
+default:
+/* Don't do that */
+BUG();
+}
+*vcpu_pc(vcpu) = vbar + exc_offset + type;
 old = *vcpu_cpsr(vcpu);
 new = 0;
@@ -105,9 +114,10 @@ static unsigned long get_except64_pstate(struct kvm_vcpu *vcpu)
 new |= PSR_I_BIT;
 new |= PSR_F_BIT;
-new |= PSR_MODE_EL1h;
-return new;
+new |= target_mode;
+*vcpu_cpsr(vcpu) = new;
+vcpu_write_spsr(vcpu, old);
 }
 static void inject_abt64(struct kvm_vcpu *vcpu, bool is_iabt, unsigned long addr)
@@ -116,11 +126,7 @@ static void inject_abt64(struct kvm_vcpu *vcpu, bool is_iabt, unsigned long addr
 bool is_aarch32 = vcpu_mode_is_32bit(vcpu);
 u32 esr = 0;
-vcpu_write_elr_el1(vcpu, *vcpu_pc(vcpu));
-*vcpu_pc(vcpu) = get_except_vector(vcpu, except_type_sync);
-*vcpu_cpsr(vcpu) = get_except64_pstate(vcpu);
-vcpu_write_spsr(vcpu, cpsr);
+enter_exception64(vcpu, PSR_MODE_EL1h, except_type_sync);
 vcpu_write_sys_reg(vcpu, addr, FAR_EL1);
@@ -148,14 +154,9 @@ static void inject_abt64(struct kvm_vcpu *vcpu, bool is_iabt, unsigned long addr
 static void inject_undef64(struct kvm_vcpu *vcpu)
 {
-unsigned long cpsr = *vcpu_cpsr(vcpu);
 u32 esr = (ESR_ELx_EC_UNKNOWN << ESR_ELx_EC_SHIFT);
-vcpu_write_elr_el1(vcpu, *vcpu_pc(vcpu));
-*vcpu_pc(vcpu) = get_except_vector(vcpu, except_type_sync);
-*vcpu_cpsr(vcpu) = get_except64_pstate(vcpu);
-vcpu_write_spsr(vcpu, cpsr);
+enter_exception64(vcpu, PSR_MODE_EL1h, except_type_sync);
 /*
 * Build an unknown exception, depending on the instruction


@@ -131,7 +131,7 @@ int io_mem_abort(struct kvm_vcpu *vcpu, struct kvm_run *run,
 /*
 * No valid syndrome? Ask userspace for help if it has
-* voluntered to do so, and bail out otherwise.
+* volunteered to do so, and bail out otherwise.
 */
 if (!kvm_vcpu_dabt_isvalid(vcpu)) {
 if (vcpu->kvm->arch.return_nisv_io_abort_to_user) {


@@ -422,6 +422,9 @@ static void stage2_flush_memslot(struct kvm *kvm,
 next = stage2_pgd_addr_end(kvm, addr, end);
 if (!stage2_pgd_none(kvm, *pgd))
 stage2_flush_puds(kvm, pgd, addr, next);
+if (next != end)
+cond_resched_lock(&kvm->mmu_lock);
 } while (pgd++, addr = next, addr != end);
 }
@@ -784,7 +787,7 @@ static int __create_hyp_private_mapping(phys_addr_t phys_addr, size_t size,
 mutex_lock(&kvm_hyp_pgd_mutex);
 /*
-* This assumes that we we have enough space below the idmap
+* This assumes that we have enough space below the idmap
 * page to allocate our VAs. If not, the check below will
 * kick. A potential alternative would be to detect that
 * overflow and switch to an allocation above the idmap.
@@ -964,7 +967,7 @@ static void stage2_unmap_memslot(struct kvm *kvm,
 * stage2_unmap_vm - Unmap Stage-2 RAM mappings
 * @kvm: The struct kvm pointer
 *
-* Go through the memregions and unmap any reguler RAM
+* Go through the memregions and unmap any regular RAM
 * backing memory already mapped to the VM.
 */
 void stage2_unmap_vm(struct kvm *kvm)
@ -1372,47 +1375,6 @@ int kvm_phys_addr_ioremap(struct kvm *kvm, phys_addr_t guest_ipa,
return ret; return ret;
} }
static bool transparent_hugepage_adjust(kvm_pfn_t *pfnp, phys_addr_t *ipap)
{
kvm_pfn_t pfn = *pfnp;
gfn_t gfn = *ipap >> PAGE_SHIFT;
if (kvm_is_transparent_hugepage(pfn)) {
unsigned long mask;
/*
* The address we faulted on is backed by a transparent huge
* page. However, because we map the compound huge page and
* not the individual tail page, we need to transfer the
* refcount to the head page. We have to be careful that the
* THP doesn't start to split while we are adjusting the
* refcounts.
*
* We are sure this doesn't happen, because mmu_notifier_retry
* was successful and we are holding the mmu_lock, so if this
* THP is trying to split, it will be blocked in the mmu
* notifier before touching any of the pages, specifically
* before being able to call __split_huge_page_refcount().
*
* We can therefore safely transfer the refcount from PG_tail
* to PG_head and switch the pfn from a tail page to the head
* page accordingly.
*/
mask = PTRS_PER_PMD - 1;
VM_BUG_ON((gfn & mask) != (pfn & mask));
if (pfn & mask) {
*ipap &= PMD_MASK;
kvm_release_pfn_clean(pfn);
pfn &= ~mask;
kvm_get_pfn(pfn);
*pfnp = pfn;
}
return true;
}
return false;
}
/** /**
* stage2_wp_ptes - write protect PMD range * stage2_wp_ptes - write protect PMD range
* @pmd: pointer to pmd entry * @pmd: pointer to pmd entry
@@ -1607,6 +1569,10 @@ static bool fault_supports_stage2_huge_mapping(struct kvm_memory_slot *memslot,
 hva_t uaddr_start, uaddr_end;
 size_t size;
+/* The memslot and the VMA are guaranteed to be aligned to PAGE_SIZE */
+if (map_size == PAGE_SIZE)
+return true;
 size = memslot->npages * PAGE_SIZE;
 gpa_start = memslot->base_gfn << PAGE_SHIFT;
@@ -1626,7 +1592,7 @@ static bool fault_supports_stage2_huge_mapping(struct kvm_memory_slot *memslot,
 * |abcde|fgh Stage-1 block | Stage-1 block tv|xyz|
 * +-----+--------------------+--------------------+---+
 *
-* memslot->base_gfn << PAGE_SIZE:
+* memslot->base_gfn << PAGE_SHIFT:
 * +---+--------------------+--------------------+-----+
 * |abc|def Stage-2 block | Stage-2 block |tvxyz|
 * +---+--------------------+--------------------+-----+
@@ -1656,6 +1622,59 @@ static bool fault_supports_stage2_huge_mapping(struct kvm_memory_slot *memslot,
 (hva & ~(map_size - 1)) + map_size <= uaddr_end;
 }
/*
* Check if the given hva is backed by a transparent huge page (THP) and
* whether it can be mapped using block mapping in stage2. If so, adjust
* the stage2 PFN and IPA accordingly. Only PMD_SIZE THPs are currently
* supported. This will need to be updated to support other THP sizes.
*
* Returns the size of the mapping.
*/
static unsigned long
transparent_hugepage_adjust(struct kvm_memory_slot *memslot,
unsigned long hva, kvm_pfn_t *pfnp,
phys_addr_t *ipap)
{
kvm_pfn_t pfn = *pfnp;
/*
* Make sure the adjustment is done only for THP pages. Also make
* sure that the HVA and IPA are sufficiently aligned and that the
* block map is contained within the memslot.
*/
if (kvm_is_transparent_hugepage(pfn) &&
fault_supports_stage2_huge_mapping(memslot, hva, PMD_SIZE)) {
/*
* The address we faulted on is backed by a transparent huge
* page. However, because we map the compound huge page and
* not the individual tail page, we need to transfer the
* refcount to the head page. We have to be careful that the
* THP doesn't start to split while we are adjusting the
* refcounts.
*
* We are sure this doesn't happen, because mmu_notifier_retry
* was successful and we are holding the mmu_lock, so if this
* THP is trying to split, it will be blocked in the mmu
* notifier before touching any of the pages, specifically
* before being able to call __split_huge_page_refcount().
*
* We can therefore safely transfer the refcount from PG_tail
* to PG_head and switch the pfn from a tail page to the head
* page accordingly.
*/
*ipap &= PMD_MASK;
kvm_release_pfn_clean(pfn);
pfn &= ~(PTRS_PER_PMD - 1);
kvm_get_pfn(pfn);
*pfnp = pfn;
return PMD_SIZE;
}
/* Use page mapping if we cannot use block mapping. */
return PAGE_SIZE;
}
 static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
 struct kvm_memory_slot *memslot, unsigned long hva,
 unsigned long fault_status)
@@ -1769,20 +1788,13 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
 if (mmu_notifier_retry(kvm, mmu_seq))
 goto out_unlock;
-if (vma_pagesize == PAGE_SIZE && !force_pte) {
-/*
-* Only PMD_SIZE transparent hugepages(THP) are
-* currently supported. This code will need to be
-* updated to support other THP sizes.
-*
-* Make sure the host VA and the guest IPA are sufficiently
-* aligned and that the block is contained within the memslot.
-*/
-if (fault_supports_stage2_huge_mapping(memslot, hva, PMD_SIZE) &&
-transparent_hugepage_adjust(&pfn, &fault_ipa))
-vma_pagesize = PMD_SIZE;
-}
+/*
+* If we are not forced to use page mapping, check if we are
+* backed by a THP and thus use block mapping if possible.
+*/
+if (vma_pagesize == PAGE_SIZE && !force_pte)
+vma_pagesize = transparent_hugepage_adjust(memslot, hva,
+&pfn, &fault_ipa);
 if (writable)
 kvm_set_pfn_dirty(pfn);
@@ -2185,11 +2197,11 @@ int kvm_mmu_init(void)
 {
 int err;
-hyp_idmap_start = kvm_virt_to_phys(__hyp_idmap_text_start);
+hyp_idmap_start = __pa_symbol(__hyp_idmap_text_start);
 hyp_idmap_start = ALIGN_DOWN(hyp_idmap_start, PAGE_SIZE);
-hyp_idmap_end = kvm_virt_to_phys(__hyp_idmap_text_end);
+hyp_idmap_end = __pa_symbol(__hyp_idmap_text_end);
 hyp_idmap_end = ALIGN(hyp_idmap_end, PAGE_SIZE);
-hyp_idmap_vector = kvm_virt_to_phys(__kvm_hyp_init);
+hyp_idmap_vector = __pa_symbol(__kvm_hyp_init);
 /*
 * We rely on the linker script to ensure at build time that the HYP
@@ -2262,11 +2274,19 @@ void kvm_arch_commit_memory_region(struct kvm *kvm,
 {
 /*
 * At this point memslot has been committed and there is an
-* allocated dirty_bitmap[], dirty pages will be be tracked while the
+* allocated dirty_bitmap[], dirty pages will be tracked while the
 * memory slot is write protected.
 */
-if (change != KVM_MR_DELETE && mem->flags & KVM_MEM_LOG_DIRTY_PAGES)
-kvm_mmu_wp_memory_region(kvm, mem->slot);
+if (change != KVM_MR_DELETE && mem->flags & KVM_MEM_LOG_DIRTY_PAGES) {
+/*
+* If we're with initial-all-set, we don't need to write
+* protect any pages because they're all reported as dirty.
+* Huge pages and normal pages will be write protect gradually.
+*/
+if (!kvm_dirty_log_manual_protect_and_init_set(kvm)) {
+kvm_mmu_wp_memory_region(kvm, mem->slot);
+}
+}
 }
 int kvm_arch_prepare_memory_region(struct kvm *kvm,
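The kvm_arch_commit_memory_region() change above is the arm64 side of KVM_DIRTY_LOG_INITIALLY_SET: nothing is write-protected up front, all pages start out reported as dirty, and userspace write-protects them gradually by clearing chunks of the bitmap with KVM_CLEAR_DIRTY_LOG. A rough sketch of that clearing step; the slot id, page range and pre-filled bitmap are assumed to come from the VMM's own bookkeeping, and the helper name is illustrative:

#include <linux/kvm.h>
#include <sys/ioctl.h>

/*
 * Clear (and thus write-protect) one chunk of a memslot's dirty bitmap.
 * first_page and num_pages must be 64-page aligned, except that the last
 * chunk may simply run to the end of the slot.
 */
static int clear_dirty_chunk(int vm_fd, __u32 slot, __u64 first_page,
			     __u32 num_pages, void *bitmap)
{
	struct kvm_clear_dirty_log clear = {
		.slot = slot,
		.num_pages = num_pages,
		.first_page = first_page,
		.dirty_bitmap = bitmap,	/* bits set for the pages to clear */
	};

	return ioctl(vm_fd, KVM_CLEAR_DIRTY_LOG, &clear);
}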


@@ -94,7 +94,7 @@ static unsigned long kvm_psci_vcpu_on(struct kvm_vcpu *source_vcpu)
 /*
 * NOTE: We always update r0 (or x0) because for PSCI v0.1
-* the general puspose registers are undefined upon CPU_ON.
+* the general purpose registers are undefined upon CPU_ON.
 */
 reset_state->r0 = smccc_get_arg3(source_vcpu);
@@ -265,10 +265,10 @@ static int kvm_psci_0_2_call(struct kvm_vcpu *vcpu)
 case PSCI_0_2_FN_SYSTEM_OFF:
 kvm_psci_system_off(vcpu);
 /*
-* We should'nt be going back to guest VCPU after
+* We shouldn't be going back to guest VCPU after
 * receiving SYSTEM_OFF request.
 *
-* If user space accidently/deliberately resumes
+* If user space accidentally/deliberately resumes
 * guest VCPU after SYSTEM_OFF request then guest
 * VCPU should see internal failure from PSCI return
 * value. To achieve this, we preload r0 (or x0) with


@@ -36,15 +36,11 @@ static u32 kvm_ipa_limit;
 /*
 * ARMv8 Reset Values
 */
-static const struct kvm_regs default_regs_reset = {
-.regs.pstate = (PSR_MODE_EL1h | PSR_A_BIT | PSR_I_BIT |
-PSR_F_BIT | PSR_D_BIT),
-};
-static const struct kvm_regs default_regs_reset32 = {
-.regs.pstate = (PSR_AA32_MODE_SVC | PSR_AA32_A_BIT |
-PSR_AA32_I_BIT | PSR_AA32_F_BIT),
-};
+#define VCPU_RESET_PSTATE_EL1 (PSR_MODE_EL1h | PSR_A_BIT | PSR_I_BIT | \
+ PSR_F_BIT | PSR_D_BIT)
+#define VCPU_RESET_PSTATE_SVC (PSR_AA32_MODE_SVC | PSR_AA32_A_BIT | \
+ PSR_AA32_I_BIT | PSR_AA32_F_BIT)
 static bool cpu_has_32bit_el1(void)
 {
@@ -163,7 +159,7 @@ static int kvm_vcpu_finalize_sve(struct kvm_vcpu *vcpu)
 vl = vcpu->arch.sve_max_vl;
 /*
-* Resposibility for these properties is shared between
+* Responsibility for these properties is shared between
 * kvm_arm_init_arch_resources(), kvm_vcpu_enable_sve() and
 * set_sve_vls(). Double-check here just to be sure:
 */
@@ -249,7 +245,7 @@ static int kvm_vcpu_enable_ptrauth(struct kvm_vcpu *vcpu)
 * ioctl or as part of handling a request issued by another VCPU in the PSCI
 * handling code. In the first case, the VCPU will not be loaded, and in the
 * second case the VCPU will be loaded. Because this function operates purely
-* on the memory-backed valus of system registers, we want to do a full put if
+* on the memory-backed values of system registers, we want to do a full put if
 * we were loaded (handling a request) and load the values back at the end of
 * the function. Otherwise we leave the state alone. In both cases, we
 * disable preemption around the vcpu reset as we would otherwise race with
@@ -257,9 +253,9 @@ static int kvm_vcpu_enable_ptrauth(struct kvm_vcpu *vcpu)
 */
 int kvm_reset_vcpu(struct kvm_vcpu *vcpu)
 {
-const struct kvm_regs *cpu_reset;
 int ret = -EINVAL;
 bool loaded;
+u32 pstate;
 /* Reset PMU outside of the non-preemptible section */
 kvm_pmu_vcpu_reset(vcpu);
@@ -290,16 +286,17 @@ int kvm_reset_vcpu(struct kvm_vcpu *vcpu)
 if (test_bit(KVM_ARM_VCPU_EL1_32BIT, vcpu->arch.features)) {
 if (!cpu_has_32bit_el1())
 goto out;
-cpu_reset = &default_regs_reset32;
+pstate = VCPU_RESET_PSTATE_SVC;
 } else {
-cpu_reset = &default_regs_reset;
+pstate = VCPU_RESET_PSTATE_EL1;
 }
 break;
 }
 /* Reset core registers */
-memcpy(vcpu_gp_regs(vcpu), cpu_reset, sizeof(*cpu_reset));
+memset(vcpu_gp_regs(vcpu), 0, sizeof(*vcpu_gp_regs(vcpu)));
+vcpu_gp_regs(vcpu)->regs.pstate = pstate;
 /* Reset system registers */
 kvm_reset_sys_regs(vcpu);
@@ -357,7 +354,7 @@ void kvm_set_ipa_limit(void)
 *
 * So clamp the ipa limit further down to limit the number of levels.
 * Since we can concatenate upto 16 tables at entry level, we could
-* go upto 4bits above the maximum VA addressible with the current
+* go upto 4bits above the maximum VA addressable with the current
 * number of levels.
 */
 va_max = PGDIR_SHIFT + PAGE_SHIFT - 3;


@@ -34,7 +34,7 @@
 #include "trace.h"
 /*
-* All of this file is extremly similar to the ARM coproc.c, but the
+* All of this file is extremely similar to the ARM coproc.c, but the
 * types are different. My gut feeling is that it should be pretty
 * easy to merge, but that would be an ABI breakage -- again. VFP
 * would also need to be abstracted.
@ -64,11 +64,8 @@ static bool write_to_read_only(struct kvm_vcpu *vcpu,
return false; return false;
} }
u64 vcpu_read_sys_reg(const struct kvm_vcpu *vcpu, int reg) static bool __vcpu_read_sys_reg_from_cpu(int reg, u64 *val)
{ {
if (!vcpu->arch.sysregs_loaded_on_cpu)
goto immediate_read;
/* /*
* System registers listed in the switch are not saved on every * System registers listed in the switch are not saved on every
* exit from the guest but are only saved on vcpu_put. * exit from the guest but are only saved on vcpu_put.
@ -79,75 +76,92 @@ u64 vcpu_read_sys_reg(const struct kvm_vcpu *vcpu, int reg)
* thread when emulating cross-VCPU communication. * thread when emulating cross-VCPU communication.
*/ */
switch (reg) { switch (reg) {
case CSSELR_EL1: return read_sysreg_s(SYS_CSSELR_EL1); case CSSELR_EL1: *val = read_sysreg_s(SYS_CSSELR_EL1); break;
case SCTLR_EL1: return read_sysreg_s(SYS_SCTLR_EL12); case SCTLR_EL1: *val = read_sysreg_s(SYS_SCTLR_EL12); break;
case ACTLR_EL1: return read_sysreg_s(SYS_ACTLR_EL1); case ACTLR_EL1: *val = read_sysreg_s(SYS_ACTLR_EL1); break;
case CPACR_EL1: return read_sysreg_s(SYS_CPACR_EL12); case CPACR_EL1: *val = read_sysreg_s(SYS_CPACR_EL12); break;
case TTBR0_EL1: return read_sysreg_s(SYS_TTBR0_EL12); case TTBR0_EL1: *val = read_sysreg_s(SYS_TTBR0_EL12); break;
case TTBR1_EL1: return read_sysreg_s(SYS_TTBR1_EL12); case TTBR1_EL1: *val = read_sysreg_s(SYS_TTBR1_EL12); break;
case TCR_EL1: return read_sysreg_s(SYS_TCR_EL12); case TCR_EL1: *val = read_sysreg_s(SYS_TCR_EL12); break;
case ESR_EL1: return read_sysreg_s(SYS_ESR_EL12); case ESR_EL1: *val = read_sysreg_s(SYS_ESR_EL12); break;
case AFSR0_EL1: return read_sysreg_s(SYS_AFSR0_EL12); case AFSR0_EL1: *val = read_sysreg_s(SYS_AFSR0_EL12); break;
case AFSR1_EL1: return read_sysreg_s(SYS_AFSR1_EL12); case AFSR1_EL1: *val = read_sysreg_s(SYS_AFSR1_EL12); break;
case FAR_EL1: return read_sysreg_s(SYS_FAR_EL12); case FAR_EL1: *val = read_sysreg_s(SYS_FAR_EL12); break;
case MAIR_EL1: return read_sysreg_s(SYS_MAIR_EL12); case MAIR_EL1: *val = read_sysreg_s(SYS_MAIR_EL12); break;
case VBAR_EL1: return read_sysreg_s(SYS_VBAR_EL12); case VBAR_EL1: *val = read_sysreg_s(SYS_VBAR_EL12); break;
case CONTEXTIDR_EL1: return read_sysreg_s(SYS_CONTEXTIDR_EL12); case CONTEXTIDR_EL1: *val = read_sysreg_s(SYS_CONTEXTIDR_EL12);break;
case TPIDR_EL0: return read_sysreg_s(SYS_TPIDR_EL0); case TPIDR_EL0: *val = read_sysreg_s(SYS_TPIDR_EL0); break;
case TPIDRRO_EL0: return read_sysreg_s(SYS_TPIDRRO_EL0); case TPIDRRO_EL0: *val = read_sysreg_s(SYS_TPIDRRO_EL0); break;
case TPIDR_EL1: return read_sysreg_s(SYS_TPIDR_EL1); case TPIDR_EL1: *val = read_sysreg_s(SYS_TPIDR_EL1); break;
case AMAIR_EL1: return read_sysreg_s(SYS_AMAIR_EL12); case AMAIR_EL1: *val = read_sysreg_s(SYS_AMAIR_EL12); break;
case CNTKCTL_EL1: return read_sysreg_s(SYS_CNTKCTL_EL12); case CNTKCTL_EL1: *val = read_sysreg_s(SYS_CNTKCTL_EL12); break;
case PAR_EL1: return read_sysreg_s(SYS_PAR_EL1); case PAR_EL1: *val = read_sysreg_s(SYS_PAR_EL1); break;
case DACR32_EL2: return read_sysreg_s(SYS_DACR32_EL2); case DACR32_EL2: *val = read_sysreg_s(SYS_DACR32_EL2); break;
case IFSR32_EL2: return read_sysreg_s(SYS_IFSR32_EL2); case IFSR32_EL2: *val = read_sysreg_s(SYS_IFSR32_EL2); break;
case DBGVCR32_EL2: return read_sysreg_s(SYS_DBGVCR32_EL2); case DBGVCR32_EL2: *val = read_sysreg_s(SYS_DBGVCR32_EL2); break;
default: return false;
} }
immediate_read: return true;
return __vcpu_sys_reg(vcpu, reg);
} }
void vcpu_write_sys_reg(struct kvm_vcpu *vcpu, u64 val, int reg) static bool __vcpu_write_sys_reg_to_cpu(u64 val, int reg)
{ {
if (!vcpu->arch.sysregs_loaded_on_cpu)
goto immediate_write;
/* /*
* System registers listed in the switch are not restored on every * System registers listed in the switch are not restored on every
* entry to the guest but are only restored on vcpu_load. * entry to the guest but are only restored on vcpu_load.
* *
* Note that MPIDR_EL1 for the guest is set by KVM via VMPIDR_EL2 but * Note that MPIDR_EL1 for the guest is set by KVM via VMPIDR_EL2 but
* should never be listed below, because the the MPIDR should only be * should never be listed below, because the MPIDR should only be set
* set once, before running the VCPU, and never changed later. * once, before running the VCPU, and never changed later.
*/ */
switch (reg) { switch (reg) {
case CSSELR_EL1: write_sysreg_s(val, SYS_CSSELR_EL1); return; case CSSELR_EL1: write_sysreg_s(val, SYS_CSSELR_EL1); break;
case SCTLR_EL1: write_sysreg_s(val, SYS_SCTLR_EL12); return; case SCTLR_EL1: write_sysreg_s(val, SYS_SCTLR_EL12); break;
case ACTLR_EL1: write_sysreg_s(val, SYS_ACTLR_EL1); return; case ACTLR_EL1: write_sysreg_s(val, SYS_ACTLR_EL1); break;
case CPACR_EL1: write_sysreg_s(val, SYS_CPACR_EL12); return; case CPACR_EL1: write_sysreg_s(val, SYS_CPACR_EL12); break;
case TTBR0_EL1: write_sysreg_s(val, SYS_TTBR0_EL12); return; case TTBR0_EL1: write_sysreg_s(val, SYS_TTBR0_EL12); break;
case TTBR1_EL1: write_sysreg_s(val, SYS_TTBR1_EL12); return; case TTBR1_EL1: write_sysreg_s(val, SYS_TTBR1_EL12); break;
case TCR_EL1: write_sysreg_s(val, SYS_TCR_EL12); return; case TCR_EL1: write_sysreg_s(val, SYS_TCR_EL12); break;
case ESR_EL1: write_sysreg_s(val, SYS_ESR_EL12); return; case ESR_EL1: write_sysreg_s(val, SYS_ESR_EL12); break;
case AFSR0_EL1: write_sysreg_s(val, SYS_AFSR0_EL12); return; case AFSR0_EL1: write_sysreg_s(val, SYS_AFSR0_EL12); break;
case AFSR1_EL1: write_sysreg_s(val, SYS_AFSR1_EL12); return; case AFSR1_EL1: write_sysreg_s(val, SYS_AFSR1_EL12); break;
case FAR_EL1: write_sysreg_s(val, SYS_FAR_EL12); return; case FAR_EL1: write_sysreg_s(val, SYS_FAR_EL12); break;
case MAIR_EL1: write_sysreg_s(val, SYS_MAIR_EL12); return; case MAIR_EL1: write_sysreg_s(val, SYS_MAIR_EL12); break;
case VBAR_EL1: write_sysreg_s(val, SYS_VBAR_EL12); return; case VBAR_EL1: write_sysreg_s(val, SYS_VBAR_EL12); break;
case CONTEXTIDR_EL1: write_sysreg_s(val, SYS_CONTEXTIDR_EL12); return; case CONTEXTIDR_EL1: write_sysreg_s(val, SYS_CONTEXTIDR_EL12);break;
case TPIDR_EL0: write_sysreg_s(val, SYS_TPIDR_EL0); return; case TPIDR_EL0: write_sysreg_s(val, SYS_TPIDR_EL0); break;
case TPIDRRO_EL0: write_sysreg_s(val, SYS_TPIDRRO_EL0); return; case TPIDRRO_EL0: write_sysreg_s(val, SYS_TPIDRRO_EL0); break;
case TPIDR_EL1: write_sysreg_s(val, SYS_TPIDR_EL1); return; case TPIDR_EL1: write_sysreg_s(val, SYS_TPIDR_EL1); break;
case AMAIR_EL1: write_sysreg_s(val, SYS_AMAIR_EL12); return; case AMAIR_EL1: write_sysreg_s(val, SYS_AMAIR_EL12); break;
case CNTKCTL_EL1: write_sysreg_s(val, SYS_CNTKCTL_EL12); return; case CNTKCTL_EL1: write_sysreg_s(val, SYS_CNTKCTL_EL12); break;
case PAR_EL1: write_sysreg_s(val, SYS_PAR_EL1); return; case PAR_EL1: write_sysreg_s(val, SYS_PAR_EL1); break;
case DACR32_EL2: write_sysreg_s(val, SYS_DACR32_EL2); return; case DACR32_EL2: write_sysreg_s(val, SYS_DACR32_EL2); break;
case IFSR32_EL2: write_sysreg_s(val, SYS_IFSR32_EL2); return; case IFSR32_EL2: write_sysreg_s(val, SYS_IFSR32_EL2); break;
case DBGVCR32_EL2: write_sysreg_s(val, SYS_DBGVCR32_EL2); return; case DBGVCR32_EL2: write_sysreg_s(val, SYS_DBGVCR32_EL2); break;
default: return false;
} }
immediate_write: return true;
}
u64 vcpu_read_sys_reg(const struct kvm_vcpu *vcpu, int reg)
{
u64 val = 0x8badf00d8badf00d;
if (vcpu->arch.sysregs_loaded_on_cpu &&
__vcpu_read_sys_reg_from_cpu(reg, &val))
return val;
return __vcpu_sys_reg(vcpu, reg);
}
void vcpu_write_sys_reg(struct kvm_vcpu *vcpu, u64 val, int reg)
{
if (vcpu->arch.sysregs_loaded_on_cpu &&
__vcpu_write_sys_reg_to_cpu(val, reg))
return;
__vcpu_sys_reg(vcpu, reg) = val; __vcpu_sys_reg(vcpu, reg) = val;
} }
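The two accessors above are the only sanctioned way for emulation code to touch a guest system register: when the vcpu's sysregs are loaded on the CPU they go straight to hardware, otherwise they fall back to the in-memory copy. A minimal sketch of a caller, assuming a hypothetical handler name (TPIDR_EL1 and the accessors themselves are real identifiers from this file):

static bool emulate_tpidr_write_sketch(struct kvm_vcpu *vcpu, u64 val)
{
	/* Reads hardware if loaded, the shadow copy otherwise. */
	u64 old = vcpu_read_sys_reg(vcpu, TPIDR_EL1);

	/* Writes are routed the same way, never via __vcpu_sys_reg() directly. */
	if (old != val)
		vcpu_write_sys_reg(vcpu, val, TPIDR_EL1);

	return true;
}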
@ -1532,7 +1546,7 @@ static const struct sys_reg_desc sys_reg_descs[] = {
{ SYS_DESC(SYS_PAR_EL1), NULL, reset_unknown, PAR_EL1 }, { SYS_DESC(SYS_PAR_EL1), NULL, reset_unknown, PAR_EL1 },
{ SYS_DESC(SYS_PMINTENSET_EL1), access_pminten, reset_unknown, PMINTENSET_EL1 }, { SYS_DESC(SYS_PMINTENSET_EL1), access_pminten, reset_unknown, PMINTENSET_EL1 },
{ SYS_DESC(SYS_PMINTENCLR_EL1), access_pminten, NULL, PMINTENSET_EL1 }, { SYS_DESC(SYS_PMINTENCLR_EL1), access_pminten, reset_unknown, PMINTENSET_EL1 },
{ SYS_DESC(SYS_MAIR_EL1), access_vm_reg, reset_unknown, MAIR_EL1 }, { SYS_DESC(SYS_MAIR_EL1), access_vm_reg, reset_unknown, MAIR_EL1 },
{ SYS_DESC(SYS_AMAIR_EL1), access_vm_reg, reset_amair_el1, AMAIR_EL1 }, { SYS_DESC(SYS_AMAIR_EL1), access_vm_reg, reset_amair_el1, AMAIR_EL1 },
@ -1571,8 +1585,8 @@ static const struct sys_reg_desc sys_reg_descs[] = {
{ SYS_DESC(SYS_PMCR_EL0), access_pmcr, reset_pmcr, PMCR_EL0 }, { SYS_DESC(SYS_PMCR_EL0), access_pmcr, reset_pmcr, PMCR_EL0 },
{ SYS_DESC(SYS_PMCNTENSET_EL0), access_pmcnten, reset_unknown, PMCNTENSET_EL0 }, { SYS_DESC(SYS_PMCNTENSET_EL0), access_pmcnten, reset_unknown, PMCNTENSET_EL0 },
{ SYS_DESC(SYS_PMCNTENCLR_EL0), access_pmcnten, NULL, PMCNTENSET_EL0 }, { SYS_DESC(SYS_PMCNTENCLR_EL0), access_pmcnten, reset_unknown, PMCNTENSET_EL0 },
{ SYS_DESC(SYS_PMOVSCLR_EL0), access_pmovs, NULL, PMOVSSET_EL0 }, { SYS_DESC(SYS_PMOVSCLR_EL0), access_pmovs, reset_unknown, PMOVSSET_EL0 },
{ SYS_DESC(SYS_PMSWINC_EL0), access_pmswinc, reset_unknown, PMSWINC_EL0 }, { SYS_DESC(SYS_PMSWINC_EL0), access_pmswinc, reset_unknown, PMSWINC_EL0 },
{ SYS_DESC(SYS_PMSELR_EL0), access_pmselr, reset_unknown, PMSELR_EL0 }, { SYS_DESC(SYS_PMSELR_EL0), access_pmselr, reset_unknown, PMSELR_EL0 },
{ SYS_DESC(SYS_PMCEID0_EL0), access_pmceid }, { SYS_DESC(SYS_PMCEID0_EL0), access_pmceid },
@ -2073,12 +2087,37 @@ static const struct sys_reg_desc cp15_64_regs[] = {
{ SYS_DESC(SYS_AARCH32_CNTP_CVAL), access_arch_timer }, { SYS_DESC(SYS_AARCH32_CNTP_CVAL), access_arch_timer },
}; };
static int check_sysreg_table(const struct sys_reg_desc *table, unsigned int n,
bool is_32)
{
unsigned int i;
for (i = 0; i < n; i++) {
if (!is_32 && table[i].reg && !table[i].reset) {
kvm_err("sys_reg table %p entry %d has lacks reset\n",
table, i);
return 1;
}
if (i && cmp_sys_reg(&table[i-1], &table[i]) >= 0) {
kvm_err("sys_reg table %p out of order (%d)\n", table, i - 1);
return 1;
}
}
return 0;
}
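check_sysreg_table() now enforces, for 64-bit tables, that any entry backed by a shadow register also carries a reset handler, on top of the existing ordering check. Illustrative entries in the style of the tables below (the register names and accessors are hypothetical):

{ SYS_DESC(SYS_FOO_EL1), access_foo, reset_unknown, FOO_EL1 },	/* accepted */
{ SYS_DESC(SYS_BAR_EL1), access_bar, NULL,          BAR_EL1 },	/* rejected: lacks reset */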
/* Target specific emulation tables */ /* Target specific emulation tables */
static struct kvm_sys_reg_target_table *target_tables[KVM_ARM_NUM_TARGETS]; static struct kvm_sys_reg_target_table *target_tables[KVM_ARM_NUM_TARGETS];
void kvm_register_target_sys_reg_table(unsigned int target, void kvm_register_target_sys_reg_table(unsigned int target,
struct kvm_sys_reg_target_table *table) struct kvm_sys_reg_target_table *table)
{ {
if (check_sysreg_table(table->table64.table, table->table64.num, false) ||
check_sysreg_table(table->table32.table, table->table32.num, true))
return;
target_tables[target] = table; target_tables[target] = table;
} }
@ -2364,19 +2403,13 @@ static int emulate_sys_reg(struct kvm_vcpu *vcpu,
} }
static void reset_sys_reg_descs(struct kvm_vcpu *vcpu, static void reset_sys_reg_descs(struct kvm_vcpu *vcpu,
const struct sys_reg_desc *table, size_t num, const struct sys_reg_desc *table, size_t num)
unsigned long *bmap)
{ {
unsigned long i; unsigned long i;
for (i = 0; i < num; i++) for (i = 0; i < num; i++)
if (table[i].reset) { if (table[i].reset)
int reg = table[i].reg;
table[i].reset(vcpu, &table[i]); table[i].reset(vcpu, &table[i]);
if (reg > 0 && reg < NR_SYS_REGS)
set_bit(reg, bmap);
}
} }
/** /**
@ -2832,32 +2865,18 @@ int kvm_arm_copy_sys_reg_indices(struct kvm_vcpu *vcpu, u64 __user *uindices)
return write_demux_regids(uindices); return write_demux_regids(uindices);
} }
static int check_sysreg_table(const struct sys_reg_desc *table, unsigned int n)
{
unsigned int i;
for (i = 1; i < n; i++) {
if (cmp_sys_reg(&table[i-1], &table[i]) >= 0) {
kvm_err("sys_reg table %p out of order (%d)\n", table, i - 1);
return 1;
}
}
return 0;
}
void kvm_sys_reg_table_init(void) void kvm_sys_reg_table_init(void)
{ {
unsigned int i; unsigned int i;
struct sys_reg_desc clidr; struct sys_reg_desc clidr;
/* Make sure tables are unique and in order. */ /* Make sure tables are unique and in order. */
BUG_ON(check_sysreg_table(sys_reg_descs, ARRAY_SIZE(sys_reg_descs))); BUG_ON(check_sysreg_table(sys_reg_descs, ARRAY_SIZE(sys_reg_descs), false));
BUG_ON(check_sysreg_table(cp14_regs, ARRAY_SIZE(cp14_regs))); BUG_ON(check_sysreg_table(cp14_regs, ARRAY_SIZE(cp14_regs), true));
BUG_ON(check_sysreg_table(cp14_64_regs, ARRAY_SIZE(cp14_64_regs))); BUG_ON(check_sysreg_table(cp14_64_regs, ARRAY_SIZE(cp14_64_regs), true));
BUG_ON(check_sysreg_table(cp15_regs, ARRAY_SIZE(cp15_regs))); BUG_ON(check_sysreg_table(cp15_regs, ARRAY_SIZE(cp15_regs), true));
BUG_ON(check_sysreg_table(cp15_64_regs, ARRAY_SIZE(cp15_64_regs))); BUG_ON(check_sysreg_table(cp15_64_regs, ARRAY_SIZE(cp15_64_regs), true));
BUG_ON(check_sysreg_table(invariant_sys_regs, ARRAY_SIZE(invariant_sys_regs))); BUG_ON(check_sysreg_table(invariant_sys_regs, ARRAY_SIZE(invariant_sys_regs), false));
/* We abuse the reset function to overwrite the table itself. */ /* We abuse the reset function to overwrite the table itself. */
for (i = 0; i < ARRAY_SIZE(invariant_sys_regs); i++) for (i = 0; i < ARRAY_SIZE(invariant_sys_regs); i++)
@ -2893,17 +2912,10 @@ void kvm_reset_sys_regs(struct kvm_vcpu *vcpu)
{ {
size_t num; size_t num;
const struct sys_reg_desc *table; const struct sys_reg_desc *table;
DECLARE_BITMAP(bmap, NR_SYS_REGS) = { 0, };
/* Generic chip reset first (so target could override). */ /* Generic chip reset first (so target could override). */
reset_sys_reg_descs(vcpu, sys_reg_descs, ARRAY_SIZE(sys_reg_descs), bmap); reset_sys_reg_descs(vcpu, sys_reg_descs, ARRAY_SIZE(sys_reg_descs));
table = get_target_table(vcpu->arch.target, true, &num); table = get_target_table(vcpu->arch.target, true, &num);
reset_sys_reg_descs(vcpu, table, num, bmap); reset_sys_reg_descs(vcpu, table, num);
for (num = 1; num < NR_SYS_REGS; num++) {
if (WARN(!test_bit(num, bmap),
"Didn't reset __vcpu_sys_reg(%zi)\n", num))
break;
}
} }
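With the reset requirement folded into check_sysreg_table(), every 64-bit entry that maps a shadow register is guaranteed a reset handler, so the bitmap that used to verify full coverage can go. A reset callback in these tables typically just stamps a recognisable value into the in-memory register file; a minimal sketch (the name and poison constant are illustrative, not quoted from the tree):

static void reset_unknown_sketch(struct kvm_vcpu *vcpu,
				 const struct sys_reg_desc *r)
{
	/* Any value works; a distinctive one makes a missed reset obvious. */
	__vcpu_sys_reg(vcpu, r->reg) = 0x1de7ec7edbadc0deULL;
}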

View File

@ -1,216 +1,8 @@
/* SPDX-License-Identifier: GPL-2.0 */ /* SPDX-License-Identifier: GPL-2.0 */
#if !defined(_TRACE_ARM64_KVM_H) || defined(TRACE_HEADER_MULTI_READ) #ifndef _TRACE_ARM64_KVM_H
#define _TRACE_ARM64_KVM_H #define _TRACE_ARM64_KVM_H
#include <linux/tracepoint.h> #include "trace_arm.h"
#include "sys_regs.h" #include "trace_handle_exit.h"
#undef TRACE_SYSTEM #endif /* _TRACE_ARM64_KVM_H */
#define TRACE_SYSTEM kvm
TRACE_EVENT(kvm_wfx_arm64,
TP_PROTO(unsigned long vcpu_pc, bool is_wfe),
TP_ARGS(vcpu_pc, is_wfe),
TP_STRUCT__entry(
__field(unsigned long, vcpu_pc)
__field(bool, is_wfe)
),
TP_fast_assign(
__entry->vcpu_pc = vcpu_pc;
__entry->is_wfe = is_wfe;
),
TP_printk("guest executed wf%c at: 0x%08lx",
__entry->is_wfe ? 'e' : 'i', __entry->vcpu_pc)
);
TRACE_EVENT(kvm_hvc_arm64,
TP_PROTO(unsigned long vcpu_pc, unsigned long r0, unsigned long imm),
TP_ARGS(vcpu_pc, r0, imm),
TP_STRUCT__entry(
__field(unsigned long, vcpu_pc)
__field(unsigned long, r0)
__field(unsigned long, imm)
),
TP_fast_assign(
__entry->vcpu_pc = vcpu_pc;
__entry->r0 = r0;
__entry->imm = imm;
),
TP_printk("HVC at 0x%08lx (r0: 0x%08lx, imm: 0x%lx)",
__entry->vcpu_pc, __entry->r0, __entry->imm)
);
TRACE_EVENT(kvm_arm_setup_debug,
TP_PROTO(struct kvm_vcpu *vcpu, __u32 guest_debug),
TP_ARGS(vcpu, guest_debug),
TP_STRUCT__entry(
__field(struct kvm_vcpu *, vcpu)
__field(__u32, guest_debug)
),
TP_fast_assign(
__entry->vcpu = vcpu;
__entry->guest_debug = guest_debug;
),
TP_printk("vcpu: %p, flags: 0x%08x", __entry->vcpu, __entry->guest_debug)
);
TRACE_EVENT(kvm_arm_clear_debug,
TP_PROTO(__u32 guest_debug),
TP_ARGS(guest_debug),
TP_STRUCT__entry(
__field(__u32, guest_debug)
),
TP_fast_assign(
__entry->guest_debug = guest_debug;
),
TP_printk("flags: 0x%08x", __entry->guest_debug)
);
TRACE_EVENT(kvm_arm_set_dreg32,
TP_PROTO(const char *name, __u32 value),
TP_ARGS(name, value),
TP_STRUCT__entry(
__field(const char *, name)
__field(__u32, value)
),
TP_fast_assign(
__entry->name = name;
__entry->value = value;
),
TP_printk("%s: 0x%08x", __entry->name, __entry->value)
);
TRACE_DEFINE_SIZEOF(__u64);
TRACE_EVENT(kvm_arm_set_regset,
TP_PROTO(const char *type, int len, __u64 *control, __u64 *value),
TP_ARGS(type, len, control, value),
TP_STRUCT__entry(
__field(const char *, name)
__field(int, len)
__array(u64, ctrls, 16)
__array(u64, values, 16)
),
TP_fast_assign(
__entry->name = type;
__entry->len = len;
memcpy(__entry->ctrls, control, len << 3);
memcpy(__entry->values, value, len << 3);
),
TP_printk("%d %s CTRL:%s VALUE:%s", __entry->len, __entry->name,
__print_array(__entry->ctrls, __entry->len, sizeof(__u64)),
__print_array(__entry->values, __entry->len, sizeof(__u64)))
);
TRACE_EVENT(trap_reg,
TP_PROTO(const char *fn, int reg, bool is_write, u64 write_value),
TP_ARGS(fn, reg, is_write, write_value),
TP_STRUCT__entry(
__field(const char *, fn)
__field(int, reg)
__field(bool, is_write)
__field(u64, write_value)
),
TP_fast_assign(
__entry->fn = fn;
__entry->reg = reg;
__entry->is_write = is_write;
__entry->write_value = write_value;
),
TP_printk("%s %s reg %d (0x%08llx)", __entry->fn, __entry->is_write?"write to":"read from", __entry->reg, __entry->write_value)
);
TRACE_EVENT(kvm_handle_sys_reg,
TP_PROTO(unsigned long hsr),
TP_ARGS(hsr),
TP_STRUCT__entry(
__field(unsigned long, hsr)
),
TP_fast_assign(
__entry->hsr = hsr;
),
TP_printk("HSR 0x%08lx", __entry->hsr)
);
TRACE_EVENT(kvm_sys_access,
TP_PROTO(unsigned long vcpu_pc, struct sys_reg_params *params, const struct sys_reg_desc *reg),
TP_ARGS(vcpu_pc, params, reg),
TP_STRUCT__entry(
__field(unsigned long, vcpu_pc)
__field(bool, is_write)
__field(const char *, name)
__field(u8, Op0)
__field(u8, Op1)
__field(u8, CRn)
__field(u8, CRm)
__field(u8, Op2)
),
TP_fast_assign(
__entry->vcpu_pc = vcpu_pc;
__entry->is_write = params->is_write;
__entry->name = reg->name;
__entry->Op0 = reg->Op0;
__entry->Op0 = reg->Op0;
__entry->Op1 = reg->Op1;
__entry->CRn = reg->CRn;
__entry->CRm = reg->CRm;
__entry->Op2 = reg->Op2;
),
TP_printk("PC: %lx %s (%d,%d,%d,%d,%d) %s",
__entry->vcpu_pc, __entry->name ?: "UNKN",
__entry->Op0, __entry->Op1, __entry->CRn,
__entry->CRm, __entry->Op2,
__entry->is_write ? "write" : "read")
);
TRACE_EVENT(kvm_set_guest_debug,
TP_PROTO(struct kvm_vcpu *vcpu, __u32 guest_debug),
TP_ARGS(vcpu, guest_debug),
TP_STRUCT__entry(
__field(struct kvm_vcpu *, vcpu)
__field(__u32, guest_debug)
),
TP_fast_assign(
__entry->vcpu = vcpu;
__entry->guest_debug = guest_debug;
),
TP_printk("vcpu: %p, flags: 0x%08x", __entry->vcpu, __entry->guest_debug)
);
#endif /* _TRACE_ARM64_KVM_H */
#undef TRACE_INCLUDE_PATH
#define TRACE_INCLUDE_PATH .
#undef TRACE_INCLUDE_FILE
#define TRACE_INCLUDE_FILE trace
/* This part must be outside protection */
#include <trace/define_trace.h>

View File

@ -1,10 +1,9 @@
/* SPDX-License-Identifier: GPL-2.0 */ /* SPDX-License-Identifier: GPL-2.0 */
#if !defined(_TRACE_KVM_H) || defined(TRACE_HEADER_MULTI_READ) #if !defined(_TRACE_ARM_ARM64_KVM_H) || defined(TRACE_HEADER_MULTI_READ)
#define _TRACE_KVM_H #define _TRACE_ARM_ARM64_KVM_H
#include <kvm/arm_arch_timer.h> #include <kvm/arm_arch_timer.h>
#include <linux/tracepoint.h> #include <linux/tracepoint.h>
#include <asm/kvm_arm.h>
#undef TRACE_SYSTEM #undef TRACE_SYSTEM
#define TRACE_SYSTEM kvm #define TRACE_SYSTEM kvm
@ -368,12 +367,12 @@ TRACE_EVENT(kvm_timer_emulate,
__entry->timer_idx, __entry->should_fire) __entry->timer_idx, __entry->should_fire)
); );
#endif /* _TRACE_KVM_H */ #endif /* _TRACE_ARM_ARM64_KVM_H */
#undef TRACE_INCLUDE_PATH #undef TRACE_INCLUDE_PATH
#define TRACE_INCLUDE_PATH ../../virt/kvm/arm #define TRACE_INCLUDE_PATH .
#undef TRACE_INCLUDE_FILE #undef TRACE_INCLUDE_FILE
#define TRACE_INCLUDE_FILE trace #define TRACE_INCLUDE_FILE trace_arm
/* This part must be outside protection */ /* This part must be outside protection */
#include <trace/define_trace.h> #include <trace/define_trace.h>

View File

@ -0,0 +1,215 @@
/* SPDX-License-Identifier: GPL-2.0 */
#if !defined(_TRACE_HANDLE_EXIT_ARM64_KVM_H) || defined(TRACE_HEADER_MULTI_READ)
#define _TRACE_HANDLE_EXIT_ARM64_KVM_H
#include <linux/tracepoint.h>
#include "sys_regs.h"
#undef TRACE_SYSTEM
#define TRACE_SYSTEM kvm
TRACE_EVENT(kvm_wfx_arm64,
TP_PROTO(unsigned long vcpu_pc, bool is_wfe),
TP_ARGS(vcpu_pc, is_wfe),
TP_STRUCT__entry(
__field(unsigned long, vcpu_pc)
__field(bool, is_wfe)
),
TP_fast_assign(
__entry->vcpu_pc = vcpu_pc;
__entry->is_wfe = is_wfe;
),
TP_printk("guest executed wf%c at: 0x%08lx",
__entry->is_wfe ? 'e' : 'i', __entry->vcpu_pc)
);
TRACE_EVENT(kvm_hvc_arm64,
TP_PROTO(unsigned long vcpu_pc, unsigned long r0, unsigned long imm),
TP_ARGS(vcpu_pc, r0, imm),
TP_STRUCT__entry(
__field(unsigned long, vcpu_pc)
__field(unsigned long, r0)
__field(unsigned long, imm)
),
TP_fast_assign(
__entry->vcpu_pc = vcpu_pc;
__entry->r0 = r0;
__entry->imm = imm;
),
TP_printk("HVC at 0x%08lx (r0: 0x%08lx, imm: 0x%lx)",
__entry->vcpu_pc, __entry->r0, __entry->imm)
);
TRACE_EVENT(kvm_arm_setup_debug,
TP_PROTO(struct kvm_vcpu *vcpu, __u32 guest_debug),
TP_ARGS(vcpu, guest_debug),
TP_STRUCT__entry(
__field(struct kvm_vcpu *, vcpu)
__field(__u32, guest_debug)
),
TP_fast_assign(
__entry->vcpu = vcpu;
__entry->guest_debug = guest_debug;
),
TP_printk("vcpu: %p, flags: 0x%08x", __entry->vcpu, __entry->guest_debug)
);
TRACE_EVENT(kvm_arm_clear_debug,
TP_PROTO(__u32 guest_debug),
TP_ARGS(guest_debug),
TP_STRUCT__entry(
__field(__u32, guest_debug)
),
TP_fast_assign(
__entry->guest_debug = guest_debug;
),
TP_printk("flags: 0x%08x", __entry->guest_debug)
);
TRACE_EVENT(kvm_arm_set_dreg32,
TP_PROTO(const char *name, __u32 value),
TP_ARGS(name, value),
TP_STRUCT__entry(
__field(const char *, name)
__field(__u32, value)
),
TP_fast_assign(
__entry->name = name;
__entry->value = value;
),
TP_printk("%s: 0x%08x", __entry->name, __entry->value)
);
TRACE_DEFINE_SIZEOF(__u64);
TRACE_EVENT(kvm_arm_set_regset,
TP_PROTO(const char *type, int len, __u64 *control, __u64 *value),
TP_ARGS(type, len, control, value),
TP_STRUCT__entry(
__field(const char *, name)
__field(int, len)
__array(u64, ctrls, 16)
__array(u64, values, 16)
),
TP_fast_assign(
__entry->name = type;
__entry->len = len;
memcpy(__entry->ctrls, control, len << 3);
memcpy(__entry->values, value, len << 3);
),
TP_printk("%d %s CTRL:%s VALUE:%s", __entry->len, __entry->name,
__print_array(__entry->ctrls, __entry->len, sizeof(__u64)),
__print_array(__entry->values, __entry->len, sizeof(__u64)))
);
TRACE_EVENT(trap_reg,
TP_PROTO(const char *fn, int reg, bool is_write, u64 write_value),
TP_ARGS(fn, reg, is_write, write_value),
TP_STRUCT__entry(
__field(const char *, fn)
__field(int, reg)
__field(bool, is_write)
__field(u64, write_value)
),
TP_fast_assign(
__entry->fn = fn;
__entry->reg = reg;
__entry->is_write = is_write;
__entry->write_value = write_value;
),
TP_printk("%s %s reg %d (0x%08llx)", __entry->fn, __entry->is_write?"write to":"read from", __entry->reg, __entry->write_value)
);
TRACE_EVENT(kvm_handle_sys_reg,
TP_PROTO(unsigned long hsr),
TP_ARGS(hsr),
TP_STRUCT__entry(
__field(unsigned long, hsr)
),
TP_fast_assign(
__entry->hsr = hsr;
),
TP_printk("HSR 0x%08lx", __entry->hsr)
);
TRACE_EVENT(kvm_sys_access,
TP_PROTO(unsigned long vcpu_pc, struct sys_reg_params *params, const struct sys_reg_desc *reg),
TP_ARGS(vcpu_pc, params, reg),
TP_STRUCT__entry(
__field(unsigned long, vcpu_pc)
__field(bool, is_write)
__field(const char *, name)
__field(u8, Op0)
__field(u8, Op1)
__field(u8, CRn)
__field(u8, CRm)
__field(u8, Op2)
),
TP_fast_assign(
__entry->vcpu_pc = vcpu_pc;
__entry->is_write = params->is_write;
__entry->name = reg->name;
__entry->Op0 = reg->Op0;
__entry->Op1 = reg->Op1;
__entry->CRn = reg->CRn;
__entry->CRm = reg->CRm;
__entry->Op2 = reg->Op2;
),
TP_printk("PC: %lx %s (%d,%d,%d,%d,%d) %s",
__entry->vcpu_pc, __entry->name ?: "UNKN",
__entry->Op0, __entry->Op1, __entry->CRn,
__entry->CRm, __entry->Op2,
__entry->is_write ? "write" : "read")
);
TRACE_EVENT(kvm_set_guest_debug,
TP_PROTO(struct kvm_vcpu *vcpu, __u32 guest_debug),
TP_ARGS(vcpu, guest_debug),
TP_STRUCT__entry(
__field(struct kvm_vcpu *, vcpu)
__field(__u32, guest_debug)
),
TP_fast_assign(
__entry->vcpu = vcpu;
__entry->guest_debug = guest_debug;
),
TP_printk("vcpu: %p, flags: 0x%08x", __entry->vcpu, __entry->guest_debug)
);
#endif /* _TRACE_HANDLE_EXIT_ARM64_KVM_H */
#undef TRACE_INCLUDE_PATH
#define TRACE_INCLUDE_PATH .
#undef TRACE_INCLUDE_FILE
#define TRACE_INCLUDE_FILE trace_handle_exit
/* This part must be outside protection */
#include <trace/define_trace.h>

View File

@ -7,7 +7,7 @@
#include <linux/kvm.h> #include <linux/kvm.h>
#include <linux/kvm_host.h> #include <linux/kvm_host.h>
#include <asm/kvm_emulate.h> #include <asm/kvm_emulate.h>
#include "vgic.h" #include "vgic/vgic.h"
#include "sys_regs.h" #include "sys_regs.h"
static bool access_gic_ctlr(struct kvm_vcpu *vcpu, struct sys_reg_params *p, static bool access_gic_ctlr(struct kvm_vcpu *vcpu, struct sys_reg_params *p,

View File

@ -30,7 +30,7 @@ TRACE_EVENT(vgic_update_irq_pending,
#endif /* _TRACE_VGIC_H */ #endif /* _TRACE_VGIC_H */
#undef TRACE_INCLUDE_PATH #undef TRACE_INCLUDE_PATH
#define TRACE_INCLUDE_PATH ../../virt/kvm/arm/vgic #define TRACE_INCLUDE_PATH ../../arch/arm64/kvm/vgic
#undef TRACE_INCLUDE_FILE #undef TRACE_INCLUDE_FILE
#define TRACE_INCLUDE_FILE trace #define TRACE_INCLUDE_FILE trace

View File

@ -56,7 +56,7 @@ void vgic_v2_fold_lr_state(struct kvm_vcpu *vcpu)
cpuif->vgic_hcr &= ~GICH_HCR_UIE; cpuif->vgic_hcr &= ~GICH_HCR_UIE;
for (lr = 0; lr < vgic_cpu->used_lrs; lr++) { for (lr = 0; lr < vgic_cpu->vgic_v2.used_lrs; lr++) {
u32 val = cpuif->vgic_lr[lr]; u32 val = cpuif->vgic_lr[lr];
u32 cpuid, intid = val & GICH_LR_VIRTUALID; u32 cpuid, intid = val & GICH_LR_VIRTUALID;
struct vgic_irq *irq; struct vgic_irq *irq;
@ -120,7 +120,7 @@ void vgic_v2_fold_lr_state(struct kvm_vcpu *vcpu)
vgic_put_irq(vcpu->kvm, irq); vgic_put_irq(vcpu->kvm, irq);
} }
vgic_cpu->used_lrs = 0; cpuif->used_lrs = 0;
} }
/* /*
@ -427,7 +427,7 @@ int vgic_v2_probe(const struct gic_kvm_info *info)
static void save_lrs(struct kvm_vcpu *vcpu, void __iomem *base) static void save_lrs(struct kvm_vcpu *vcpu, void __iomem *base)
{ {
struct vgic_v2_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v2; struct vgic_v2_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v2;
u64 used_lrs = vcpu->arch.vgic_cpu.used_lrs; u64 used_lrs = cpu_if->used_lrs;
u64 elrsr; u64 elrsr;
int i; int i;
@ -448,7 +448,7 @@ static void save_lrs(struct kvm_vcpu *vcpu, void __iomem *base)
void vgic_v2_save_state(struct kvm_vcpu *vcpu) void vgic_v2_save_state(struct kvm_vcpu *vcpu)
{ {
void __iomem *base = kvm_vgic_global_state.vctrl_base; void __iomem *base = kvm_vgic_global_state.vctrl_base;
u64 used_lrs = vcpu->arch.vgic_cpu.used_lrs; u64 used_lrs = vcpu->arch.vgic_cpu.vgic_v2.used_lrs;
if (!base) if (!base)
return; return;
@ -463,7 +463,7 @@ void vgic_v2_restore_state(struct kvm_vcpu *vcpu)
{ {
struct vgic_v2_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v2; struct vgic_v2_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v2;
void __iomem *base = kvm_vgic_global_state.vctrl_base; void __iomem *base = kvm_vgic_global_state.vctrl_base;
u64 used_lrs = vcpu->arch.vgic_cpu.used_lrs; u64 used_lrs = cpu_if->used_lrs;
int i; int i;
if (!base) if (!base)

View File

@ -39,7 +39,7 @@ void vgic_v3_fold_lr_state(struct kvm_vcpu *vcpu)
cpuif->vgic_hcr &= ~ICH_HCR_UIE; cpuif->vgic_hcr &= ~ICH_HCR_UIE;
for (lr = 0; lr < vgic_cpu->used_lrs; lr++) { for (lr = 0; lr < cpuif->used_lrs; lr++) {
u64 val = cpuif->vgic_lr[lr]; u64 val = cpuif->vgic_lr[lr];
u32 intid, cpuid; u32 intid, cpuid;
struct vgic_irq *irq; struct vgic_irq *irq;
@ -111,7 +111,7 @@ void vgic_v3_fold_lr_state(struct kvm_vcpu *vcpu)
vgic_put_irq(vcpu->kvm, irq); vgic_put_irq(vcpu->kvm, irq);
} }
vgic_cpu->used_lrs = 0; cpuif->used_lrs = 0;
} }
/* Requires the irq to be locked already */ /* Requires the irq to be locked already */
@ -587,7 +587,7 @@ int vgic_v3_probe(const struct gic_kvm_info *info)
int ret; int ret;
/* /*
* The ListRegs field is 5 bits, but there is a architectural * The ListRegs field is 5 bits, but there is an architectural
* maximum of 16 list registers. Just ignore bit 4... * maximum of 16 list registers. Just ignore bit 4...
*/ */
kvm_vgic_global_state.nr_lr = (ich_vtr_el2 & 0xf) + 1; kvm_vgic_global_state.nr_lr = (ich_vtr_el2 & 0xf) + 1;
@ -630,12 +630,10 @@ int vgic_v3_probe(const struct gic_kvm_info *info)
if (kvm_vgic_global_state.vcpu_base == 0) if (kvm_vgic_global_state.vcpu_base == 0)
kvm_info("disabling GICv2 emulation\n"); kvm_info("disabling GICv2 emulation\n");
#ifdef CONFIG_ARM64
if (cpus_have_const_cap(ARM64_WORKAROUND_CAVIUM_30115)) { if (cpus_have_const_cap(ARM64_WORKAROUND_CAVIUM_30115)) {
group0_trap = true; group0_trap = true;
group1_trap = true; group1_trap = true;
} }
#endif
if (group0_trap || group1_trap || common_trap) { if (group0_trap || group1_trap || common_trap) {
kvm_info("GICv3 sysreg trapping enabled ([%s%s%s], reduced performance)\n", kvm_info("GICv3 sysreg trapping enabled ([%s%s%s], reduced performance)\n",
@ -664,10 +662,10 @@ void vgic_v3_load(struct kvm_vcpu *vcpu)
if (likely(cpu_if->vgic_sre)) if (likely(cpu_if->vgic_sre))
kvm_call_hyp(__vgic_v3_write_vmcr, cpu_if->vgic_vmcr); kvm_call_hyp(__vgic_v3_write_vmcr, cpu_if->vgic_vmcr);
kvm_call_hyp(__vgic_v3_restore_aprs, vcpu); kvm_call_hyp(__vgic_v3_restore_aprs, kern_hyp_va(cpu_if));
if (has_vhe()) if (has_vhe())
__vgic_v3_activate_traps(vcpu); __vgic_v3_activate_traps(cpu_if);
WARN_ON(vgic_v4_load(vcpu)); WARN_ON(vgic_v4_load(vcpu));
} }
@ -682,12 +680,14 @@ void vgic_v3_vmcr_sync(struct kvm_vcpu *vcpu)
void vgic_v3_put(struct kvm_vcpu *vcpu) void vgic_v3_put(struct kvm_vcpu *vcpu)
{ {
struct vgic_v3_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v3;
WARN_ON(vgic_v4_put(vcpu, false)); WARN_ON(vgic_v4_put(vcpu, false));
vgic_v3_vmcr_sync(vcpu); vgic_v3_vmcr_sync(vcpu);
kvm_call_hyp(__vgic_v3_save_aprs, vcpu); kvm_call_hyp(__vgic_v3_save_aprs, kern_hyp_va(cpu_if));
if (has_vhe()) if (has_vhe())
__vgic_v3_deactivate_traps(vcpu); __vgic_v3_deactivate_traps(cpu_if);
} }

View File

@ -786,6 +786,7 @@ static void vgic_flush_lr_state(struct kvm_vcpu *vcpu)
int count; int count;
bool multi_sgi; bool multi_sgi;
u8 prio = 0xff; u8 prio = 0xff;
int i = 0;
lockdep_assert_held(&vgic_cpu->ap_list_lock); lockdep_assert_held(&vgic_cpu->ap_list_lock);
@ -827,11 +828,14 @@ static void vgic_flush_lr_state(struct kvm_vcpu *vcpu)
} }
} }
vcpu->arch.vgic_cpu.used_lrs = count;
/* Nuke remaining LRs */ /* Nuke remaining LRs */
for ( ; count < kvm_vgic_global_state.nr_lr; count++) for (i = count ; i < kvm_vgic_global_state.nr_lr; i++)
vgic_clear_lr(vcpu, count); vgic_clear_lr(vcpu, i);
if (!static_branch_unlikely(&kvm_vgic_global_state.gicv3_cpuif))
vcpu->arch.vgic_cpu.vgic_v2.used_lrs = count;
else
vcpu->arch.vgic_cpu.vgic_v3.used_lrs = count;
} }
static inline bool can_access_vgic_from_kernel(void) static inline bool can_access_vgic_from_kernel(void)
@ -849,13 +853,13 @@ static inline void vgic_save_state(struct kvm_vcpu *vcpu)
if (!static_branch_unlikely(&kvm_vgic_global_state.gicv3_cpuif)) if (!static_branch_unlikely(&kvm_vgic_global_state.gicv3_cpuif))
vgic_v2_save_state(vcpu); vgic_v2_save_state(vcpu);
else else
__vgic_v3_save_state(vcpu); __vgic_v3_save_state(&vcpu->arch.vgic_cpu.vgic_v3);
} }
/* Sync back the hardware VGIC state into our emulation after a guest's run. */ /* Sync back the hardware VGIC state into our emulation after a guest's run. */
void kvm_vgic_sync_hwstate(struct kvm_vcpu *vcpu) void kvm_vgic_sync_hwstate(struct kvm_vcpu *vcpu)
{ {
struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu; int used_lrs;
/* An empty ap_list_head implies used_lrs == 0 */ /* An empty ap_list_head implies used_lrs == 0 */
if (list_empty(&vcpu->arch.vgic_cpu.ap_list_head)) if (list_empty(&vcpu->arch.vgic_cpu.ap_list_head))
@ -864,7 +868,12 @@ void kvm_vgic_sync_hwstate(struct kvm_vcpu *vcpu)
if (can_access_vgic_from_kernel()) if (can_access_vgic_from_kernel())
vgic_save_state(vcpu); vgic_save_state(vcpu);
if (vgic_cpu->used_lrs) if (!static_branch_unlikely(&kvm_vgic_global_state.gicv3_cpuif))
used_lrs = vcpu->arch.vgic_cpu.vgic_v2.used_lrs;
else
used_lrs = vcpu->arch.vgic_cpu.vgic_v3.used_lrs;
if (used_lrs)
vgic_fold_lr_state(vcpu); vgic_fold_lr_state(vcpu);
vgic_prune_ap_list(vcpu); vgic_prune_ap_list(vcpu);
} }
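Now that used_lrs lives in the GIC-specific cpu_if structures, callers outside the vgic backends have to pick the right field, as done twice above. A hedged sketch of a helper that would centralise that choice (vgic_used_lrs() is a hypothetical name, not part of this series):

static inline unsigned int vgic_used_lrs(struct kvm_vcpu *vcpu)
{
	/* Same static key the flush/sync paths already test. */
	if (!static_branch_unlikely(&kvm_vgic_global_state.gicv3_cpuif))
		return vcpu->arch.vgic_cpu.vgic_v2.used_lrs;

	return vcpu->arch.vgic_cpu.vgic_v3.used_lrs;
}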
@ -874,7 +883,7 @@ static inline void vgic_restore_state(struct kvm_vcpu *vcpu)
if (!static_branch_unlikely(&kvm_vgic_global_state.gicv3_cpuif)) if (!static_branch_unlikely(&kvm_vgic_global_state.gicv3_cpuif))
vgic_v2_restore_state(vcpu); vgic_v2_restore_state(vcpu);
else else
__vgic_v3_restore_state(vcpu); __vgic_v3_restore_state(&vcpu->arch.vgic_cpu.vgic_v3);
} }
/* Flush our emulation state into the GIC hardware before entering the guest. */ /* Flush our emulation state into the GIC hardware before entering the guest. */

View File

@ -274,6 +274,8 @@ struct vgic_v2_cpu_if {
u32 vgic_vmcr; u32 vgic_vmcr;
u32 vgic_apr; u32 vgic_apr;
u32 vgic_lr[VGIC_V2_MAX_LRS]; u32 vgic_lr[VGIC_V2_MAX_LRS];
unsigned int used_lrs;
}; };
struct vgic_v3_cpu_if { struct vgic_v3_cpu_if {
@ -291,6 +293,8 @@ struct vgic_v3_cpu_if {
* linking the Linux IRQ subsystem and the ITS together. * linking the Linux IRQ subsystem and the ITS together.
*/ */
struct its_vpe its_vpe; struct its_vpe its_vpe;
unsigned int used_lrs;
}; };
struct vgic_cpu { struct vgic_cpu {
@ -300,7 +304,6 @@ struct vgic_cpu {
struct vgic_v3_cpu_if vgic_v3; struct vgic_v3_cpu_if vgic_v3;
}; };
unsigned int used_lrs;
struct vgic_irq private_irqs[VGIC_NR_PRIVATE_IRQS]; struct vgic_irq private_irqs[VGIC_NR_PRIVATE_IRQS];
raw_spinlock_t ap_list_lock; /* Protects the ap_list */ raw_spinlock_t ap_list_lock; /* Protects the ap_list */

View File

@ -119,7 +119,7 @@ int kvm_coalesced_mmio_init(struct kvm *kvm)
/* /*
* We're using this spinlock to sync access to the coalesced ring. * We're using this spinlock to sync access to the coalesced ring.
* The list doesn't need it's own lock since device registration and * The list doesn't need its own lock since device registration and
* unregistration should only happen when kvm->slots_lock is held. * unregistration should only happen when kvm->slots_lock is held.
*/ */
spin_lock_init(&kvm->ring_lock); spin_lock_init(&kvm->ring_lock);

View File

@ -116,7 +116,7 @@ irqfd_shutdown(struct work_struct *work)
struct kvm *kvm = irqfd->kvm; struct kvm *kvm = irqfd->kvm;
u64 cnt; u64 cnt;
/* Make sure irqfd has been initalized in assign path. */ /* Make sure irqfd has been initialized in assign path. */
synchronize_srcu(&kvm->irq_srcu); synchronize_srcu(&kvm->irq_srcu);
/* /*

View File

@ -2825,7 +2825,7 @@ EXPORT_SYMBOL_GPL(kvm_vcpu_yield_to);
* *
* (a) VCPU which has not done pl-exit or cpu relax intercepted recently * (a) VCPU which has not done pl-exit or cpu relax intercepted recently
* (preempted lock holder), indicated by @in_spin_loop. * (preempted lock holder), indicated by @in_spin_loop.
* Set at the beiginning and cleared at the end of interception/PLE handler. * Set at the beginning and cleared at the end of interception/PLE handler.
* *
* (b) VCPU which has done pl-exit/ cpu relax intercepted but did not get * (b) VCPU which has done pl-exit/ cpu relax intercepted but did not get
* chance last time (mostly it has become eligible now since we have probably * chance last time (mostly it has become eligible now since we have probably