From 9ed24f4b712b855dcf7be3025b75b051cb73a2b7 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Wed, 13 May 2020 11:40:34 +0100 Subject: [PATCH 01/24] KVM: arm64: Move virt/kvm/arm to arch/arm64 Now that the 32bit KVM/arm host is a distant memory, let's move the whole of the KVM/arm64 code into the arm64 tree. As they said in the song: Welcome Home (Sanitarium). Signed-off-by: Marc Zyngier Acked-by: Will Deacon Link: https://lore.kernel.org/r/20200513104034.74741-1-maz@kernel.org --- MAINTAINERS | 1 - arch/arm64/kvm/Makefile | 44 ++-- {virt/kvm/arm => arch/arm64/kvm}/aarch32.c | 0 {virt/kvm/arm => arch/arm64/kvm}/arch_timer.c | 0 {virt/kvm/arm => arch/arm64/kvm}/arm.c | 2 +- arch/arm64/kvm/handle_exit.c | 2 +- arch/arm64/kvm/hyp/Makefile | 9 +- .../kvm/arm => arch/arm64/kvm}/hyp/aarch32.c | 0 .../kvm/arm => arch/arm64/kvm}/hyp/timer-sr.c | 0 .../arm => arch/arm64/kvm}/hyp/vgic-v3-sr.c | 4 - {virt/kvm/arm => arch/arm64/kvm}/hypercalls.c | 0 {virt/kvm/arm => arch/arm64/kvm}/mmio.c | 0 {virt/kvm/arm => arch/arm64/kvm}/mmu.c | 0 {virt/kvm/arm => arch/arm64/kvm}/perf.c | 0 .../arm/pmu.c => arch/arm64/kvm/pmu-emul.c | 0 {virt/kvm/arm => arch/arm64/kvm}/psci.c | 0 {virt/kvm/arm => arch/arm64/kvm}/pvtime.c | 0 arch/arm64/kvm/trace.h | 216 +----------------- .../arm/trace.h => arch/arm64/kvm/trace_arm.h | 11 +- arch/arm64/kvm/trace_handle_exit.h | 215 +++++++++++++++++ arch/arm64/kvm/vgic-sys-reg-v3.c | 2 +- {virt/kvm/arm => arch/arm64/kvm}/vgic/trace.h | 2 +- .../arm => arch/arm64/kvm}/vgic/vgic-debug.c | 0 .../arm => arch/arm64/kvm}/vgic/vgic-init.c | 0 .../arm => arch/arm64/kvm}/vgic/vgic-irqfd.c | 0 .../arm => arch/arm64/kvm}/vgic/vgic-its.c | 0 .../arm64/kvm}/vgic/vgic-kvm-device.c | 0 .../arm64/kvm}/vgic/vgic-mmio-v2.c | 0 .../arm64/kvm}/vgic/vgic-mmio-v3.c | 0 .../arm => arch/arm64/kvm}/vgic/vgic-mmio.c | 0 .../arm => arch/arm64/kvm}/vgic/vgic-mmio.h | 0 .../kvm/arm => arch/arm64/kvm}/vgic/vgic-v2.c | 0 .../kvm/arm => arch/arm64/kvm}/vgic/vgic-v3.c | 2 - .../kvm/arm => arch/arm64/kvm}/vgic/vgic-v4.c | 0 {virt/kvm/arm => arch/arm64/kvm}/vgic/vgic.c | 0 {virt/kvm/arm => arch/arm64/kvm}/vgic/vgic.h | 0 36 files changed, 253 insertions(+), 257 deletions(-) rename {virt/kvm/arm => arch/arm64/kvm}/aarch32.c (100%) rename {virt/kvm/arm => arch/arm64/kvm}/arch_timer.c (100%) rename {virt/kvm/arm => arch/arm64/kvm}/arm.c (99%) rename {virt/kvm/arm => arch/arm64/kvm}/hyp/aarch32.c (100%) rename {virt/kvm/arm => arch/arm64/kvm}/hyp/timer-sr.c (100%) rename {virt/kvm/arm => arch/arm64/kvm}/hyp/vgic-v3-sr.c (99%) rename {virt/kvm/arm => arch/arm64/kvm}/hypercalls.c (100%) rename {virt/kvm/arm => arch/arm64/kvm}/mmio.c (100%) rename {virt/kvm/arm => arch/arm64/kvm}/mmu.c (100%) rename {virt/kvm/arm => arch/arm64/kvm}/perf.c (100%) rename virt/kvm/arm/pmu.c => arch/arm64/kvm/pmu-emul.c (100%) rename {virt/kvm/arm => arch/arm64/kvm}/psci.c (100%) rename {virt/kvm/arm => arch/arm64/kvm}/pvtime.c (100%) rename virt/kvm/arm/trace.h => arch/arm64/kvm/trace_arm.h (97%) create mode 100644 arch/arm64/kvm/trace_handle_exit.h rename {virt/kvm/arm => arch/arm64/kvm}/vgic/trace.h (93%) rename {virt/kvm/arm => arch/arm64/kvm}/vgic/vgic-debug.c (100%) rename {virt/kvm/arm => arch/arm64/kvm}/vgic/vgic-init.c (100%) rename {virt/kvm/arm => arch/arm64/kvm}/vgic/vgic-irqfd.c (100%) rename {virt/kvm/arm => arch/arm64/kvm}/vgic/vgic-its.c (100%) rename {virt/kvm/arm => arch/arm64/kvm}/vgic/vgic-kvm-device.c (100%) rename {virt/kvm/arm => arch/arm64/kvm}/vgic/vgic-mmio-v2.c (100%) rename {virt/kvm/arm => 
arch/arm64/kvm}/vgic/vgic-mmio-v3.c (100%) rename {virt/kvm/arm => arch/arm64/kvm}/vgic/vgic-mmio.c (100%) rename {virt/kvm/arm => arch/arm64/kvm}/vgic/vgic-mmio.h (100%) rename {virt/kvm/arm => arch/arm64/kvm}/vgic/vgic-v2.c (100%) rename {virt/kvm/arm => arch/arm64/kvm}/vgic/vgic-v3.c (99%) rename {virt/kvm/arm => arch/arm64/kvm}/vgic/vgic-v4.c (100%) rename {virt/kvm/arm => arch/arm64/kvm}/vgic/vgic.c (100%) rename {virt/kvm/arm => arch/arm64/kvm}/vgic/vgic.h (100%) diff --git a/MAINTAINERS b/MAINTAINERS index 091ec22c1a23..6c5b928989ed 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -9295,7 +9295,6 @@ F: arch/arm64/include/asm/kvm* F: arch/arm64/include/uapi/asm/kvm* F: arch/arm64/kvm/ F: include/kvm/arm_* -F: virt/kvm/arm/ KERNEL VIRTUAL MACHINE FOR MIPS (KVM/mips) L: linux-mips@vger.kernel.org diff --git a/arch/arm64/kvm/Makefile b/arch/arm64/kvm/Makefile index 5ffbdc39e780..7a3768538343 100644 --- a/arch/arm64/kvm/Makefile +++ b/arch/arm64/kvm/Makefile @@ -3,37 +3,37 @@ # Makefile for Kernel-based Virtual Machine module # -ccflags-y += -I $(srctree)/$(src) -I $(srctree)/virt/kvm/arm/vgic +ccflags-y += -I $(srctree)/$(src) KVM=../../../virt/kvm obj-$(CONFIG_KVM_ARM_HOST) += kvm.o obj-$(CONFIG_KVM_ARM_HOST) += hyp/ -kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/kvm_main.o $(KVM)/coalesced_mmio.o $(KVM)/eventfd.o $(KVM)/vfio.o -kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/arm.o $(KVM)/arm/mmu.o $(KVM)/arm/mmio.o -kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/psci.o $(KVM)/arm/perf.o -kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/hypercalls.o -kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/pvtime.o +kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/kvm_main.o $(KVM)/coalesced_mmio.o +kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/eventfd.o $(KVM)/vfio.o $(KVM)/irqchip.o +kvm-$(CONFIG_KVM_ARM_HOST) += arm.o mmu.o mmio.o +kvm-$(CONFIG_KVM_ARM_HOST) += psci.o perf.o +kvm-$(CONFIG_KVM_ARM_HOST) += hypercalls.o +kvm-$(CONFIG_KVM_ARM_HOST) += pvtime.o kvm-$(CONFIG_KVM_ARM_HOST) += inject_fault.o regmap.o va_layout.o kvm-$(CONFIG_KVM_ARM_HOST) += hyp.o hyp-init.o handle_exit.o kvm-$(CONFIG_KVM_ARM_HOST) += guest.o debug.o reset.o sys_regs.o sys_regs_generic_v8.o kvm-$(CONFIG_KVM_ARM_HOST) += vgic-sys-reg-v3.o fpsimd.o pmu.o -kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/aarch32.o +kvm-$(CONFIG_KVM_ARM_HOST) += aarch32.o +kvm-$(CONFIG_KVM_ARM_HOST) += arch_timer.o +kvm-$(CONFIG_KVM_ARM_PMU) += pmu-emul.o -kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/vgic/vgic.o -kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/vgic/vgic-init.o -kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/vgic/vgic-irqfd.o -kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/vgic/vgic-v2.o -kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/vgic/vgic-v3.o -kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/vgic/vgic-v4.o -kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/vgic/vgic-mmio.o -kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/vgic/vgic-mmio-v2.o -kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/vgic/vgic-mmio-v3.o -kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/vgic/vgic-kvm-device.o -kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/vgic/vgic-its.o -kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/vgic/vgic-debug.o -kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/irqchip.o -kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/arch_timer.o -kvm-$(CONFIG_KVM_ARM_PMU) += $(KVM)/arm/pmu.o +kvm-$(CONFIG_KVM_ARM_HOST) += vgic/vgic.o +kvm-$(CONFIG_KVM_ARM_HOST) += vgic/vgic-init.o +kvm-$(CONFIG_KVM_ARM_HOST) += vgic/vgic-irqfd.o +kvm-$(CONFIG_KVM_ARM_HOST) += vgic/vgic-v2.o +kvm-$(CONFIG_KVM_ARM_HOST) += vgic/vgic-v3.o +kvm-$(CONFIG_KVM_ARM_HOST) += vgic/vgic-v4.o +kvm-$(CONFIG_KVM_ARM_HOST) += 
vgic/vgic-mmio.o +kvm-$(CONFIG_KVM_ARM_HOST) += vgic/vgic-mmio-v2.o +kvm-$(CONFIG_KVM_ARM_HOST) += vgic/vgic-mmio-v3.o +kvm-$(CONFIG_KVM_ARM_HOST) += vgic/vgic-kvm-device.o +kvm-$(CONFIG_KVM_ARM_HOST) += vgic/vgic-its.o +kvm-$(CONFIG_KVM_ARM_HOST) += vgic/vgic-debug.o diff --git a/virt/kvm/arm/aarch32.c b/arch/arm64/kvm/aarch32.c similarity index 100% rename from virt/kvm/arm/aarch32.c rename to arch/arm64/kvm/aarch32.c diff --git a/virt/kvm/arm/arch_timer.c b/arch/arm64/kvm/arch_timer.c similarity index 100% rename from virt/kvm/arm/arch_timer.c rename to arch/arm64/kvm/arch_timer.c diff --git a/virt/kvm/arm/arm.c b/arch/arm64/kvm/arm.c similarity index 99% rename from virt/kvm/arm/arm.c rename to arch/arm64/kvm/arm.c index 48d0ec44ad77..c958bb37b769 100644 --- a/virt/kvm/arm/arm.c +++ b/arch/arm64/kvm/arm.c @@ -22,7 +22,7 @@ #include #define CREATE_TRACE_POINTS -#include "trace.h" +#include "trace_arm.h" #include #include diff --git a/arch/arm64/kvm/handle_exit.c b/arch/arm64/kvm/handle_exit.c index aacfc55de44c..eb194696ef62 100644 --- a/arch/arm64/kvm/handle_exit.c +++ b/arch/arm64/kvm/handle_exit.c @@ -23,7 +23,7 @@ #include #define CREATE_TRACE_POINTS -#include "trace.h" +#include "trace_handle_exit.h" typedef int (*exit_handle_fn)(struct kvm_vcpu *, struct kvm_run *); diff --git a/arch/arm64/kvm/hyp/Makefile b/arch/arm64/kvm/hyp/Makefile index ea710f674cb6..dc18274a6826 100644 --- a/arch/arm64/kvm/hyp/Makefile +++ b/arch/arm64/kvm/hyp/Makefile @@ -6,12 +6,9 @@ ccflags-y += -fno-stack-protector -DDISABLE_BRANCH_PROFILING \ $(DISABLE_STACKLEAK_PLUGIN) -KVM=../../../../virt/kvm - -obj-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/hyp/vgic-v3-sr.o -obj-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/hyp/timer-sr.o -obj-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/hyp/aarch32.o - +obj-$(CONFIG_KVM_ARM_HOST) += vgic-v3-sr.o +obj-$(CONFIG_KVM_ARM_HOST) += timer-sr.o +obj-$(CONFIG_KVM_ARM_HOST) += aarch32.o obj-$(CONFIG_KVM_ARM_HOST) += vgic-v2-cpuif-proxy.o obj-$(CONFIG_KVM_ARM_HOST) += sysreg-sr.o obj-$(CONFIG_KVM_ARM_HOST) += debug-sr.o diff --git a/virt/kvm/arm/hyp/aarch32.c b/arch/arm64/kvm/hyp/aarch32.c similarity index 100% rename from virt/kvm/arm/hyp/aarch32.c rename to arch/arm64/kvm/hyp/aarch32.c diff --git a/virt/kvm/arm/hyp/timer-sr.c b/arch/arm64/kvm/hyp/timer-sr.c similarity index 100% rename from virt/kvm/arm/hyp/timer-sr.c rename to arch/arm64/kvm/hyp/timer-sr.c diff --git a/virt/kvm/arm/hyp/vgic-v3-sr.c b/arch/arm64/kvm/hyp/vgic-v3-sr.c similarity index 99% rename from virt/kvm/arm/hyp/vgic-v3-sr.c rename to arch/arm64/kvm/hyp/vgic-v3-sr.c index ccf1fde9836c..49fedf6710f9 100644 --- a/virt/kvm/arm/hyp/vgic-v3-sr.c +++ b/arch/arm64/kvm/hyp/vgic-v3-sr.c @@ -431,8 +431,6 @@ void __hyp_text __vgic_v3_write_vmcr(u32 vmcr) write_gicreg(vmcr, ICH_VMCR_EL2); } -#ifdef CONFIG_ARM64 - static int __hyp_text __vgic_v3_bpr_min(void) { /* See Pseudocode for VPriorityGroup */ @@ -1126,5 +1124,3 @@ int __hyp_text __vgic_v3_perform_cpuif_access(struct kvm_vcpu *vcpu) return 1; } - -#endif diff --git a/virt/kvm/arm/hypercalls.c b/arch/arm64/kvm/hypercalls.c similarity index 100% rename from virt/kvm/arm/hypercalls.c rename to arch/arm64/kvm/hypercalls.c diff --git a/virt/kvm/arm/mmio.c b/arch/arm64/kvm/mmio.c similarity index 100% rename from virt/kvm/arm/mmio.c rename to arch/arm64/kvm/mmio.c diff --git a/virt/kvm/arm/mmu.c b/arch/arm64/kvm/mmu.c similarity index 100% rename from virt/kvm/arm/mmu.c rename to arch/arm64/kvm/mmu.c diff --git a/virt/kvm/arm/perf.c b/arch/arm64/kvm/perf.c similarity index 100% rename 
from virt/kvm/arm/perf.c rename to arch/arm64/kvm/perf.c diff --git a/virt/kvm/arm/pmu.c b/arch/arm64/kvm/pmu-emul.c similarity index 100% rename from virt/kvm/arm/pmu.c rename to arch/arm64/kvm/pmu-emul.c diff --git a/virt/kvm/arm/psci.c b/arch/arm64/kvm/psci.c similarity index 100% rename from virt/kvm/arm/psci.c rename to arch/arm64/kvm/psci.c diff --git a/virt/kvm/arm/pvtime.c b/arch/arm64/kvm/pvtime.c similarity index 100% rename from virt/kvm/arm/pvtime.c rename to arch/arm64/kvm/pvtime.c diff --git a/arch/arm64/kvm/trace.h b/arch/arm64/kvm/trace.h index eab91ad0effb..86f9ea47be29 100644 --- a/arch/arm64/kvm/trace.h +++ b/arch/arm64/kvm/trace.h @@ -1,216 +1,8 @@ /* SPDX-License-Identifier: GPL-2.0 */ -#if !defined(_TRACE_ARM64_KVM_H) || defined(TRACE_HEADER_MULTI_READ) +#ifndef _TRACE_ARM64_KVM_H #define _TRACE_ARM64_KVM_H -#include -#include "sys_regs.h" +#include "trace_arm.h" +#include "trace_handle_exit.h" -#undef TRACE_SYSTEM -#define TRACE_SYSTEM kvm - -TRACE_EVENT(kvm_wfx_arm64, - TP_PROTO(unsigned long vcpu_pc, bool is_wfe), - TP_ARGS(vcpu_pc, is_wfe), - - TP_STRUCT__entry( - __field(unsigned long, vcpu_pc) - __field(bool, is_wfe) - ), - - TP_fast_assign( - __entry->vcpu_pc = vcpu_pc; - __entry->is_wfe = is_wfe; - ), - - TP_printk("guest executed wf%c at: 0x%08lx", - __entry->is_wfe ? 'e' : 'i', __entry->vcpu_pc) -); - -TRACE_EVENT(kvm_hvc_arm64, - TP_PROTO(unsigned long vcpu_pc, unsigned long r0, unsigned long imm), - TP_ARGS(vcpu_pc, r0, imm), - - TP_STRUCT__entry( - __field(unsigned long, vcpu_pc) - __field(unsigned long, r0) - __field(unsigned long, imm) - ), - - TP_fast_assign( - __entry->vcpu_pc = vcpu_pc; - __entry->r0 = r0; - __entry->imm = imm; - ), - - TP_printk("HVC at 0x%08lx (r0: 0x%08lx, imm: 0x%lx)", - __entry->vcpu_pc, __entry->r0, __entry->imm) -); - -TRACE_EVENT(kvm_arm_setup_debug, - TP_PROTO(struct kvm_vcpu *vcpu, __u32 guest_debug), - TP_ARGS(vcpu, guest_debug), - - TP_STRUCT__entry( - __field(struct kvm_vcpu *, vcpu) - __field(__u32, guest_debug) - ), - - TP_fast_assign( - __entry->vcpu = vcpu; - __entry->guest_debug = guest_debug; - ), - - TP_printk("vcpu: %p, flags: 0x%08x", __entry->vcpu, __entry->guest_debug) -); - -TRACE_EVENT(kvm_arm_clear_debug, - TP_PROTO(__u32 guest_debug), - TP_ARGS(guest_debug), - - TP_STRUCT__entry( - __field(__u32, guest_debug) - ), - - TP_fast_assign( - __entry->guest_debug = guest_debug; - ), - - TP_printk("flags: 0x%08x", __entry->guest_debug) -); - -TRACE_EVENT(kvm_arm_set_dreg32, - TP_PROTO(const char *name, __u32 value), - TP_ARGS(name, value), - - TP_STRUCT__entry( - __field(const char *, name) - __field(__u32, value) - ), - - TP_fast_assign( - __entry->name = name; - __entry->value = value; - ), - - TP_printk("%s: 0x%08x", __entry->name, __entry->value) -); - -TRACE_DEFINE_SIZEOF(__u64); - -TRACE_EVENT(kvm_arm_set_regset, - TP_PROTO(const char *type, int len, __u64 *control, __u64 *value), - TP_ARGS(type, len, control, value), - TP_STRUCT__entry( - __field(const char *, name) - __field(int, len) - __array(u64, ctrls, 16) - __array(u64, values, 16) - ), - TP_fast_assign( - __entry->name = type; - __entry->len = len; - memcpy(__entry->ctrls, control, len << 3); - memcpy(__entry->values, value, len << 3); - ), - TP_printk("%d %s CTRL:%s VALUE:%s", __entry->len, __entry->name, - __print_array(__entry->ctrls, __entry->len, sizeof(__u64)), - __print_array(__entry->values, __entry->len, sizeof(__u64))) -); - -TRACE_EVENT(trap_reg, - TP_PROTO(const char *fn, int reg, bool is_write, u64 write_value), - TP_ARGS(fn, reg, 
is_write, write_value), - - TP_STRUCT__entry( - __field(const char *, fn) - __field(int, reg) - __field(bool, is_write) - __field(u64, write_value) - ), - - TP_fast_assign( - __entry->fn = fn; - __entry->reg = reg; - __entry->is_write = is_write; - __entry->write_value = write_value; - ), - - TP_printk("%s %s reg %d (0x%08llx)", __entry->fn, __entry->is_write?"write to":"read from", __entry->reg, __entry->write_value) -); - -TRACE_EVENT(kvm_handle_sys_reg, - TP_PROTO(unsigned long hsr), - TP_ARGS(hsr), - - TP_STRUCT__entry( - __field(unsigned long, hsr) - ), - - TP_fast_assign( - __entry->hsr = hsr; - ), - - TP_printk("HSR 0x%08lx", __entry->hsr) -); - -TRACE_EVENT(kvm_sys_access, - TP_PROTO(unsigned long vcpu_pc, struct sys_reg_params *params, const struct sys_reg_desc *reg), - TP_ARGS(vcpu_pc, params, reg), - - TP_STRUCT__entry( - __field(unsigned long, vcpu_pc) - __field(bool, is_write) - __field(const char *, name) - __field(u8, Op0) - __field(u8, Op1) - __field(u8, CRn) - __field(u8, CRm) - __field(u8, Op2) - ), - - TP_fast_assign( - __entry->vcpu_pc = vcpu_pc; - __entry->is_write = params->is_write; - __entry->name = reg->name; - __entry->Op0 = reg->Op0; - __entry->Op0 = reg->Op0; - __entry->Op1 = reg->Op1; - __entry->CRn = reg->CRn; - __entry->CRm = reg->CRm; - __entry->Op2 = reg->Op2; - ), - - TP_printk("PC: %lx %s (%d,%d,%d,%d,%d) %s", - __entry->vcpu_pc, __entry->name ?: "UNKN", - __entry->Op0, __entry->Op1, __entry->CRn, - __entry->CRm, __entry->Op2, - __entry->is_write ? "write" : "read") -); - -TRACE_EVENT(kvm_set_guest_debug, - TP_PROTO(struct kvm_vcpu *vcpu, __u32 guest_debug), - TP_ARGS(vcpu, guest_debug), - - TP_STRUCT__entry( - __field(struct kvm_vcpu *, vcpu) - __field(__u32, guest_debug) - ), - - TP_fast_assign( - __entry->vcpu = vcpu; - __entry->guest_debug = guest_debug; - ), - - TP_printk("vcpu: %p, flags: 0x%08x", __entry->vcpu, __entry->guest_debug) -); - - -#endif /* _TRACE_ARM64_KVM_H */ - -#undef TRACE_INCLUDE_PATH -#define TRACE_INCLUDE_PATH . -#undef TRACE_INCLUDE_FILE -#define TRACE_INCLUDE_FILE trace - -/* This part must be outside protection */ -#include +#endif /* _TRACE_ARM64_KVM_H */ diff --git a/virt/kvm/arm/trace.h b/arch/arm64/kvm/trace_arm.h similarity index 97% rename from virt/kvm/arm/trace.h rename to arch/arm64/kvm/trace_arm.h index cc94ccc68821..4c71270cc097 100644 --- a/virt/kvm/arm/trace.h +++ b/arch/arm64/kvm/trace_arm.h @@ -1,10 +1,9 @@ /* SPDX-License-Identifier: GPL-2.0 */ -#if !defined(_TRACE_KVM_H) || defined(TRACE_HEADER_MULTI_READ) -#define _TRACE_KVM_H +#if !defined(_TRACE_ARM_ARM64_KVM_H) || defined(TRACE_HEADER_MULTI_READ) +#define _TRACE_ARM_ARM64_KVM_H #include #include -#include #undef TRACE_SYSTEM #define TRACE_SYSTEM kvm @@ -368,12 +367,12 @@ TRACE_EVENT(kvm_timer_emulate, __entry->timer_idx, __entry->should_fire) ); -#endif /* _TRACE_KVM_H */ +#endif /* _TRACE_ARM_ARM64_KVM_H */ #undef TRACE_INCLUDE_PATH -#define TRACE_INCLUDE_PATH ../../virt/kvm/arm +#define TRACE_INCLUDE_PATH . 
#undef TRACE_INCLUDE_FILE -#define TRACE_INCLUDE_FILE trace +#define TRACE_INCLUDE_FILE trace_arm /* This part must be outside protection */ #include diff --git a/arch/arm64/kvm/trace_handle_exit.h b/arch/arm64/kvm/trace_handle_exit.h new file mode 100644 index 000000000000..2c56d1e0f5bd --- /dev/null +++ b/arch/arm64/kvm/trace_handle_exit.h @@ -0,0 +1,215 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#if !defined(_TRACE_HANDLE_EXIT_ARM64_KVM_H) || defined(TRACE_HEADER_MULTI_READ) +#define _TRACE_HANDLE_EXIT_ARM64_KVM_H + +#include +#include "sys_regs.h" + +#undef TRACE_SYSTEM +#define TRACE_SYSTEM kvm + +TRACE_EVENT(kvm_wfx_arm64, + TP_PROTO(unsigned long vcpu_pc, bool is_wfe), + TP_ARGS(vcpu_pc, is_wfe), + + TP_STRUCT__entry( + __field(unsigned long, vcpu_pc) + __field(bool, is_wfe) + ), + + TP_fast_assign( + __entry->vcpu_pc = vcpu_pc; + __entry->is_wfe = is_wfe; + ), + + TP_printk("guest executed wf%c at: 0x%08lx", + __entry->is_wfe ? 'e' : 'i', __entry->vcpu_pc) +); + +TRACE_EVENT(kvm_hvc_arm64, + TP_PROTO(unsigned long vcpu_pc, unsigned long r0, unsigned long imm), + TP_ARGS(vcpu_pc, r0, imm), + + TP_STRUCT__entry( + __field(unsigned long, vcpu_pc) + __field(unsigned long, r0) + __field(unsigned long, imm) + ), + + TP_fast_assign( + __entry->vcpu_pc = vcpu_pc; + __entry->r0 = r0; + __entry->imm = imm; + ), + + TP_printk("HVC at 0x%08lx (r0: 0x%08lx, imm: 0x%lx)", + __entry->vcpu_pc, __entry->r0, __entry->imm) +); + +TRACE_EVENT(kvm_arm_setup_debug, + TP_PROTO(struct kvm_vcpu *vcpu, __u32 guest_debug), + TP_ARGS(vcpu, guest_debug), + + TP_STRUCT__entry( + __field(struct kvm_vcpu *, vcpu) + __field(__u32, guest_debug) + ), + + TP_fast_assign( + __entry->vcpu = vcpu; + __entry->guest_debug = guest_debug; + ), + + TP_printk("vcpu: %p, flags: 0x%08x", __entry->vcpu, __entry->guest_debug) +); + +TRACE_EVENT(kvm_arm_clear_debug, + TP_PROTO(__u32 guest_debug), + TP_ARGS(guest_debug), + + TP_STRUCT__entry( + __field(__u32, guest_debug) + ), + + TP_fast_assign( + __entry->guest_debug = guest_debug; + ), + + TP_printk("flags: 0x%08x", __entry->guest_debug) +); + +TRACE_EVENT(kvm_arm_set_dreg32, + TP_PROTO(const char *name, __u32 value), + TP_ARGS(name, value), + + TP_STRUCT__entry( + __field(const char *, name) + __field(__u32, value) + ), + + TP_fast_assign( + __entry->name = name; + __entry->value = value; + ), + + TP_printk("%s: 0x%08x", __entry->name, __entry->value) +); + +TRACE_DEFINE_SIZEOF(__u64); + +TRACE_EVENT(kvm_arm_set_regset, + TP_PROTO(const char *type, int len, __u64 *control, __u64 *value), + TP_ARGS(type, len, control, value), + TP_STRUCT__entry( + __field(const char *, name) + __field(int, len) + __array(u64, ctrls, 16) + __array(u64, values, 16) + ), + TP_fast_assign( + __entry->name = type; + __entry->len = len; + memcpy(__entry->ctrls, control, len << 3); + memcpy(__entry->values, value, len << 3); + ), + TP_printk("%d %s CTRL:%s VALUE:%s", __entry->len, __entry->name, + __print_array(__entry->ctrls, __entry->len, sizeof(__u64)), + __print_array(__entry->values, __entry->len, sizeof(__u64))) +); + +TRACE_EVENT(trap_reg, + TP_PROTO(const char *fn, int reg, bool is_write, u64 write_value), + TP_ARGS(fn, reg, is_write, write_value), + + TP_STRUCT__entry( + __field(const char *, fn) + __field(int, reg) + __field(bool, is_write) + __field(u64, write_value) + ), + + TP_fast_assign( + __entry->fn = fn; + __entry->reg = reg; + __entry->is_write = is_write; + __entry->write_value = write_value; + ), + + TP_printk("%s %s reg %d (0x%08llx)", __entry->fn, __entry->is_write?"write 
to":"read from", __entry->reg, __entry->write_value) +); + +TRACE_EVENT(kvm_handle_sys_reg, + TP_PROTO(unsigned long hsr), + TP_ARGS(hsr), + + TP_STRUCT__entry( + __field(unsigned long, hsr) + ), + + TP_fast_assign( + __entry->hsr = hsr; + ), + + TP_printk("HSR 0x%08lx", __entry->hsr) +); + +TRACE_EVENT(kvm_sys_access, + TP_PROTO(unsigned long vcpu_pc, struct sys_reg_params *params, const struct sys_reg_desc *reg), + TP_ARGS(vcpu_pc, params, reg), + + TP_STRUCT__entry( + __field(unsigned long, vcpu_pc) + __field(bool, is_write) + __field(const char *, name) + __field(u8, Op0) + __field(u8, Op1) + __field(u8, CRn) + __field(u8, CRm) + __field(u8, Op2) + ), + + TP_fast_assign( + __entry->vcpu_pc = vcpu_pc; + __entry->is_write = params->is_write; + __entry->name = reg->name; + __entry->Op0 = reg->Op0; + __entry->Op0 = reg->Op0; + __entry->Op1 = reg->Op1; + __entry->CRn = reg->CRn; + __entry->CRm = reg->CRm; + __entry->Op2 = reg->Op2; + ), + + TP_printk("PC: %lx %s (%d,%d,%d,%d,%d) %s", + __entry->vcpu_pc, __entry->name ?: "UNKN", + __entry->Op0, __entry->Op1, __entry->CRn, + __entry->CRm, __entry->Op2, + __entry->is_write ? "write" : "read") +); + +TRACE_EVENT(kvm_set_guest_debug, + TP_PROTO(struct kvm_vcpu *vcpu, __u32 guest_debug), + TP_ARGS(vcpu, guest_debug), + + TP_STRUCT__entry( + __field(struct kvm_vcpu *, vcpu) + __field(__u32, guest_debug) + ), + + TP_fast_assign( + __entry->vcpu = vcpu; + __entry->guest_debug = guest_debug; + ), + + TP_printk("vcpu: %p, flags: 0x%08x", __entry->vcpu, __entry->guest_debug) +); + +#endif /* _TRACE_HANDLE_EXIT_ARM64_KVM_H */ + +#undef TRACE_INCLUDE_PATH +#define TRACE_INCLUDE_PATH . +#undef TRACE_INCLUDE_FILE +#define TRACE_INCLUDE_FILE trace_handle_exit + +/* This part must be outside protection */ +#include diff --git a/arch/arm64/kvm/vgic-sys-reg-v3.c b/arch/arm64/kvm/vgic-sys-reg-v3.c index e7d1ea92095d..2f92bdcb1188 100644 --- a/arch/arm64/kvm/vgic-sys-reg-v3.c +++ b/arch/arm64/kvm/vgic-sys-reg-v3.c @@ -7,7 +7,7 @@ #include #include #include -#include "vgic.h" +#include "vgic/vgic.h" #include "sys_regs.h" static bool access_gic_ctlr(struct kvm_vcpu *vcpu, struct sys_reg_params *p, diff --git a/virt/kvm/arm/vgic/trace.h b/arch/arm64/kvm/vgic/trace.h similarity index 93% rename from virt/kvm/arm/vgic/trace.h rename to arch/arm64/kvm/vgic/trace.h index 4fd4f6db181b..83c64401a7fc 100644 --- a/virt/kvm/arm/vgic/trace.h +++ b/arch/arm64/kvm/vgic/trace.h @@ -30,7 +30,7 @@ TRACE_EVENT(vgic_update_irq_pending, #endif /* _TRACE_VGIC_H */ #undef TRACE_INCLUDE_PATH -#define TRACE_INCLUDE_PATH ../../virt/kvm/arm/vgic +#define TRACE_INCLUDE_PATH ../../arch/arm64/kvm/vgic #undef TRACE_INCLUDE_FILE #define TRACE_INCLUDE_FILE trace diff --git a/virt/kvm/arm/vgic/vgic-debug.c b/arch/arm64/kvm/vgic/vgic-debug.c similarity index 100% rename from virt/kvm/arm/vgic/vgic-debug.c rename to arch/arm64/kvm/vgic/vgic-debug.c diff --git a/virt/kvm/arm/vgic/vgic-init.c b/arch/arm64/kvm/vgic/vgic-init.c similarity index 100% rename from virt/kvm/arm/vgic/vgic-init.c rename to arch/arm64/kvm/vgic/vgic-init.c diff --git a/virt/kvm/arm/vgic/vgic-irqfd.c b/arch/arm64/kvm/vgic/vgic-irqfd.c similarity index 100% rename from virt/kvm/arm/vgic/vgic-irqfd.c rename to arch/arm64/kvm/vgic/vgic-irqfd.c diff --git a/virt/kvm/arm/vgic/vgic-its.c b/arch/arm64/kvm/vgic/vgic-its.c similarity index 100% rename from virt/kvm/arm/vgic/vgic-its.c rename to arch/arm64/kvm/vgic/vgic-its.c diff --git a/virt/kvm/arm/vgic/vgic-kvm-device.c b/arch/arm64/kvm/vgic/vgic-kvm-device.c similarity index 100% 
rename from virt/kvm/arm/vgic/vgic-kvm-device.c rename to arch/arm64/kvm/vgic/vgic-kvm-device.c diff --git a/virt/kvm/arm/vgic/vgic-mmio-v2.c b/arch/arm64/kvm/vgic/vgic-mmio-v2.c similarity index 100% rename from virt/kvm/arm/vgic/vgic-mmio-v2.c rename to arch/arm64/kvm/vgic/vgic-mmio-v2.c diff --git a/virt/kvm/arm/vgic/vgic-mmio-v3.c b/arch/arm64/kvm/vgic/vgic-mmio-v3.c similarity index 100% rename from virt/kvm/arm/vgic/vgic-mmio-v3.c rename to arch/arm64/kvm/vgic/vgic-mmio-v3.c diff --git a/virt/kvm/arm/vgic/vgic-mmio.c b/arch/arm64/kvm/vgic/vgic-mmio.c similarity index 100% rename from virt/kvm/arm/vgic/vgic-mmio.c rename to arch/arm64/kvm/vgic/vgic-mmio.c diff --git a/virt/kvm/arm/vgic/vgic-mmio.h b/arch/arm64/kvm/vgic/vgic-mmio.h similarity index 100% rename from virt/kvm/arm/vgic/vgic-mmio.h rename to arch/arm64/kvm/vgic/vgic-mmio.h diff --git a/virt/kvm/arm/vgic/vgic-v2.c b/arch/arm64/kvm/vgic/vgic-v2.c similarity index 100% rename from virt/kvm/arm/vgic/vgic-v2.c rename to arch/arm64/kvm/vgic/vgic-v2.c diff --git a/virt/kvm/arm/vgic/vgic-v3.c b/arch/arm64/kvm/vgic/vgic-v3.c similarity index 99% rename from virt/kvm/arm/vgic/vgic-v3.c rename to arch/arm64/kvm/vgic/vgic-v3.c index 2c9fc13e2c59..5bc2ab58954b 100644 --- a/virt/kvm/arm/vgic/vgic-v3.c +++ b/arch/arm64/kvm/vgic/vgic-v3.c @@ -630,12 +630,10 @@ int vgic_v3_probe(const struct gic_kvm_info *info) if (kvm_vgic_global_state.vcpu_base == 0) kvm_info("disabling GICv2 emulation\n"); -#ifdef CONFIG_ARM64 if (cpus_have_const_cap(ARM64_WORKAROUND_CAVIUM_30115)) { group0_trap = true; group1_trap = true; } -#endif if (group0_trap || group1_trap || common_trap) { kvm_info("GICv3 sysreg trapping enabled ([%s%s%s], reduced performance)\n", diff --git a/virt/kvm/arm/vgic/vgic-v4.c b/arch/arm64/kvm/vgic/vgic-v4.c similarity index 100% rename from virt/kvm/arm/vgic/vgic-v4.c rename to arch/arm64/kvm/vgic/vgic-v4.c diff --git a/virt/kvm/arm/vgic/vgic.c b/arch/arm64/kvm/vgic/vgic.c similarity index 100% rename from virt/kvm/arm/vgic/vgic.c rename to arch/arm64/kvm/vgic/vgic.c diff --git a/virt/kvm/arm/vgic/vgic.h b/arch/arm64/kvm/vgic/vgic.h similarity index 100% rename from virt/kvm/arm/vgic/vgic.h rename to arch/arm64/kvm/vgic/vgic.h From d82755b2e781c8989614c82df7582f5649e265b8 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Tue, 5 May 2020 16:45:17 +0100 Subject: [PATCH 02/24] KVM: arm64: Kill off CONFIG_KVM_ARM_HOST CONFIG_KVM_ARM_HOST is just a proxy for CONFIG_KVM, so remove it in favour of the latter. 
Signed-off-by: Will Deacon Signed-off-by: Fuad Tabba Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20200505154520.194120-2-tabba@google.com --- arch/arm64/kernel/asm-offsets.c | 2 +- arch/arm64/kernel/cpu_errata.c | 2 +- arch/arm64/kernel/smp.c | 2 +- arch/arm64/kvm/Kconfig | 6 ---- arch/arm64/kvm/Makefile | 52 ++++++++++++++++----------------- arch/arm64/kvm/hyp/Makefile | 22 +++++++------- 6 files changed, 40 insertions(+), 46 deletions(-) diff --git a/arch/arm64/kernel/asm-offsets.c b/arch/arm64/kernel/asm-offsets.c index 9981a0a5a87f..a27e0cd731e9 100644 --- a/arch/arm64/kernel/asm-offsets.c +++ b/arch/arm64/kernel/asm-offsets.c @@ -96,7 +96,7 @@ int main(void) DEFINE(CPU_BOOT_PTRAUTH_KEY, offsetof(struct secondary_data, ptrauth_key)); #endif BLANK(); -#ifdef CONFIG_KVM_ARM_HOST +#ifdef CONFIG_KVM DEFINE(VCPU_CONTEXT, offsetof(struct kvm_vcpu, arch.ctxt)); DEFINE(VCPU_FAULT_DISR, offsetof(struct kvm_vcpu, arch.fault.disr_el1)); DEFINE(VCPU_WORKAROUND_FLAGS, offsetof(struct kvm_vcpu, arch.workaround_flags)); diff --git a/arch/arm64/kernel/cpu_errata.c b/arch/arm64/kernel/cpu_errata.c index df56d2295d16..a102321fc8a2 100644 --- a/arch/arm64/kernel/cpu_errata.c +++ b/arch/arm64/kernel/cpu_errata.c @@ -234,7 +234,7 @@ static int detect_harden_bp_fw(void) smccc_end = NULL; break; -#if IS_ENABLED(CONFIG_KVM_ARM_HOST) +#if IS_ENABLED(CONFIG_KVM) case SMCCC_CONDUIT_SMC: cb = call_smc_arch_workaround_1; smccc_start = __smccc_workaround_1_smc; diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c index 061f60fe452f..0a3045d9f33f 100644 --- a/arch/arm64/kernel/smp.c +++ b/arch/arm64/kernel/smp.c @@ -430,7 +430,7 @@ static void __init hyp_mode_check(void) "CPU: CPUs started in inconsistent modes"); else pr_info("CPU: All CPU(s) started at EL1\n"); - if (IS_ENABLED(CONFIG_KVM_ARM_HOST)) + if (IS_ENABLED(CONFIG_KVM)) kvm_compute_layout(); } diff --git a/arch/arm64/kvm/Kconfig b/arch/arm64/kvm/Kconfig index 449386d76441..ce724e526689 100644 --- a/arch/arm64/kvm/Kconfig +++ b/arch/arm64/kvm/Kconfig @@ -28,7 +28,6 @@ config KVM select HAVE_KVM_CPU_RELAX_INTERCEPT select HAVE_KVM_ARCH_TLB_FLUSH_ALL select KVM_MMIO - select KVM_ARM_HOST select KVM_GENERIC_DIRTYLOG_READ_PROTECT select SRCU select KVM_VFIO @@ -50,11 +49,6 @@ config KVM If unsure, say N. -config KVM_ARM_HOST - bool - ---help--- - Provides host support for ARM processors. 
- config KVM_ARM_PMU bool ---help--- diff --git a/arch/arm64/kvm/Makefile b/arch/arm64/kvm/Makefile index 7a3768538343..419696e615b3 100644 --- a/arch/arm64/kvm/Makefile +++ b/arch/arm64/kvm/Makefile @@ -7,33 +7,33 @@ ccflags-y += -I $(srctree)/$(src) KVM=../../../virt/kvm -obj-$(CONFIG_KVM_ARM_HOST) += kvm.o -obj-$(CONFIG_KVM_ARM_HOST) += hyp/ +obj-$(CONFIG_KVM) += kvm.o +obj-$(CONFIG_KVM) += hyp/ -kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/kvm_main.o $(KVM)/coalesced_mmio.o -kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/eventfd.o $(KVM)/vfio.o $(KVM)/irqchip.o -kvm-$(CONFIG_KVM_ARM_HOST) += arm.o mmu.o mmio.o -kvm-$(CONFIG_KVM_ARM_HOST) += psci.o perf.o -kvm-$(CONFIG_KVM_ARM_HOST) += hypercalls.o -kvm-$(CONFIG_KVM_ARM_HOST) += pvtime.o +kvm-$(CONFIG_KVM) += $(KVM)/kvm_main.o $(KVM)/coalesced_mmio.o +kvm-$(CONFIG_KVM) += $(KVM)/eventfd.o $(KVM)/vfio.o $(KVM)/irqchip.o +kvm-$(CONFIG_KVM) += arm.o mmu.o mmio.o +kvm-$(CONFIG_KVM) += psci.o perf.o +kvm-$(CONFIG_KVM) += hypercalls.o +kvm-$(CONFIG_KVM) += pvtime.o -kvm-$(CONFIG_KVM_ARM_HOST) += inject_fault.o regmap.o va_layout.o -kvm-$(CONFIG_KVM_ARM_HOST) += hyp.o hyp-init.o handle_exit.o -kvm-$(CONFIG_KVM_ARM_HOST) += guest.o debug.o reset.o sys_regs.o sys_regs_generic_v8.o -kvm-$(CONFIG_KVM_ARM_HOST) += vgic-sys-reg-v3.o fpsimd.o pmu.o -kvm-$(CONFIG_KVM_ARM_HOST) += aarch32.o -kvm-$(CONFIG_KVM_ARM_HOST) += arch_timer.o +kvm-$(CONFIG_KVM) += inject_fault.o regmap.o va_layout.o +kvm-$(CONFIG_KVM) += hyp.o hyp-init.o handle_exit.o +kvm-$(CONFIG_KVM) += guest.o debug.o reset.o sys_regs.o sys_regs_generic_v8.o +kvm-$(CONFIG_KVM) += vgic-sys-reg-v3.o fpsimd.o pmu.o +kvm-$(CONFIG_KVM) += aarch32.o +kvm-$(CONFIG_KVM) += arch_timer.o kvm-$(CONFIG_KVM_ARM_PMU) += pmu-emul.o -kvm-$(CONFIG_KVM_ARM_HOST) += vgic/vgic.o -kvm-$(CONFIG_KVM_ARM_HOST) += vgic/vgic-init.o -kvm-$(CONFIG_KVM_ARM_HOST) += vgic/vgic-irqfd.o -kvm-$(CONFIG_KVM_ARM_HOST) += vgic/vgic-v2.o -kvm-$(CONFIG_KVM_ARM_HOST) += vgic/vgic-v3.o -kvm-$(CONFIG_KVM_ARM_HOST) += vgic/vgic-v4.o -kvm-$(CONFIG_KVM_ARM_HOST) += vgic/vgic-mmio.o -kvm-$(CONFIG_KVM_ARM_HOST) += vgic/vgic-mmio-v2.o -kvm-$(CONFIG_KVM_ARM_HOST) += vgic/vgic-mmio-v3.o -kvm-$(CONFIG_KVM_ARM_HOST) += vgic/vgic-kvm-device.o -kvm-$(CONFIG_KVM_ARM_HOST) += vgic/vgic-its.o -kvm-$(CONFIG_KVM_ARM_HOST) += vgic/vgic-debug.o +kvm-$(CONFIG_KVM) += vgic/vgic.o +kvm-$(CONFIG_KVM) += vgic/vgic-init.o +kvm-$(CONFIG_KVM) += vgic/vgic-irqfd.o +kvm-$(CONFIG_KVM) += vgic/vgic-v2.o +kvm-$(CONFIG_KVM) += vgic/vgic-v3.o +kvm-$(CONFIG_KVM) += vgic/vgic-v4.o +kvm-$(CONFIG_KVM) += vgic/vgic-mmio.o +kvm-$(CONFIG_KVM) += vgic/vgic-mmio-v2.o +kvm-$(CONFIG_KVM) += vgic/vgic-mmio-v3.o +kvm-$(CONFIG_KVM) += vgic/vgic-kvm-device.o +kvm-$(CONFIG_KVM) += vgic/vgic-its.o +kvm-$(CONFIG_KVM) += vgic/vgic-debug.o diff --git a/arch/arm64/kvm/hyp/Makefile b/arch/arm64/kvm/hyp/Makefile index dc18274a6826..8229e47ba870 100644 --- a/arch/arm64/kvm/hyp/Makefile +++ b/arch/arm64/kvm/hyp/Makefile @@ -6,17 +6,17 @@ ccflags-y += -fno-stack-protector -DDISABLE_BRANCH_PROFILING \ $(DISABLE_STACKLEAK_PLUGIN) -obj-$(CONFIG_KVM_ARM_HOST) += vgic-v3-sr.o -obj-$(CONFIG_KVM_ARM_HOST) += timer-sr.o -obj-$(CONFIG_KVM_ARM_HOST) += aarch32.o -obj-$(CONFIG_KVM_ARM_HOST) += vgic-v2-cpuif-proxy.o -obj-$(CONFIG_KVM_ARM_HOST) += sysreg-sr.o -obj-$(CONFIG_KVM_ARM_HOST) += debug-sr.o -obj-$(CONFIG_KVM_ARM_HOST) += entry.o -obj-$(CONFIG_KVM_ARM_HOST) += switch.o -obj-$(CONFIG_KVM_ARM_HOST) += fpsimd.o -obj-$(CONFIG_KVM_ARM_HOST) += tlb.o -obj-$(CONFIG_KVM_ARM_HOST) += hyp-entry.o +obj-$(CONFIG_KVM) += 
vgic-v3-sr.o +obj-$(CONFIG_KVM) += timer-sr.o +obj-$(CONFIG_KVM) += aarch32.o +obj-$(CONFIG_KVM) += vgic-v2-cpuif-proxy.o +obj-$(CONFIG_KVM) += sysreg-sr.o +obj-$(CONFIG_KVM) += debug-sr.o +obj-$(CONFIG_KVM) += entry.o +obj-$(CONFIG_KVM) += switch.o +obj-$(CONFIG_KVM) += fpsimd.o +obj-$(CONFIG_KVM) += tlb.o +obj-$(CONFIG_KVM) += hyp-entry.o # KVM code is run at a different exception code with a different map, so # compiler instrumentation that inserts callbacks or checks into the code may From bf7bc1df30f6c6afa34d4d1d53e1c8ad93510d3e Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Tue, 5 May 2020 16:45:18 +0100 Subject: [PATCH 03/24] KVM: arm64: Update help text arm64 KVM supports 16k pages since 02e0b7600f83 ("arm64: kvm: Add support for 16K pages"), so update the Kconfig help text accordingly. Signed-off-by: Will Deacon Signed-off-by: Fuad Tabba Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20200505154520.194120-3-tabba@google.com --- arch/arm64/kvm/Kconfig | 2 -- 1 file changed, 2 deletions(-) diff --git a/arch/arm64/kvm/Kconfig b/arch/arm64/kvm/Kconfig index ce724e526689..d2cf4f099454 100644 --- a/arch/arm64/kvm/Kconfig +++ b/arch/arm64/kvm/Kconfig @@ -44,8 +44,6 @@ config KVM select TASK_DELAY_ACCT ---help--- Support hosting virtualized guest machines. - We don't support KVM with 16K page tables yet, due to the multiple - levels of fake page tables. If unsure, say N. From f26133624d602b0d984815168a2d3a1f630b02e2 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Tue, 5 May 2020 16:45:19 +0100 Subject: [PATCH 04/24] KVM: arm64: Change CONFIG_KVM to a menuconfig entry Changing CONFIG_KVM to be a 'menuconfig' entry in Kconfig mean that we can straightforwardly enumerate optional features, such as the virtual PMU device as dependent options. Signed-off-by: Will Deacon Signed-off-by: Fuad Tabba Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20200505154520.194120-4-tabba@google.com --- arch/arm64/kvm/Kconfig | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/arch/arm64/kvm/Kconfig b/arch/arm64/kvm/Kconfig index d2cf4f099454..f1c1f981482c 100644 --- a/arch/arm64/kvm/Kconfig +++ b/arch/arm64/kvm/Kconfig @@ -3,7 +3,6 @@ # KVM configuration # -source "virt/kvm/Kconfig" source "virt/lib/Kconfig" menuconfig VIRTUALIZATION @@ -18,7 +17,7 @@ menuconfig VIRTUALIZATION if VIRTUALIZATION -config KVM +menuconfig KVM bool "Kernel-based Virtual Machine (KVM) support" depends on OF # for TASKSTATS/TASK_DELAY_ACCT: @@ -33,7 +32,6 @@ config KVM select KVM_VFIO select HAVE_KVM_EVENTFD select HAVE_KVM_IRQFD - select KVM_ARM_PMU if HW_PERF_EVENTS select HAVE_KVM_MSI select HAVE_KVM_IRQCHIP select HAVE_KVM_IRQ_ROUTING @@ -47,13 +45,21 @@ config KVM If unsure, say N. +if KVM + +source "virt/kvm/Kconfig" + config KVM_ARM_PMU - bool + bool "Virtual Performance Monitoring Unit (PMU) support" + depends on HW_PERF_EVENTS + default y ---help--- Adds support for a virtual Performance Monitoring Unit (PMU) in virtual machines. config KVM_INDIRECT_VECTORS - def_bool KVM && (HARDEN_BRANCH_PREDICTOR || HARDEN_EL2_VECTORS) + def_bool HARDEN_BRANCH_PREDICTOR || HARDEN_EL2_VECTORS + +endif # KVM endif # VIRTUALIZATION From 25357de01b95140ecacd4d9347d74df2dda789f2 Mon Sep 17 00:00:00 2001 From: Fuad Tabba Date: Tue, 5 May 2020 16:45:20 +0100 Subject: [PATCH 05/24] KVM: arm64: Clean up kvm makefiles Consolidate references to the CONFIG_KVM configuration item to encompass entire folders rather than per line. 
Signed-off-by: Fuad Tabba Signed-off-by: Marc Zyngier Acked-by: Will Deacon Link: https://lore.kernel.org/r/20200505154520.194120-5-tabba@google.com --- arch/arm64/kvm/Makefile | 38 +++++++++++++------------------------ arch/arm64/kvm/hyp/Makefile | 15 ++++----------- 2 files changed, 17 insertions(+), 36 deletions(-) diff --git a/arch/arm64/kvm/Makefile b/arch/arm64/kvm/Makefile index 419696e615b3..8d3d9513cbfe 100644 --- a/arch/arm64/kvm/Makefile +++ b/arch/arm64/kvm/Makefile @@ -10,30 +10,18 @@ KVM=../../../virt/kvm obj-$(CONFIG_KVM) += kvm.o obj-$(CONFIG_KVM) += hyp/ -kvm-$(CONFIG_KVM) += $(KVM)/kvm_main.o $(KVM)/coalesced_mmio.o -kvm-$(CONFIG_KVM) += $(KVM)/eventfd.o $(KVM)/vfio.o $(KVM)/irqchip.o -kvm-$(CONFIG_KVM) += arm.o mmu.o mmio.o -kvm-$(CONFIG_KVM) += psci.o perf.o -kvm-$(CONFIG_KVM) += hypercalls.o -kvm-$(CONFIG_KVM) += pvtime.o +kvm-y := $(KVM)/kvm_main.o $(KVM)/coalesced_mmio.o $(KVM)/eventfd.o \ + $(KVM)/vfio.o $(KVM)/irqchip.o \ + arm.o mmu.o mmio.o psci.o perf.o hypercalls.o pvtime.o \ + inject_fault.o regmap.o va_layout.o hyp.o hyp-init.o handle_exit.o \ + guest.o debug.o reset.o sys_regs.o sys_regs_generic_v8.o \ + vgic-sys-reg-v3.o fpsimd.o pmu.o \ + aarch32.o arch_timer.o \ + vgic/vgic.o vgic/vgic-init.o \ + vgic/vgic-irqfd.o vgic/vgic-v2.o \ + vgic/vgic-v3.o vgic/vgic-v4.o \ + vgic/vgic-mmio.o vgic/vgic-mmio-v2.o \ + vgic/vgic-mmio-v3.o vgic/vgic-kvm-device.o \ + vgic/vgic-its.o vgic/vgic-debug.o -kvm-$(CONFIG_KVM) += inject_fault.o regmap.o va_layout.o -kvm-$(CONFIG_KVM) += hyp.o hyp-init.o handle_exit.o -kvm-$(CONFIG_KVM) += guest.o debug.o reset.o sys_regs.o sys_regs_generic_v8.o -kvm-$(CONFIG_KVM) += vgic-sys-reg-v3.o fpsimd.o pmu.o -kvm-$(CONFIG_KVM) += aarch32.o -kvm-$(CONFIG_KVM) += arch_timer.o kvm-$(CONFIG_KVM_ARM_PMU) += pmu-emul.o - -kvm-$(CONFIG_KVM) += vgic/vgic.o -kvm-$(CONFIG_KVM) += vgic/vgic-init.o -kvm-$(CONFIG_KVM) += vgic/vgic-irqfd.o -kvm-$(CONFIG_KVM) += vgic/vgic-v2.o -kvm-$(CONFIG_KVM) += vgic/vgic-v3.o -kvm-$(CONFIG_KVM) += vgic/vgic-v4.o -kvm-$(CONFIG_KVM) += vgic/vgic-mmio.o -kvm-$(CONFIG_KVM) += vgic/vgic-mmio-v2.o -kvm-$(CONFIG_KVM) += vgic/vgic-mmio-v3.o -kvm-$(CONFIG_KVM) += vgic/vgic-kvm-device.o -kvm-$(CONFIG_KVM) += vgic/vgic-its.o -kvm-$(CONFIG_KVM) += vgic/vgic-debug.o diff --git a/arch/arm64/kvm/hyp/Makefile b/arch/arm64/kvm/hyp/Makefile index 8229e47ba870..8c9880783839 100644 --- a/arch/arm64/kvm/hyp/Makefile +++ b/arch/arm64/kvm/hyp/Makefile @@ -6,17 +6,10 @@ ccflags-y += -fno-stack-protector -DDISABLE_BRANCH_PROFILING \ $(DISABLE_STACKLEAK_PLUGIN) -obj-$(CONFIG_KVM) += vgic-v3-sr.o -obj-$(CONFIG_KVM) += timer-sr.o -obj-$(CONFIG_KVM) += aarch32.o -obj-$(CONFIG_KVM) += vgic-v2-cpuif-proxy.o -obj-$(CONFIG_KVM) += sysreg-sr.o -obj-$(CONFIG_KVM) += debug-sr.o -obj-$(CONFIG_KVM) += entry.o -obj-$(CONFIG_KVM) += switch.o -obj-$(CONFIG_KVM) += fpsimd.o -obj-$(CONFIG_KVM) += tlb.o -obj-$(CONFIG_KVM) += hyp-entry.o +obj-$(CONFIG_KVM) += hyp.o + +hyp-y := vgic-v3-sr.o timer-sr.o aarch32.o vgic-v2-cpuif-proxy.o sysreg-sr.o \ + debug-sr.o entry.o switch.o fpsimd.o tlb.o hyp-entry.o # KVM code is run at a different exception code with a different map, so # compiler instrumentation that inserts callbacks or checks into the code may From c6fe89ff8b250ad4dc4bed7bd5877bfbc35f4aba Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Wed, 13 May 2020 11:58:29 +0100 Subject: [PATCH 06/24] KVM: arm64: Simplify __kvm_timer_set_cntvoff implementation Now that this function isn't constrained by the 32bit PCS, let's simplify it by taking a single 64bit 
offset instead of two 32bit parameters. Signed-off-by: Marc Zyngier --- arch/arm64/include/asm/kvm_asm.h | 2 +- arch/arm64/kvm/arch_timer.c | 12 +----------- arch/arm64/kvm/hyp/timer-sr.c | 3 +-- 3 files changed, 3 insertions(+), 14 deletions(-) diff --git a/arch/arm64/include/asm/kvm_asm.h b/arch/arm64/include/asm/kvm_asm.h index 7c7eeeaab9fa..59e314f38e43 100644 --- a/arch/arm64/include/asm/kvm_asm.h +++ b/arch/arm64/include/asm/kvm_asm.h @@ -64,7 +64,7 @@ extern void __kvm_tlb_flush_vmid_ipa(struct kvm *kvm, phys_addr_t ipa); extern void __kvm_tlb_flush_vmid(struct kvm *kvm); extern void __kvm_tlb_flush_local_vmid(struct kvm_vcpu *vcpu); -extern void __kvm_timer_set_cntvoff(u32 cntvoff_low, u32 cntvoff_high); +extern void __kvm_timer_set_cntvoff(u64 cntvoff); extern int kvm_vcpu_run_vhe(struct kvm_vcpu *vcpu); diff --git a/arch/arm64/kvm/arch_timer.c b/arch/arm64/kvm/arch_timer.c index 93bd59b46848..487eba9f87cd 100644 --- a/arch/arm64/kvm/arch_timer.c +++ b/arch/arm64/kvm/arch_timer.c @@ -451,17 +451,7 @@ static void timer_restore_state(struct arch_timer_context *ctx) static void set_cntvoff(u64 cntvoff) { - u32 low = lower_32_bits(cntvoff); - u32 high = upper_32_bits(cntvoff); - - /* - * Since kvm_call_hyp doesn't fully support the ARM PCS especially on - * 32-bit systems, but rather passes register by register shifted one - * place (we put the function address in r0/x0), we cannot simply pass - * a 64-bit value as an argument, but have to split the value in two - * 32-bit halves. - */ - kvm_call_hyp(__kvm_timer_set_cntvoff, low, high); + kvm_call_hyp(__kvm_timer_set_cntvoff, cntvoff); } static inline void set_timer_irq_phys_active(struct arch_timer_context *ctx, bool active) diff --git a/arch/arm64/kvm/hyp/timer-sr.c b/arch/arm64/kvm/hyp/timer-sr.c index ff76e6845fe4..fb5c0be33223 100644 --- a/arch/arm64/kvm/hyp/timer-sr.c +++ b/arch/arm64/kvm/hyp/timer-sr.c @@ -10,9 +10,8 @@ #include -void __hyp_text __kvm_timer_set_cntvoff(u32 cntvoff_low, u32 cntvoff_high) +void __hyp_text __kvm_timer_set_cntvoff(u64 cntvoff) { - u64 cntvoff = (u64)cntvoff_high << 32 | cntvoff_low; write_sysreg(cntvoff, cntvoff_el2); } From ce6f8f02f9f6786355fa6c79d88b839639dd75d8 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Wed, 13 May 2020 11:38:28 +0100 Subject: [PATCH 07/24] KVM: arm64: Use cpus_have_final_cap for has_vhe() By the time we start using the has_vhe() helper, we have long discovered whether we are running VHE or not. It thus makes sense to use cpus_have_final_cap() instead of cpus_have_const_cap(), which leads to a small text size reduction. Signed-off-by: Marc Zyngier Acked-by: David Brazdil Link: https://lore.kernel.org/r/20200513103828.74580-1-maz@kernel.org --- arch/arm64/include/asm/virt.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/include/asm/virt.h b/arch/arm64/include/asm/virt.h index 61fd26752adc..5051b388c654 100644 --- a/arch/arm64/include/asm/virt.h +++ b/arch/arm64/include/asm/virt.h @@ -85,7 +85,7 @@ static inline bool is_kernel_in_hyp_mode(void) static __always_inline bool has_vhe(void) { - if (cpus_have_const_cap(ARM64_HAS_VIRT_HOST_EXTN)) + if (cpus_have_final_cap(ARM64_HAS_VIRT_HOST_EXTN)) return true; return false; From 656012c731fcfd0f770007366e2b952a613745f2 Mon Sep 17 00:00:00 2001 From: Fuad Tabba Date: Wed, 1 Apr 2020 15:03:10 +0100 Subject: [PATCH 08/24] KVM: Fix spelling in code comments Fix spelling and typos (e.g., repeated words) in comments. 
Signed-off-by: Fuad Tabba Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20200401140310.29701-1-tabba@google.com --- arch/arm64/kvm/arm.c | 6 +++--- arch/arm64/kvm/guest.c | 4 ++-- arch/arm64/kvm/hyp/vgic-v3-sr.c | 2 +- arch/arm64/kvm/mmio.c | 2 +- arch/arm64/kvm/mmu.c | 6 +++--- arch/arm64/kvm/psci.c | 6 +++--- arch/arm64/kvm/reset.c | 6 +++--- arch/arm64/kvm/sys_regs.c | 6 +++--- arch/arm64/kvm/vgic/vgic-v3.c | 2 +- virt/kvm/coalesced_mmio.c | 2 +- virt/kvm/eventfd.c | 2 +- virt/kvm/kvm_main.c | 2 +- 12 files changed, 23 insertions(+), 23 deletions(-) diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c index c958bb37b769..ee1b5bba1d08 100644 --- a/arch/arm64/kvm/arm.c +++ b/arch/arm64/kvm/arm.c @@ -455,9 +455,9 @@ void force_vm_exit(const cpumask_t *mask) * * The hardware supports a limited set of values with the value zero reserved * for the host, so we check if an assigned value belongs to a previous - * generation, which which requires us to assign a new value. If we're the - * first to use a VMID for the new generation, we must flush necessary caches - * and TLBs on all CPUs. + * generation, which requires us to assign a new value. If we're the first to + * use a VMID for the new generation, we must flush necessary caches and TLBs + * on all CPUs. */ static bool need_new_vmid_gen(struct kvm_vmid *vmid) { diff --git a/arch/arm64/kvm/guest.c b/arch/arm64/kvm/guest.c index 50a279d3ddd7..871d51729b63 100644 --- a/arch/arm64/kvm/guest.c +++ b/arch/arm64/kvm/guest.c @@ -267,7 +267,7 @@ static int set_sve_vls(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg) /* * Vector lengths supported by the host can't currently be * hidden from the guest individually: instead we can only set a - * maxmium via ZCR_EL2.LEN. So, make sure the available vector + * maximum via ZCR_EL2.LEN. So, make sure the available vector * lengths match the set requested exactly up to the requested * maximum: */ @@ -337,7 +337,7 @@ static int sve_reg_to_region(struct sve_state_reg_region *region, unsigned int reg_num; unsigned int reqoffset, reqlen; /* User-requested offset and length */ - unsigned int maxlen; /* Maxmimum permitted length */ + unsigned int maxlen; /* Maximum permitted length */ size_t sve_state_size; diff --git a/arch/arm64/kvm/hyp/vgic-v3-sr.c b/arch/arm64/kvm/hyp/vgic-v3-sr.c index 49fedf6710f9..6b85773e15c4 100644 --- a/arch/arm64/kvm/hyp/vgic-v3-sr.c +++ b/arch/arm64/kvm/hyp/vgic-v3-sr.c @@ -577,7 +577,7 @@ static u8 __hyp_text __vgic_v3_pri_to_pre(u8 pri, u32 vmcr, int grp) /* * The priority value is independent of any of the BPR values, so we - * normalize it using the minumal BPR value. This guarantees that no + * normalize it using the minimal BPR value. This guarantees that no * matter what the guest does with its BPR, we can always set/get the * same value of a priority. */ diff --git a/arch/arm64/kvm/mmio.c b/arch/arm64/kvm/mmio.c index aedfcff99ac5..4e0366759726 100644 --- a/arch/arm64/kvm/mmio.c +++ b/arch/arm64/kvm/mmio.c @@ -131,7 +131,7 @@ int io_mem_abort(struct kvm_vcpu *vcpu, struct kvm_run *run, /* * No valid syndrome? Ask userspace for help if it has - * voluntered to do so, and bail out otherwise. + * volunteered to do so, and bail out otherwise. 
*/ if (!kvm_vcpu_dabt_isvalid(vcpu)) { if (vcpu->kvm->arch.return_nisv_io_abort_to_user) { diff --git a/arch/arm64/kvm/mmu.c b/arch/arm64/kvm/mmu.c index e3b9ee268823..29d8f24df944 100644 --- a/arch/arm64/kvm/mmu.c +++ b/arch/arm64/kvm/mmu.c @@ -784,7 +784,7 @@ static int __create_hyp_private_mapping(phys_addr_t phys_addr, size_t size, mutex_lock(&kvm_hyp_pgd_mutex); /* - * This assumes that we we have enough space below the idmap + * This assumes that we have enough space below the idmap * page to allocate our VAs. If not, the check below will * kick. A potential alternative would be to detect that * overflow and switch to an allocation above the idmap. @@ -964,7 +964,7 @@ static void stage2_unmap_memslot(struct kvm *kvm, * stage2_unmap_vm - Unmap Stage-2 RAM mappings * @kvm: The struct kvm pointer * - * Go through the memregions and unmap any reguler RAM + * Go through the memregions and unmap any regular RAM * backing memory already mapped to the VM. */ void stage2_unmap_vm(struct kvm *kvm) @@ -2262,7 +2262,7 @@ void kvm_arch_commit_memory_region(struct kvm *kvm, { /* * At this point memslot has been committed and there is an - * allocated dirty_bitmap[], dirty pages will be be tracked while the + * allocated dirty_bitmap[], dirty pages will be tracked while the * memory slot is write protected. */ if (change != KVM_MR_DELETE && mem->flags & KVM_MEM_LOG_DIRTY_PAGES) diff --git a/arch/arm64/kvm/psci.c b/arch/arm64/kvm/psci.c index ae364716ee40..83415e96b589 100644 --- a/arch/arm64/kvm/psci.c +++ b/arch/arm64/kvm/psci.c @@ -94,7 +94,7 @@ static unsigned long kvm_psci_vcpu_on(struct kvm_vcpu *source_vcpu) /* * NOTE: We always update r0 (or x0) because for PSCI v0.1 - * the general puspose registers are undefined upon CPU_ON. + * the general purpose registers are undefined upon CPU_ON. */ reset_state->r0 = smccc_get_arg3(source_vcpu); @@ -265,10 +265,10 @@ static int kvm_psci_0_2_call(struct kvm_vcpu *vcpu) case PSCI_0_2_FN_SYSTEM_OFF: kvm_psci_system_off(vcpu); /* - * We should'nt be going back to guest VCPU after + * We shouldn't be going back to guest VCPU after * receiving SYSTEM_OFF request. * - * If user space accidently/deliberately resumes + * If user space accidentally/deliberately resumes * guest VCPU after SYSTEM_OFF request then guest * VCPU should see internal failure from PSCI return * value. To achieve this, we preload r0 (or x0) with diff --git a/arch/arm64/kvm/reset.c b/arch/arm64/kvm/reset.c index 30b7ea680f66..658f3a79617b 100644 --- a/arch/arm64/kvm/reset.c +++ b/arch/arm64/kvm/reset.c @@ -163,7 +163,7 @@ static int kvm_vcpu_finalize_sve(struct kvm_vcpu *vcpu) vl = vcpu->arch.sve_max_vl; /* - * Resposibility for these properties is shared between + * Responsibility for these properties is shared between * kvm_arm_init_arch_resources(), kvm_vcpu_enable_sve() and * set_sve_vls(). Double-check here just to be sure: */ @@ -249,7 +249,7 @@ static int kvm_vcpu_enable_ptrauth(struct kvm_vcpu *vcpu) * ioctl or as part of handling a request issued by another VCPU in the PSCI * handling code. In the first case, the VCPU will not be loaded, and in the * second case the VCPU will be loaded. Because this function operates purely - * on the memory-backed valus of system registers, we want to do a full put if + * on the memory-backed values of system registers, we want to do a full put if * we were loaded (handling a request) and load the values back at the end of * the function. Otherwise we leave the state alone. 
In both cases, we * disable preemption around the vcpu reset as we would otherwise race with @@ -357,7 +357,7 @@ void kvm_set_ipa_limit(void) * * So clamp the ipa limit further down to limit the number of levels. * Since we can concatenate upto 16 tables at entry level, we could - * go upto 4bits above the maximum VA addressible with the current + * go upto 4bits above the maximum VA addressable with the current * number of levels. */ va_max = PGDIR_SHIFT + PAGE_SHIFT - 3; diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c index 51db934702b6..620eaf11e672 100644 --- a/arch/arm64/kvm/sys_regs.c +++ b/arch/arm64/kvm/sys_regs.c @@ -34,7 +34,7 @@ #include "trace.h" /* - * All of this file is extremly similar to the ARM coproc.c, but the + * All of this file is extremely similar to the ARM coproc.c, but the * types are different. My gut feeling is that it should be pretty * easy to merge, but that would be an ABI breakage -- again. VFP * would also need to be abstracted. @@ -118,8 +118,8 @@ void vcpu_write_sys_reg(struct kvm_vcpu *vcpu, u64 val, int reg) * entry to the guest but are only restored on vcpu_load. * * Note that MPIDR_EL1 for the guest is set by KVM via VMPIDR_EL2 but - * should never be listed below, because the the MPIDR should only be - * set once, before running the VCPU, and never changed later. + * should never be listed below, because the MPIDR should only be set + * once, before running the VCPU, and never changed later. */ switch (reg) { case CSSELR_EL1: write_sysreg_s(val, SYS_CSSELR_EL1); return; diff --git a/arch/arm64/kvm/vgic/vgic-v3.c b/arch/arm64/kvm/vgic/vgic-v3.c index 5bc2ab58954b..3ccd6d3cb4d3 100644 --- a/arch/arm64/kvm/vgic/vgic-v3.c +++ b/arch/arm64/kvm/vgic/vgic-v3.c @@ -587,7 +587,7 @@ int vgic_v3_probe(const struct gic_kvm_info *info) int ret; /* - * The ListRegs field is 5 bits, but there is a architectural + * The ListRegs field is 5 bits, but there is an architectural * maximum of 16 list registers. Just ignore bit 4... */ kvm_vgic_global_state.nr_lr = (ich_vtr_el2 & 0xf) + 1; diff --git a/virt/kvm/coalesced_mmio.c b/virt/kvm/coalesced_mmio.c index 00c747dbc82e..e2c197fd4f9d 100644 --- a/virt/kvm/coalesced_mmio.c +++ b/virt/kvm/coalesced_mmio.c @@ -119,7 +119,7 @@ int kvm_coalesced_mmio_init(struct kvm *kvm) /* * We're using this spinlock to sync access to the coalesced ring. - * The list doesn't need it's own lock since device registration and + * The list doesn't need its own lock since device registration and * unregistration should only happen when kvm->slots_lock is held. */ spin_lock_init(&kvm->ring_lock); diff --git a/virt/kvm/eventfd.c b/virt/kvm/eventfd.c index 67b6fc153e9c..e586d1395c28 100644 --- a/virt/kvm/eventfd.c +++ b/virt/kvm/eventfd.c @@ -116,7 +116,7 @@ irqfd_shutdown(struct work_struct *work) struct kvm *kvm = irqfd->kvm; u64 cnt; - /* Make sure irqfd has been initalized in assign path. */ + /* Make sure irqfd has been initialized in assign path. */ synchronize_srcu(&kvm->irq_srcu); /* diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 74bdb7bf3295..f57792b1541b 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -2799,7 +2799,7 @@ EXPORT_SYMBOL_GPL(kvm_vcpu_yield_to); * * (a) VCPU which has not done pl-exit or cpu relax intercepted recently * (preempted lock holder), indicated by @in_spin_loop. - * Set at the beiginning and cleared at the end of interception/PLE handler. + * Set at the beginning and cleared at the end of interception/PLE handler. 
  *
  * (b) VCPU which has done pl-exit/ cpu relax intercepted but did not get
  *  chance last time (mostly it has become eligible now since we have probably

From 892713e97ca146591515b3c115f99cdf632030fb Mon Sep 17 00:00:00 2001
From: Zenghui Yu
Date: Wed, 15 Apr 2020 15:28:35 +0800
Subject: [PATCH 09/24] KVM: arm64: Sidestep stage2_unmap_vm() on vcpu reset when S2FWB is supported

stage2_unmap_vm() was introduced to unmap the user RAM region in the
stage2 page table to make the caches coherent. E.g., a guest reboot with
the stage1 MMU disabled will access memory using non-cacheable attributes.
If the RAM and caches are not coherent at this stage, some evicted dirty
cache lines may corrupt guest data in RAM.

Since ARMv8.4, the S2FWB feature is mandatory and KVM will take advantage
of it to configure the stage2 page table and the attributes of memory
accesses. So we ensure that guests always access memory using cacheable
attributes, and thus the caches always remain coherent. So on CPUs that
support S2FWB, we can safely reset the vcpu without a heavy stage2
unmapping.

Signed-off-by: Zenghui Yu
Signed-off-by: Marc Zyngier
Link: https://lore.kernel.org/r/20200415072835.1164-1-yuzenghui@huawei.com
---
 arch/arm64/kvm/arm.c | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c
index ee1b5bba1d08..0ea9a0266d9a 100644
--- a/arch/arm64/kvm/arm.c
+++ b/arch/arm64/kvm/arm.c
@@ -983,8 +983,11 @@ static int kvm_arch_vcpu_ioctl_vcpu_init(struct kvm_vcpu *vcpu,
 	/*
 	 * Ensure a rebooted VM will fault in RAM pages and detect if the
 	 * guest MMU is turned off and flush the caches as needed.
+	 *
+	 * S2FWB enforces all memory accesses to RAM being cacheable, we
+	 * ensure that the cache is always coherent.
 	 */
-	if (vcpu->arch.has_run_once)
+	if (vcpu->arch.has_run_once && !cpus_have_const_cap(ARM64_HAS_STAGE2_FWB))
 		stage2_unmap_vm(vcpu->kvm);
 
 	vcpu_reset_hcr(vcpu);

From 48c963e31bc664afafd31058483ea8390da63980 Mon Sep 17 00:00:00 2001
From: Jiang Yi
Date: Wed, 15 Apr 2020 10:42:29 +0200
Subject: [PATCH 10/24] KVM: arm/arm64: Release kvm->mmu_lock in loop to prevent starvation

Do cond_resched_lock() in stage2_flush_memslot() like what is done in
unmap_stage2_range() and other places holding mmu_lock while processing
a possibly large range of memory.

Signed-off-by: Jiang Yi
Signed-off-by: Marc Zyngier
Reviewed-by: Suzuki K Poulose
Link: https://lore.kernel.org/r/20200415084229.29992-1-giangyi@amazon.com
---
 arch/arm64/kvm/mmu.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/arch/arm64/kvm/mmu.c b/arch/arm64/kvm/mmu.c
index 29d8f24df944..917363375e8a 100644
--- a/arch/arm64/kvm/mmu.c
+++ b/arch/arm64/kvm/mmu.c
@@ -422,6 +422,9 @@ static void stage2_flush_memslot(struct kvm *kvm,
 		next = stage2_pgd_addr_end(kvm, addr, end);
 		if (!stage2_pgd_none(kvm, *pgd))
 			stage2_flush_puds(kvm, pgd, addr, next);
+
+		if (next != end)
+			cond_resched_lock(&kvm->mmu_lock);
 	} while (pgd++, addr = next, addr != end);
 }

From 9f2836146b11cdf98d5c8f8f71b0fce28fbd83c8 Mon Sep 17 00:00:00 2001
From: Suzuki K Poulose
Date: Thu, 7 May 2020 20:35:45 +0800
Subject: [PATCH 11/24] KVM: arm64: Clean up the checking for huge mapping

If we are checking whether the stage2 can map PAGE_SIZE, we don't have
to do the boundary checks as both the host VMA and the guest memslots
are page aligned. Bail out early in that case. While we're at it, fix up
a typo in the comment below.
Signed-off-by: Suzuki K Poulose Signed-off-by: Zenghui Yu Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20200507123546.1875-2-yuzenghui@huawei.com --- arch/arm64/kvm/mmu.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/arch/arm64/kvm/mmu.c b/arch/arm64/kvm/mmu.c index 917363375e8a..ccb44e7d30d9 100644 --- a/arch/arm64/kvm/mmu.c +++ b/arch/arm64/kvm/mmu.c @@ -1610,6 +1610,10 @@ static bool fault_supports_stage2_huge_mapping(struct kvm_memory_slot *memslot, hva_t uaddr_start, uaddr_end; size_t size; + /* The memslot and the VMA are guaranteed to be aligned to PAGE_SIZE */ + if (map_size == PAGE_SIZE) + return true; + size = memslot->npages * PAGE_SIZE; gpa_start = memslot->base_gfn << PAGE_SHIFT; @@ -1629,7 +1633,7 @@ static bool fault_supports_stage2_huge_mapping(struct kvm_memory_slot *memslot, * |abcde|fgh Stage-1 block | Stage-1 block tv|xyz| * +-----+--------------------+--------------------+---+ * - * memslot->base_gfn << PAGE_SIZE: + * memslot->base_gfn << PAGE_SHIFT: * +---+--------------------+--------------------+-----+ * |abc|def Stage-2 block | Stage-2 block |tvxyz| * +---+--------------------+--------------------+-----+ From 0529c9021252a58b6d3808da86986a614b900b1b Mon Sep 17 00:00:00 2001 From: Suzuki K Poulose Date: Thu, 7 May 2020 20:35:46 +0800 Subject: [PATCH 12/24] KVM: arm64: Unify handling THP backed host memory We support mapping host memory backed by PMD transparent hugepages at stage2 as huge pages. However the checks are now spread across two different places. Let us unify the handling of the THPs to keep the code cleaner (and future proof for PUD THP support). This patch moves transparent_hugepage_adjust() closer to the caller to avoid a forward declaration for fault_supports_stage2_huge_mappings(). Also, since we already handle the case where the host VA and the guest PA may not be aligned, the explicit VM_BUG_ON() is not required. Signed-off-by: Suzuki K Poulose Signed-off-by: Zenghui Yu Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20200507123546.1875-3-yuzenghui@huawei.com --- arch/arm64/kvm/mmu.c | 115 ++++++++++++++++++++++--------------------- 1 file changed, 60 insertions(+), 55 deletions(-) diff --git a/arch/arm64/kvm/mmu.c b/arch/arm64/kvm/mmu.c index ccb44e7d30d9..66eb8e3f6e8c 100644 --- a/arch/arm64/kvm/mmu.c +++ b/arch/arm64/kvm/mmu.c @@ -1375,47 +1375,6 @@ int kvm_phys_addr_ioremap(struct kvm *kvm, phys_addr_t guest_ipa, return ret; } -static bool transparent_hugepage_adjust(kvm_pfn_t *pfnp, phys_addr_t *ipap) -{ - kvm_pfn_t pfn = *pfnp; - gfn_t gfn = *ipap >> PAGE_SHIFT; - - if (kvm_is_transparent_hugepage(pfn)) { - unsigned long mask; - /* - * The address we faulted on is backed by a transparent huge - * page. However, because we map the compound huge page and - * not the individual tail page, we need to transfer the - * refcount to the head page. We have to be careful that the - * THP doesn't start to split while we are adjusting the - * refcounts. - * - * We are sure this doesn't happen, because mmu_notifier_retry - * was successful and we are holding the mmu_lock, so if this - * THP is trying to split, it will be blocked in the mmu - * notifier before touching any of the pages, specifically - * before being able to call __split_huge_page_refcount(). - * - * We can therefore safely transfer the refcount from PG_tail - * to PG_head and switch the pfn from a tail page to the head - * page accordingly. 
- */ - mask = PTRS_PER_PMD - 1; - VM_BUG_ON((gfn & mask) != (pfn & mask)); - if (pfn & mask) { - *ipap &= PMD_MASK; - kvm_release_pfn_clean(pfn); - pfn &= ~mask; - kvm_get_pfn(pfn); - *pfnp = pfn; - } - - return true; - } - - return false; -} - /** * stage2_wp_ptes - write protect PMD range * @pmd: pointer to pmd entry @@ -1663,6 +1622,59 @@ static bool fault_supports_stage2_huge_mapping(struct kvm_memory_slot *memslot, (hva & ~(map_size - 1)) + map_size <= uaddr_end; } +/* + * Check if the given hva is backed by a transparent huge page (THP) and + * whether it can be mapped using block mapping in stage2. If so, adjust + * the stage2 PFN and IPA accordingly. Only PMD_SIZE THPs are currently + * supported. This will need to be updated to support other THP sizes. + * + * Returns the size of the mapping. + */ +static unsigned long +transparent_hugepage_adjust(struct kvm_memory_slot *memslot, + unsigned long hva, kvm_pfn_t *pfnp, + phys_addr_t *ipap) +{ + kvm_pfn_t pfn = *pfnp; + + /* + * Make sure the adjustment is done only for THP pages. Also make + * sure that the HVA and IPA are sufficiently aligned and that the + * block map is contained within the memslot. + */ + if (kvm_is_transparent_hugepage(pfn) && + fault_supports_stage2_huge_mapping(memslot, hva, PMD_SIZE)) { + /* + * The address we faulted on is backed by a transparent huge + * page. However, because we map the compound huge page and + * not the individual tail page, we need to transfer the + * refcount to the head page. We have to be careful that the + * THP doesn't start to split while we are adjusting the + * refcounts. + * + * We are sure this doesn't happen, because mmu_notifier_retry + * was successful and we are holding the mmu_lock, so if this + * THP is trying to split, it will be blocked in the mmu + * notifier before touching any of the pages, specifically + * before being able to call __split_huge_page_refcount(). + * + * We can therefore safely transfer the refcount from PG_tail + * to PG_head and switch the pfn from a tail page to the head + * page accordingly. + */ + *ipap &= PMD_MASK; + kvm_release_pfn_clean(pfn); + pfn &= ~(PTRS_PER_PMD - 1); + kvm_get_pfn(pfn); + *pfnp = pfn; + + return PMD_SIZE; + } + + /* Use page mapping if we cannot use block mapping. */ + return PAGE_SIZE; +} + static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, struct kvm_memory_slot *memslot, unsigned long hva, unsigned long fault_status) @@ -1776,20 +1788,13 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, if (mmu_notifier_retry(kvm, mmu_seq)) goto out_unlock; - if (vma_pagesize == PAGE_SIZE && !force_pte) { - /* - * Only PMD_SIZE transparent hugepages(THP) are - * currently supported. This code will need to be - * updated to support other THP sizes. - * - * Make sure the host VA and the guest IPA are sufficiently - * aligned and that the block is contained within the memslot. - */ - if (fault_supports_stage2_huge_mapping(memslot, hva, PMD_SIZE) && - transparent_hugepage_adjust(&pfn, &fault_ipa)) - vma_pagesize = PMD_SIZE; - } - + /* + * If we are not forced to use page mapping, check if we are + * backed by a THP and thus use block mapping if possible. 
+ */ + if (vma_pagesize == PAGE_SIZE && !force_pte) + vma_pagesize = transparent_hugepage_adjust(memslot, hva, + &pfn, &fault_ipa); if (writable) kvm_set_pfn_dirty(pfn); From c862626e19efdc26b26481515470b160e8fe52f3 Mon Sep 17 00:00:00 2001 From: Keqian Zhu Date: Mon, 13 Apr 2020 20:20:23 +0800 Subject: [PATCH 13/24] KVM: arm64: Support enabling dirty log gradually in small chunks There is already support of enabling dirty log gradually in small chunks for x86 in commit 3c9bd4006bfc ("KVM: x86: enable dirty log gradually in small chunks"). This adds support for arm64. x86 still writes protect all huge pages when DIRTY_LOG_INITIALLY_ALL_SET is enabled. However, for arm64, both huge pages and normal pages can be write protected gradually by userspace. Under the Huawei Kunpeng 920 2.6GHz platform, I did some tests on 128G Linux VMs with different page size. The memory pressure is 127G in each case. The time taken of memory_global_dirty_log_start in QEMU is listed below: Page Size Before After Optimization 4K 650ms 1.8ms 2M 4ms 1.8ms 1G 2ms 1.8ms Besides the time reduction, the biggest improvement is that we will minimize the performance side effect (because of dissolving huge pages and marking memslots dirty) on guest after enabling dirty log. Signed-off-by: Keqian Zhu Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20200413122023.52583-1-zhukeqian1@huawei.com --- Documentation/virt/kvm/api.rst | 2 +- arch/arm64/include/asm/kvm_host.h | 3 +++ arch/arm64/kvm/mmu.c | 12 ++++++++++-- 3 files changed, 14 insertions(+), 3 deletions(-) diff --git a/Documentation/virt/kvm/api.rst b/Documentation/virt/kvm/api.rst index efbbe570aa9b..0017f63fa44f 100644 --- a/Documentation/virt/kvm/api.rst +++ b/Documentation/virt/kvm/api.rst @@ -5777,7 +5777,7 @@ will be initialized to 1 when created. This also improves performance because dirty logging can be enabled gradually in small chunks on the first call to KVM_CLEAR_DIRTY_LOG. KVM_DIRTY_LOG_INITIALLY_SET depends on KVM_DIRTY_LOG_MANUAL_PROTECT_ENABLE (it is also only available on -x86 for now). +x86 and arm64 for now). KVM_CAP_MANUAL_DIRTY_LOG_PROTECT2 was previously available under the name KVM_CAP_MANUAL_DIRTY_LOG_PROTECT, but the implementation had bugs that make diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index 32c8a675e5a4..a723f84fab83 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -46,6 +46,9 @@ #define KVM_REQ_RECORD_STEAL KVM_ARCH_REQ(3) #define KVM_REQ_RELOAD_GICv4 KVM_ARCH_REQ(4) +#define KVM_DIRTY_LOG_MANUAL_CAPS (KVM_DIRTY_LOG_MANUAL_PROTECT_ENABLE | \ + KVM_DIRTY_LOG_INITIALLY_SET) + DECLARE_STATIC_KEY_FALSE(userspace_irqchip_in_use); extern unsigned int kvm_sve_max_vl; diff --git a/arch/arm64/kvm/mmu.c b/arch/arm64/kvm/mmu.c index 66eb8e3f6e8c..ddf85bf21897 100644 --- a/arch/arm64/kvm/mmu.c +++ b/arch/arm64/kvm/mmu.c @@ -2277,8 +2277,16 @@ void kvm_arch_commit_memory_region(struct kvm *kvm, * allocated dirty_bitmap[], dirty pages will be tracked while the * memory slot is write protected. */ - if (change != KVM_MR_DELETE && mem->flags & KVM_MEM_LOG_DIRTY_PAGES) - kvm_mmu_wp_memory_region(kvm, mem->slot); + if (change != KVM_MR_DELETE && mem->flags & KVM_MEM_LOG_DIRTY_PAGES) { + /* + * If we're with initial-all-set, we don't need to write + * protect any pages because they're all reported as dirty. + * Huge pages and normal pages will be write protect gradually. 
+ */ + if (!kvm_dirty_log_manual_protect_and_init_set(kvm)) { + kvm_mmu_wp_memory_region(kvm, mem->slot); + } + } } int kvm_arch_prepare_memory_region(struct kvm *kvm, From 5107000faa6e8c2b0ff7a91a6d1f010f84596cd2 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Mon, 27 Apr 2020 15:15:07 +0100 Subject: [PATCH 14/24] KVM: arm64: Make KVM_CAP_MAX_VCPUS compatible with the selected GIC version KVM_CAP_MAX_VCPUS always return the maximum possible number of VCPUs, irrespective of the selected interrupt controller. This is pretty misleading for userspace that selects a GICv2 on a GICv3 system that supports v2 compat: It always gets a maximum of 512 VCPUs, even if the effective limit is 8. The 9th VCPU will fail to be created, which is unexpected as far as userspace is concerned. Fortunately, we already have the right information stashed in the kvm structure, and we can return it as requested. Reported-by: Ard Biesheuvel Signed-off-by: Marc Zyngier Tested-by: Alexandru Elisei Reviewed-by: Alexandru Elisei Link: https://lore.kernel.org/r/20200427141507.284985-1-maz@kernel.org --- arch/arm64/kvm/arm.c | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c index 0ea9a0266d9a..e01d44df98df 100644 --- a/arch/arm64/kvm/arm.c +++ b/arch/arm64/kvm/arm.c @@ -95,6 +95,11 @@ int kvm_vm_ioctl_enable_cap(struct kvm *kvm, return r; } +static int kvm_arm_default_max_vcpus(void) +{ + return vgic_present ? kvm_vgic_get_max_vcpus() : KVM_MAX_VCPUS; +} + /** * kvm_arch_init_vm - initializes a VM data structure * @kvm: pointer to the KVM struct @@ -128,8 +133,7 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type) kvm->arch.vmid.vmid_gen = 0; /* The maximum number of VCPUs is limited by the host's GIC model */ - kvm->arch.max_vcpus = vgic_present ? - kvm_vgic_get_max_vcpus() : KVM_MAX_VCPUS; + kvm->arch.max_vcpus = kvm_arm_default_max_vcpus(); return ret; out_free_stage2_pgd: @@ -204,10 +208,11 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) r = num_online_cpus(); break; case KVM_CAP_MAX_VCPUS: - r = KVM_MAX_VCPUS; - break; case KVM_CAP_MAX_VCPU_ID: - r = KVM_MAX_VCPU_ID; + if (kvm) + r = kvm->arch.max_vcpus; + else + r = kvm_arm_default_max_vcpus(); break; case KVM_CAP_MSI_DEVID: if (!kvm) From 71b3ec5f221b8b3ff545639be83ddfcd5d7c9800 Mon Sep 17 00:00:00 2001 From: David Brazdil Date: Fri, 15 May 2020 16:20:56 +0100 Subject: [PATCH 15/24] KVM: arm64: Clean up cpu_init_hyp_mode() Pull bits of code to the only place where it is used. Remove empty function __cpu_init_stage2(). Remove redundant has_vhe() check since this function is nVHE-only. No functional changes intended. 
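The per-cpu offset calculation being folded into cpu_init_hyp_mode() can be illustrated with a plain userspace analogue. This is only a sketch of the idea (compute the raw offset once, then add it to a symbol address at the point of use); the kvm_host_data layout and NR_CPUS value below are invented, and real EL2 code reads the offset back from TPIDR_EL2 rather than recomputing it:

#include <stdint.h>
#include <stdio.h>

#define NR_CPUS 4

struct host_data {
	uint64_t host_ctxt;
	uint64_t flags;
};

/* Stand-in for the per-CPU kvm_host_data variable. */
static struct host_data kvm_host_data[NR_CPUS];

int main(void)
{
	for (int cpu = 0; cpu < NR_CPUS; cpu++) {
		/*
		 * What cpu_init_hyp_mode() computes once per CPU and stashes
		 * in TPIDR_EL2: the raw distance between this CPU's copy and
		 * the symbol, with no address-space translation involved.
		 */
		uintptr_t tpidr = (uintptr_t)&kvm_host_data[cpu] -
				  (uintptr_t)&kvm_host_data[0];

		/* What the EL2 side then does: symbol address + offset. */
		struct host_data *hd = (struct host_data *)
			((uintptr_t)&kvm_host_data[0] + tpidr);

		hd->flags = cpu;	/* lands in the right per-CPU slot */
		printf("cpu%d: offset %#lx, flags %llu\n", cpu,
		       (unsigned long)tpidr,
		       (unsigned long long)kvm_host_data[cpu].flags);
	}
	return 0;
}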
Signed-off-by: David Brazdil Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20200515152056.83158-1-dbrazdil@google.com --- arch/arm64/include/asm/kvm_asm.h | 2 ++ arch/arm64/include/asm/kvm_host.h | 35 ------------------------------- arch/arm64/kvm/arm.c | 32 +++++++++++++++++++++++----- 3 files changed, 29 insertions(+), 40 deletions(-) diff --git a/arch/arm64/include/asm/kvm_asm.h b/arch/arm64/include/asm/kvm_asm.h index 59e314f38e43..0c9b5fc4ba0a 100644 --- a/arch/arm64/include/asm/kvm_asm.h +++ b/arch/arm64/include/asm/kvm_asm.h @@ -70,6 +70,8 @@ extern int kvm_vcpu_run_vhe(struct kvm_vcpu *vcpu); extern int __kvm_vcpu_run_nvhe(struct kvm_vcpu *vcpu); +extern void __kvm_enable_ssbs(void); + extern u64 __vgic_v3_get_ich_vtr_el2(void); extern u64 __vgic_v3_read_vmcr(void); extern void __vgic_v3_write_vmcr(u32 vmcr); diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index a723f84fab83..69a338a390a6 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -533,39 +533,6 @@ static inline void kvm_init_host_cpu_context(struct kvm_cpu_context *cpu_ctxt) cpu_ctxt->sys_regs[MPIDR_EL1] = read_cpuid_mpidr(); } -void __kvm_enable_ssbs(void); - -static inline void __cpu_init_hyp_mode(phys_addr_t pgd_ptr, - unsigned long hyp_stack_ptr, - unsigned long vector_ptr) -{ - /* - * Calculate the raw per-cpu offset without a translation from the - * kernel's mapping to the linear mapping, and store it in tpidr_el2 - * so that we can use adr_l to access per-cpu variables in EL2. - */ - u64 tpidr_el2 = ((u64)this_cpu_ptr(&kvm_host_data) - - (u64)kvm_ksym_ref(kvm_host_data)); - - /* - * Call initialization code, and switch to the full blown HYP code. - * If the cpucaps haven't been finalized yet, something has gone very - * wrong, and hyp will crash and burn when it uses any - * cpus_have_const_cap() wrapper. - */ - BUG_ON(!system_capabilities_finalized()); - __kvm_call_hyp((void *)pgd_ptr, hyp_stack_ptr, vector_ptr, tpidr_el2); - - /* - * Disabling SSBD on a non-VHE system requires us to enable SSBS - * at EL2. - */ - if (!has_vhe() && this_cpu_has_cap(ARM64_SSBS) && - arm64_get_ssbd_state() == ARM64_SSBD_FORCE_DISABLE) { - kvm_call_hyp(__kvm_enable_ssbs); - } -} - static inline bool kvm_arch_requires_vhe(void) { /* @@ -601,8 +568,6 @@ int kvm_arm_vcpu_arch_get_attr(struct kvm_vcpu *vcpu, int kvm_arm_vcpu_arch_has_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr); -static inline void __cpu_init_stage2(void) {} - /* Guest/host FPSIMD coordination helpers */ int kvm_arch_vcpu_run_map_fp(struct kvm_vcpu *vcpu); void kvm_arch_vcpu_load_fp(struct kvm_vcpu *vcpu); diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c index e01d44df98df..b0b569f2cdd0 100644 --- a/arch/arm64/kvm/arm.c +++ b/arch/arm64/kvm/arm.c @@ -1273,19 +1273,41 @@ static void cpu_init_hyp_mode(void) { phys_addr_t pgd_ptr; unsigned long hyp_stack_ptr; - unsigned long stack_page; unsigned long vector_ptr; + unsigned long tpidr_el2; /* Switch from the HYP stub to our own HYP init vector */ __hyp_set_vectors(kvm_get_idmap_vector()); + /* + * Calculate the raw per-cpu offset without a translation from the + * kernel's mapping to the linear mapping, and store it in tpidr_el2 + * so that we can use adr_l to access per-cpu variables in EL2. 
+ */ + tpidr_el2 = ((unsigned long)this_cpu_ptr(&kvm_host_data) - + (unsigned long)kvm_ksym_ref(kvm_host_data)); + pgd_ptr = kvm_mmu_get_httbr(); - stack_page = __this_cpu_read(kvm_arm_hyp_stack_page); - hyp_stack_ptr = stack_page + PAGE_SIZE; + hyp_stack_ptr = __this_cpu_read(kvm_arm_hyp_stack_page) + PAGE_SIZE; vector_ptr = (unsigned long)kvm_get_hyp_vector(); - __cpu_init_hyp_mode(pgd_ptr, hyp_stack_ptr, vector_ptr); - __cpu_init_stage2(); + /* + * Call initialization code, and switch to the full blown HYP code. + * If the cpucaps haven't been finalized yet, something has gone very + * wrong, and hyp will crash and burn when it uses any + * cpus_have_const_cap() wrapper. + */ + BUG_ON(!system_capabilities_finalized()); + __kvm_call_hyp((void *)pgd_ptr, hyp_stack_ptr, vector_ptr, tpidr_el2); + + /* + * Disabling SSBD on a non-VHE system requires us to enable SSBS + * at EL2. + */ + if (this_cpu_has_cap(ARM64_SSBS) && + arm64_get_ssbd_state() == ARM64_SSBD_FORCE_DISABLE) { + kvm_call_hyp(__kvm_enable_ssbs); + } } static void cpu_hyp_reset(void) From 438f711ce1d889632467be80779c8f5762b107d7 Mon Sep 17 00:00:00 2001 From: David Brazdil Date: Fri, 15 May 2020 16:25:50 +0100 Subject: [PATCH 16/24] KVM: arm64: Fix incorrect comment on kvm_get_hyp_vector() The comment used to say that kvm_get_hyp_vector is only called on VHE systems. In fact, it is also called from the nVHE init function cpu_init_hyp_mode(). Fix the comment to stop confusing devs. Signed-off-by: David Brazdil Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20200515152550.83810-1-dbrazdil@google.com --- arch/arm64/include/asm/kvm_mmu.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/include/asm/kvm_mmu.h b/arch/arm64/include/asm/kvm_mmu.h index 30b0e8d6b895..796f6a2e794a 100644 --- a/arch/arm64/include/asm/kvm_mmu.h +++ b/arch/arm64/include/asm/kvm_mmu.h @@ -473,7 +473,7 @@ static inline int kvm_write_guest_lock(struct kvm *kvm, gpa_t gpa, extern void *__kvm_bp_vect_base; extern int __kvm_harden_el2_vector_slot; -/* This is only called on a VHE system */ +/* This is called on both VHE and !VHE systems */ static inline void *kvm_get_hyp_vector(void) { struct bp_hardening_data *data = arm64_get_bp_hardening_data(); From 0a78791c0d12fcd5d3f486668defb9ab055e3729 Mon Sep 17 00:00:00 2001 From: Andrew Scull Date: Tue, 19 May 2020 11:40:36 +0100 Subject: [PATCH 17/24] KVM: arm64: Remove obsolete kvm_virt_to_phys abstraction This abstraction was introduced to hide the difference between arm and arm64 but, with the former no longer supported, this abstraction can be removed and the canonical kernel API used directly instead. 
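The hunk below leaves the existing ALIGN_DOWN()/ALIGN() rounding of the idmap boundaries in place and only swaps the address lookup. A minimal standalone illustration of that rounding, using simplified power-of-two macros and made-up addresses in place of real __pa_symbol() results:

#include <stdint.h>
#include <stdio.h>

#define PAGE_SIZE	4096ULL
/* Simplified power-of-two variants of the kernel macros. */
#define ALIGN_DOWN(x, a)	((x) & ~((a) - 1))
#define ALIGN(x, a)		(((x) + (a) - 1) & ~((a) - 1))

int main(void)
{
	/* Hypothetical physical addresses of the hyp idmap text bounds. */
	uint64_t idmap_start = 0x40a1234ULL;
	uint64_t idmap_end   = 0x40a5678ULL;

	printf("start %#llx -> %#llx\n",
	       (unsigned long long)idmap_start,
	       (unsigned long long)ALIGN_DOWN(idmap_start, PAGE_SIZE));
	printf("end   %#llx -> %#llx\n",
	       (unsigned long long)idmap_end,
	       (unsigned long long)ALIGN(idmap_end, PAGE_SIZE));
	return 0;
}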
Signed-off-by: Andrew Scull Signed-off-by: Marc Zyngier CC: Marc Zyngier CC: James Morse CC: Suzuki K Poulose Link: https://lore.kernel.org/r/20200519104036.259917-1-ascull@google.com --- arch/arm64/include/asm/kvm_mmu.h | 2 -- arch/arm64/kvm/mmu.c | 6 +++--- 2 files changed, 3 insertions(+), 5 deletions(-) diff --git a/arch/arm64/include/asm/kvm_mmu.h b/arch/arm64/include/asm/kvm_mmu.h index 796f6a2e794a..53bd4d517a4d 100644 --- a/arch/arm64/include/asm/kvm_mmu.h +++ b/arch/arm64/include/asm/kvm_mmu.h @@ -363,8 +363,6 @@ static inline void __kvm_flush_dcache_pud(pud_t pud) } } -#define kvm_virt_to_phys(x) __pa_symbol(x) - void kvm_set_way_flush(struct kvm_vcpu *vcpu); void kvm_toggle_cache(struct kvm_vcpu *vcpu, bool was_enabled); diff --git a/arch/arm64/kvm/mmu.c b/arch/arm64/kvm/mmu.c index ddf85bf21897..a1f6bc70c4e4 100644 --- a/arch/arm64/kvm/mmu.c +++ b/arch/arm64/kvm/mmu.c @@ -2197,11 +2197,11 @@ int kvm_mmu_init(void) { int err; - hyp_idmap_start = kvm_virt_to_phys(__hyp_idmap_text_start); + hyp_idmap_start = __pa_symbol(__hyp_idmap_text_start); hyp_idmap_start = ALIGN_DOWN(hyp_idmap_start, PAGE_SIZE); - hyp_idmap_end = kvm_virt_to_phys(__hyp_idmap_text_end); + hyp_idmap_end = __pa_symbol(__hyp_idmap_text_end); hyp_idmap_end = ALIGN(hyp_idmap_end, PAGE_SIZE); - hyp_idmap_vector = kvm_virt_to_phys(__kvm_hyp_init); + hyp_idmap_vector = __pa_symbol(__kvm_hyp_init); /* * We rely on the linker script to ensure at build time that the HYP From fc5d1f1a42fba6266ab95dc3b84937933a9b5a66 Mon Sep 17 00:00:00 2001 From: Christoffer Dall Date: Sat, 1 Dec 2018 08:41:28 -0800 Subject: [PATCH 18/24] KVM: arm64: vgic-v3: Take cpu_if pointer directly instead of vcpu If we move the used_lrs field to the version-specific cpu interface structure, the following functions only operate on the struct vgic_v3_cpu_if and not the full vcpu: __vgic_v3_save_state __vgic_v3_restore_state __vgic_v3_activate_traps __vgic_v3_deactivate_traps __vgic_v3_save_aprs __vgic_v3_restore_aprs This is going to be very useful for nested virt, so move the used_lrs field and change the prototypes and implementations of these functions to take the cpu_if parameter directly. No functional change. 
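The structural effect of the move can be sketched in isolation. The toy program below mirrors the layout change (each version-specific interface carries its own used_lrs inside an anonymous union) and the selection common code now performs; the have_gicv3_cpuif flag stands in for the kvm_vgic_global_state static key, and all types are cut down to the one field of interest:

#include <stdbool.h>
#include <stdio.h>

/* Simplified stand-ins; only the used_lrs placement matters here. */
struct vgic_v2_cpu_if { unsigned int used_lrs; };
struct vgic_v3_cpu_if { unsigned int used_lrs; };

struct vgic_cpu {
	union {
		struct vgic_v2_cpu_if vgic_v2;
		struct vgic_v3_cpu_if vgic_v3;
	};
};

/* Stand-in for the kvm_vgic_global_state.gicv3_cpuif static key. */
static bool have_gicv3_cpuif = true;

/*
 * Mirrors the selection common code now performs: read used_lrs from
 * whichever interface is actually in use.
 */
static unsigned int current_used_lrs(struct vgic_cpu *vgic_cpu)
{
	if (!have_gicv3_cpuif)
		return vgic_cpu->vgic_v2.used_lrs;
	return vgic_cpu->vgic_v3.used_lrs;
}

int main(void)
{
	struct vgic_cpu cpu = { .vgic_v3 = { .used_lrs = 3 } };

	printf("used_lrs = %u\n", current_used_lrs(&cpu));
	return 0;
}

Keeping used_lrs next to the LRs it describes is what lets the hyp save/restore paths take a bare cpu_if pointer instead of the whole vcpu.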
Reviewed-by: James Morse Signed-off-by: Christoffer Dall Signed-off-by: Marc Zyngier --- arch/arm64/include/asm/kvm_hyp.h | 12 ++++++------ arch/arm64/kvm/hyp/switch.c | 8 ++++---- arch/arm64/kvm/hyp/vgic-v3-sr.c | 33 ++++++++++---------------------- arch/arm64/kvm/vgic/vgic-v2.c | 10 +++++----- arch/arm64/kvm/vgic/vgic-v3.c | 14 ++++++++------ arch/arm64/kvm/vgic/vgic.c | 25 ++++++++++++++++-------- include/kvm/arm_vgic.h | 5 ++++- 7 files changed, 54 insertions(+), 53 deletions(-) diff --git a/arch/arm64/include/asm/kvm_hyp.h b/arch/arm64/include/asm/kvm_hyp.h index fe57f60f06a8..4f67b0cdffe8 100644 --- a/arch/arm64/include/asm/kvm_hyp.h +++ b/arch/arm64/include/asm/kvm_hyp.h @@ -56,12 +56,12 @@ int __vgic_v2_perform_cpuif_access(struct kvm_vcpu *vcpu); -void __vgic_v3_save_state(struct kvm_vcpu *vcpu); -void __vgic_v3_restore_state(struct kvm_vcpu *vcpu); -void __vgic_v3_activate_traps(struct kvm_vcpu *vcpu); -void __vgic_v3_deactivate_traps(struct kvm_vcpu *vcpu); -void __vgic_v3_save_aprs(struct kvm_vcpu *vcpu); -void __vgic_v3_restore_aprs(struct kvm_vcpu *vcpu); +void __vgic_v3_save_state(struct vgic_v3_cpu_if *cpu_if); +void __vgic_v3_restore_state(struct vgic_v3_cpu_if *cpu_if); +void __vgic_v3_activate_traps(struct vgic_v3_cpu_if *cpu_if); +void __vgic_v3_deactivate_traps(struct vgic_v3_cpu_if *cpu_if); +void __vgic_v3_save_aprs(struct vgic_v3_cpu_if *cpu_if); +void __vgic_v3_restore_aprs(struct vgic_v3_cpu_if *cpu_if); int __vgic_v3_perform_cpuif_access(struct kvm_vcpu *vcpu); void __timer_enable_traps(struct kvm_vcpu *vcpu); diff --git a/arch/arm64/kvm/hyp/switch.c b/arch/arm64/kvm/hyp/switch.c index 8a1e81a400e0..c07a45643cd4 100644 --- a/arch/arm64/kvm/hyp/switch.c +++ b/arch/arm64/kvm/hyp/switch.c @@ -270,8 +270,8 @@ static void __hyp_text __deactivate_vm(struct kvm_vcpu *vcpu) static void __hyp_text __hyp_vgic_save_state(struct kvm_vcpu *vcpu) { if (static_branch_unlikely(&kvm_vgic_global_state.gicv3_cpuif)) { - __vgic_v3_save_state(vcpu); - __vgic_v3_deactivate_traps(vcpu); + __vgic_v3_save_state(&vcpu->arch.vgic_cpu.vgic_v3); + __vgic_v3_deactivate_traps(&vcpu->arch.vgic_cpu.vgic_v3); } } @@ -279,8 +279,8 @@ static void __hyp_text __hyp_vgic_save_state(struct kvm_vcpu *vcpu) static void __hyp_text __hyp_vgic_restore_state(struct kvm_vcpu *vcpu) { if (static_branch_unlikely(&kvm_vgic_global_state.gicv3_cpuif)) { - __vgic_v3_activate_traps(vcpu); - __vgic_v3_restore_state(vcpu); + __vgic_v3_activate_traps(&vcpu->arch.vgic_cpu.vgic_v3); + __vgic_v3_restore_state(&vcpu->arch.vgic_cpu.vgic_v3); } } diff --git a/arch/arm64/kvm/hyp/vgic-v3-sr.c b/arch/arm64/kvm/hyp/vgic-v3-sr.c index 6b85773e15c4..10ed539835c1 100644 --- a/arch/arm64/kvm/hyp/vgic-v3-sr.c +++ b/arch/arm64/kvm/hyp/vgic-v3-sr.c @@ -194,10 +194,9 @@ static u32 __hyp_text __vgic_v3_read_ap1rn(int n) return val; } -void __hyp_text __vgic_v3_save_state(struct kvm_vcpu *vcpu) +void __hyp_text __vgic_v3_save_state(struct vgic_v3_cpu_if *cpu_if) { - struct vgic_v3_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v3; - u64 used_lrs = vcpu->arch.vgic_cpu.used_lrs; + u64 used_lrs = cpu_if->used_lrs; /* * Make sure stores to the GIC via the memory mapped interface @@ -230,10 +229,9 @@ void __hyp_text __vgic_v3_save_state(struct kvm_vcpu *vcpu) } } -void __hyp_text __vgic_v3_restore_state(struct kvm_vcpu *vcpu) +void __hyp_text __vgic_v3_restore_state(struct vgic_v3_cpu_if *cpu_if) { - struct vgic_v3_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v3; - u64 used_lrs = vcpu->arch.vgic_cpu.used_lrs; + u64 used_lrs = cpu_if->used_lrs; int 
i; if (used_lrs || cpu_if->its_vpe.its_vm) { @@ -257,10 +255,8 @@ void __hyp_text __vgic_v3_restore_state(struct kvm_vcpu *vcpu) } } -void __hyp_text __vgic_v3_activate_traps(struct kvm_vcpu *vcpu) +void __hyp_text __vgic_v3_activate_traps(struct vgic_v3_cpu_if *cpu_if) { - struct vgic_v3_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v3; - /* * VFIQEn is RES1 if ICC_SRE_EL1.SRE is 1. This causes a * Group0 interrupt (as generated in GICv2 mode) to be @@ -306,9 +302,8 @@ void __hyp_text __vgic_v3_activate_traps(struct kvm_vcpu *vcpu) write_gicreg(cpu_if->vgic_hcr, ICH_HCR_EL2); } -void __hyp_text __vgic_v3_deactivate_traps(struct kvm_vcpu *vcpu) +void __hyp_text __vgic_v3_deactivate_traps(struct vgic_v3_cpu_if *cpu_if) { - struct vgic_v3_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v3; u64 val; if (!cpu_if->vgic_sre) { @@ -333,15 +328,11 @@ void __hyp_text __vgic_v3_deactivate_traps(struct kvm_vcpu *vcpu) write_gicreg(0, ICH_HCR_EL2); } -void __hyp_text __vgic_v3_save_aprs(struct kvm_vcpu *vcpu) +void __hyp_text __vgic_v3_save_aprs(struct vgic_v3_cpu_if *cpu_if) { - struct vgic_v3_cpu_if *cpu_if; u64 val; u32 nr_pre_bits; - vcpu = kern_hyp_va(vcpu); - cpu_if = &vcpu->arch.vgic_cpu.vgic_v3; - val = read_gicreg(ICH_VTR_EL2); nr_pre_bits = vtr_to_nr_pre_bits(val); @@ -370,15 +361,11 @@ void __hyp_text __vgic_v3_save_aprs(struct kvm_vcpu *vcpu) } } -void __hyp_text __vgic_v3_restore_aprs(struct kvm_vcpu *vcpu) +void __hyp_text __vgic_v3_restore_aprs(struct vgic_v3_cpu_if *cpu_if) { - struct vgic_v3_cpu_if *cpu_if; u64 val; u32 nr_pre_bits; - vcpu = kern_hyp_va(vcpu); - cpu_if = &vcpu->arch.vgic_cpu.vgic_v3; - val = read_gicreg(ICH_VTR_EL2); nr_pre_bits = vtr_to_nr_pre_bits(val); @@ -451,7 +438,7 @@ static int __hyp_text __vgic_v3_highest_priority_lr(struct kvm_vcpu *vcpu, u32 vmcr, u64 *lr_val) { - unsigned int used_lrs = vcpu->arch.vgic_cpu.used_lrs; + unsigned int used_lrs = vcpu->arch.vgic_cpu.vgic_v3.used_lrs; u8 priority = GICv3_IDLE_PRIORITY; int i, lr = -1; @@ -490,7 +477,7 @@ static int __hyp_text __vgic_v3_highest_priority_lr(struct kvm_vcpu *vcpu, static int __hyp_text __vgic_v3_find_active_lr(struct kvm_vcpu *vcpu, int intid, u64 *lr_val) { - unsigned int used_lrs = vcpu->arch.vgic_cpu.used_lrs; + unsigned int used_lrs = vcpu->arch.vgic_cpu.vgic_v3.used_lrs; int i; for (i = 0; i < used_lrs; i++) { diff --git a/arch/arm64/kvm/vgic/vgic-v2.c b/arch/arm64/kvm/vgic/vgic-v2.c index 621cc168fe3f..ebf53a4e1296 100644 --- a/arch/arm64/kvm/vgic/vgic-v2.c +++ b/arch/arm64/kvm/vgic/vgic-v2.c @@ -56,7 +56,7 @@ void vgic_v2_fold_lr_state(struct kvm_vcpu *vcpu) cpuif->vgic_hcr &= ~GICH_HCR_UIE; - for (lr = 0; lr < vgic_cpu->used_lrs; lr++) { + for (lr = 0; lr < vgic_cpu->vgic_v2.used_lrs; lr++) { u32 val = cpuif->vgic_lr[lr]; u32 cpuid, intid = val & GICH_LR_VIRTUALID; struct vgic_irq *irq; @@ -120,7 +120,7 @@ void vgic_v2_fold_lr_state(struct kvm_vcpu *vcpu) vgic_put_irq(vcpu->kvm, irq); } - vgic_cpu->used_lrs = 0; + cpuif->used_lrs = 0; } /* @@ -427,7 +427,7 @@ int vgic_v2_probe(const struct gic_kvm_info *info) static void save_lrs(struct kvm_vcpu *vcpu, void __iomem *base) { struct vgic_v2_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v2; - u64 used_lrs = vcpu->arch.vgic_cpu.used_lrs; + u64 used_lrs = cpu_if->used_lrs; u64 elrsr; int i; @@ -448,7 +448,7 @@ static void save_lrs(struct kvm_vcpu *vcpu, void __iomem *base) void vgic_v2_save_state(struct kvm_vcpu *vcpu) { void __iomem *base = kvm_vgic_global_state.vctrl_base; - u64 used_lrs = vcpu->arch.vgic_cpu.used_lrs; + u64 used_lrs = 
vcpu->arch.vgic_cpu.vgic_v2.used_lrs; if (!base) return; @@ -463,7 +463,7 @@ void vgic_v2_restore_state(struct kvm_vcpu *vcpu) { struct vgic_v2_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v2; void __iomem *base = kvm_vgic_global_state.vctrl_base; - u64 used_lrs = vcpu->arch.vgic_cpu.used_lrs; + u64 used_lrs = cpu_if->used_lrs; int i; if (!base) diff --git a/arch/arm64/kvm/vgic/vgic-v3.c b/arch/arm64/kvm/vgic/vgic-v3.c index 3ccd6d3cb4d3..76e2d85789ed 100644 --- a/arch/arm64/kvm/vgic/vgic-v3.c +++ b/arch/arm64/kvm/vgic/vgic-v3.c @@ -39,7 +39,7 @@ void vgic_v3_fold_lr_state(struct kvm_vcpu *vcpu) cpuif->vgic_hcr &= ~ICH_HCR_UIE; - for (lr = 0; lr < vgic_cpu->used_lrs; lr++) { + for (lr = 0; lr < cpuif->used_lrs; lr++) { u64 val = cpuif->vgic_lr[lr]; u32 intid, cpuid; struct vgic_irq *irq; @@ -111,7 +111,7 @@ void vgic_v3_fold_lr_state(struct kvm_vcpu *vcpu) vgic_put_irq(vcpu->kvm, irq); } - vgic_cpu->used_lrs = 0; + cpuif->used_lrs = 0; } /* Requires the irq to be locked already */ @@ -662,10 +662,10 @@ void vgic_v3_load(struct kvm_vcpu *vcpu) if (likely(cpu_if->vgic_sre)) kvm_call_hyp(__vgic_v3_write_vmcr, cpu_if->vgic_vmcr); - kvm_call_hyp(__vgic_v3_restore_aprs, vcpu); + kvm_call_hyp(__vgic_v3_restore_aprs, kern_hyp_va(cpu_if)); if (has_vhe()) - __vgic_v3_activate_traps(vcpu); + __vgic_v3_activate_traps(cpu_if); WARN_ON(vgic_v4_load(vcpu)); } @@ -680,12 +680,14 @@ void vgic_v3_vmcr_sync(struct kvm_vcpu *vcpu) void vgic_v3_put(struct kvm_vcpu *vcpu) { + struct vgic_v3_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v3; + WARN_ON(vgic_v4_put(vcpu, false)); vgic_v3_vmcr_sync(vcpu); - kvm_call_hyp(__vgic_v3_save_aprs, vcpu); + kvm_call_hyp(__vgic_v3_save_aprs, kern_hyp_va(cpu_if)); if (has_vhe()) - __vgic_v3_deactivate_traps(vcpu); + __vgic_v3_deactivate_traps(cpu_if); } diff --git a/arch/arm64/kvm/vgic/vgic.c b/arch/arm64/kvm/vgic/vgic.c index 99b02ca730a8..c3643b7f101b 100644 --- a/arch/arm64/kvm/vgic/vgic.c +++ b/arch/arm64/kvm/vgic/vgic.c @@ -786,6 +786,7 @@ static void vgic_flush_lr_state(struct kvm_vcpu *vcpu) int count; bool multi_sgi; u8 prio = 0xff; + int i = 0; lockdep_assert_held(&vgic_cpu->ap_list_lock); @@ -827,11 +828,14 @@ static void vgic_flush_lr_state(struct kvm_vcpu *vcpu) } } - vcpu->arch.vgic_cpu.used_lrs = count; - /* Nuke remaining LRs */ - for ( ; count < kvm_vgic_global_state.nr_lr; count++) - vgic_clear_lr(vcpu, count); + for (i = count ; i < kvm_vgic_global_state.nr_lr; i++) + vgic_clear_lr(vcpu, i); + + if (!static_branch_unlikely(&kvm_vgic_global_state.gicv3_cpuif)) + vcpu->arch.vgic_cpu.vgic_v2.used_lrs = count; + else + vcpu->arch.vgic_cpu.vgic_v3.used_lrs = count; } static inline bool can_access_vgic_from_kernel(void) @@ -849,13 +853,13 @@ static inline void vgic_save_state(struct kvm_vcpu *vcpu) if (!static_branch_unlikely(&kvm_vgic_global_state.gicv3_cpuif)) vgic_v2_save_state(vcpu); else - __vgic_v3_save_state(vcpu); + __vgic_v3_save_state(&vcpu->arch.vgic_cpu.vgic_v3); } /* Sync back the hardware VGIC state into our emulation after a guest's run. 
*/ void kvm_vgic_sync_hwstate(struct kvm_vcpu *vcpu) { - struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu; + int used_lrs; /* An empty ap_list_head implies used_lrs == 0 */ if (list_empty(&vcpu->arch.vgic_cpu.ap_list_head)) @@ -864,7 +868,12 @@ void kvm_vgic_sync_hwstate(struct kvm_vcpu *vcpu) if (can_access_vgic_from_kernel()) vgic_save_state(vcpu); - if (vgic_cpu->used_lrs) + if (!static_branch_unlikely(&kvm_vgic_global_state.gicv3_cpuif)) + used_lrs = vcpu->arch.vgic_cpu.vgic_v2.used_lrs; + else + used_lrs = vcpu->arch.vgic_cpu.vgic_v3.used_lrs; + + if (used_lrs) vgic_fold_lr_state(vcpu); vgic_prune_ap_list(vcpu); } @@ -874,7 +883,7 @@ static inline void vgic_restore_state(struct kvm_vcpu *vcpu) if (!static_branch_unlikely(&kvm_vgic_global_state.gicv3_cpuif)) vgic_v2_restore_state(vcpu); else - __vgic_v3_restore_state(vcpu); + __vgic_v3_restore_state(&vcpu->arch.vgic_cpu.vgic_v3); } /* Flush our emulation state into the GIC hardware before entering the guest. */ diff --git a/include/kvm/arm_vgic.h b/include/kvm/arm_vgic.h index 69f4164d6477..a8d8fdcd3723 100644 --- a/include/kvm/arm_vgic.h +++ b/include/kvm/arm_vgic.h @@ -274,6 +274,8 @@ struct vgic_v2_cpu_if { u32 vgic_vmcr; u32 vgic_apr; u32 vgic_lr[VGIC_V2_MAX_LRS]; + + unsigned int used_lrs; }; struct vgic_v3_cpu_if { @@ -291,6 +293,8 @@ struct vgic_v3_cpu_if { * linking the Linux IRQ subsystem and the ITS together. */ struct its_vpe its_vpe; + + unsigned int used_lrs; }; struct vgic_cpu { @@ -300,7 +304,6 @@ struct vgic_cpu { struct vgic_v3_cpu_if vgic_v3; }; - unsigned int used_lrs; struct vgic_irq private_irqs[VGIC_NR_PRIVATE_IRQS]; raw_spinlock_t ap_list_lock; /* Protects the ap_list */ From 7ea90bdd70c9cf82dfbaa54e7d9f296928679224 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Thu, 20 Jun 2019 11:17:00 +0100 Subject: [PATCH 19/24] KVM: arm64: Refactor vcpu_{read,write}_sys_reg Extract the direct HW accessors for later reuse. Reviewed-by: James Morse Signed-off-by: Marc Zyngier --- arch/arm64/kvm/sys_regs.c | 128 +++++++++++++++++++++----------------- 1 file changed, 71 insertions(+), 57 deletions(-) diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c index 620eaf11e672..50e328ca1419 100644 --- a/arch/arm64/kvm/sys_regs.c +++ b/arch/arm64/kvm/sys_regs.c @@ -64,11 +64,8 @@ static bool write_to_read_only(struct kvm_vcpu *vcpu, return false; } -u64 vcpu_read_sys_reg(const struct kvm_vcpu *vcpu, int reg) +static bool __vcpu_read_sys_reg_from_cpu(int reg, u64 *val) { - if (!vcpu->arch.sysregs_loaded_on_cpu) - goto immediate_read; - /* * System registers listed in the switch are not saved on every * exit from the guest but are only saved on vcpu_put. @@ -79,40 +76,37 @@ u64 vcpu_read_sys_reg(const struct kvm_vcpu *vcpu, int reg) * thread when emulating cross-VCPU communication. 
*/ switch (reg) { - case CSSELR_EL1: return read_sysreg_s(SYS_CSSELR_EL1); - case SCTLR_EL1: return read_sysreg_s(SYS_SCTLR_EL12); - case ACTLR_EL1: return read_sysreg_s(SYS_ACTLR_EL1); - case CPACR_EL1: return read_sysreg_s(SYS_CPACR_EL12); - case TTBR0_EL1: return read_sysreg_s(SYS_TTBR0_EL12); - case TTBR1_EL1: return read_sysreg_s(SYS_TTBR1_EL12); - case TCR_EL1: return read_sysreg_s(SYS_TCR_EL12); - case ESR_EL1: return read_sysreg_s(SYS_ESR_EL12); - case AFSR0_EL1: return read_sysreg_s(SYS_AFSR0_EL12); - case AFSR1_EL1: return read_sysreg_s(SYS_AFSR1_EL12); - case FAR_EL1: return read_sysreg_s(SYS_FAR_EL12); - case MAIR_EL1: return read_sysreg_s(SYS_MAIR_EL12); - case VBAR_EL1: return read_sysreg_s(SYS_VBAR_EL12); - case CONTEXTIDR_EL1: return read_sysreg_s(SYS_CONTEXTIDR_EL12); - case TPIDR_EL0: return read_sysreg_s(SYS_TPIDR_EL0); - case TPIDRRO_EL0: return read_sysreg_s(SYS_TPIDRRO_EL0); - case TPIDR_EL1: return read_sysreg_s(SYS_TPIDR_EL1); - case AMAIR_EL1: return read_sysreg_s(SYS_AMAIR_EL12); - case CNTKCTL_EL1: return read_sysreg_s(SYS_CNTKCTL_EL12); - case PAR_EL1: return read_sysreg_s(SYS_PAR_EL1); - case DACR32_EL2: return read_sysreg_s(SYS_DACR32_EL2); - case IFSR32_EL2: return read_sysreg_s(SYS_IFSR32_EL2); - case DBGVCR32_EL2: return read_sysreg_s(SYS_DBGVCR32_EL2); + case CSSELR_EL1: *val = read_sysreg_s(SYS_CSSELR_EL1); break; + case SCTLR_EL1: *val = read_sysreg_s(SYS_SCTLR_EL12); break; + case ACTLR_EL1: *val = read_sysreg_s(SYS_ACTLR_EL1); break; + case CPACR_EL1: *val = read_sysreg_s(SYS_CPACR_EL12); break; + case TTBR0_EL1: *val = read_sysreg_s(SYS_TTBR0_EL12); break; + case TTBR1_EL1: *val = read_sysreg_s(SYS_TTBR1_EL12); break; + case TCR_EL1: *val = read_sysreg_s(SYS_TCR_EL12); break; + case ESR_EL1: *val = read_sysreg_s(SYS_ESR_EL12); break; + case AFSR0_EL1: *val = read_sysreg_s(SYS_AFSR0_EL12); break; + case AFSR1_EL1: *val = read_sysreg_s(SYS_AFSR1_EL12); break; + case FAR_EL1: *val = read_sysreg_s(SYS_FAR_EL12); break; + case MAIR_EL1: *val = read_sysreg_s(SYS_MAIR_EL12); break; + case VBAR_EL1: *val = read_sysreg_s(SYS_VBAR_EL12); break; + case CONTEXTIDR_EL1: *val = read_sysreg_s(SYS_CONTEXTIDR_EL12);break; + case TPIDR_EL0: *val = read_sysreg_s(SYS_TPIDR_EL0); break; + case TPIDRRO_EL0: *val = read_sysreg_s(SYS_TPIDRRO_EL0); break; + case TPIDR_EL1: *val = read_sysreg_s(SYS_TPIDR_EL1); break; + case AMAIR_EL1: *val = read_sysreg_s(SYS_AMAIR_EL12); break; + case CNTKCTL_EL1: *val = read_sysreg_s(SYS_CNTKCTL_EL12); break; + case PAR_EL1: *val = read_sysreg_s(SYS_PAR_EL1); break; + case DACR32_EL2: *val = read_sysreg_s(SYS_DACR32_EL2); break; + case IFSR32_EL2: *val = read_sysreg_s(SYS_IFSR32_EL2); break; + case DBGVCR32_EL2: *val = read_sysreg_s(SYS_DBGVCR32_EL2); break; + default: return false; } -immediate_read: - return __vcpu_sys_reg(vcpu, reg); + return true; } -void vcpu_write_sys_reg(struct kvm_vcpu *vcpu, u64 val, int reg) +static bool __vcpu_write_sys_reg_to_cpu(u64 val, int reg) { - if (!vcpu->arch.sysregs_loaded_on_cpu) - goto immediate_write; - /* * System registers listed in the switch are not restored on every * entry to the guest but are only restored on vcpu_load. @@ -122,32 +116,52 @@ void vcpu_write_sys_reg(struct kvm_vcpu *vcpu, u64 val, int reg) * once, before running the VCPU, and never changed later. 
*/ switch (reg) { - case CSSELR_EL1: write_sysreg_s(val, SYS_CSSELR_EL1); return; - case SCTLR_EL1: write_sysreg_s(val, SYS_SCTLR_EL12); return; - case ACTLR_EL1: write_sysreg_s(val, SYS_ACTLR_EL1); return; - case CPACR_EL1: write_sysreg_s(val, SYS_CPACR_EL12); return; - case TTBR0_EL1: write_sysreg_s(val, SYS_TTBR0_EL12); return; - case TTBR1_EL1: write_sysreg_s(val, SYS_TTBR1_EL12); return; - case TCR_EL1: write_sysreg_s(val, SYS_TCR_EL12); return; - case ESR_EL1: write_sysreg_s(val, SYS_ESR_EL12); return; - case AFSR0_EL1: write_sysreg_s(val, SYS_AFSR0_EL12); return; - case AFSR1_EL1: write_sysreg_s(val, SYS_AFSR1_EL12); return; - case FAR_EL1: write_sysreg_s(val, SYS_FAR_EL12); return; - case MAIR_EL1: write_sysreg_s(val, SYS_MAIR_EL12); return; - case VBAR_EL1: write_sysreg_s(val, SYS_VBAR_EL12); return; - case CONTEXTIDR_EL1: write_sysreg_s(val, SYS_CONTEXTIDR_EL12); return; - case TPIDR_EL0: write_sysreg_s(val, SYS_TPIDR_EL0); return; - case TPIDRRO_EL0: write_sysreg_s(val, SYS_TPIDRRO_EL0); return; - case TPIDR_EL1: write_sysreg_s(val, SYS_TPIDR_EL1); return; - case AMAIR_EL1: write_sysreg_s(val, SYS_AMAIR_EL12); return; - case CNTKCTL_EL1: write_sysreg_s(val, SYS_CNTKCTL_EL12); return; - case PAR_EL1: write_sysreg_s(val, SYS_PAR_EL1); return; - case DACR32_EL2: write_sysreg_s(val, SYS_DACR32_EL2); return; - case IFSR32_EL2: write_sysreg_s(val, SYS_IFSR32_EL2); return; - case DBGVCR32_EL2: write_sysreg_s(val, SYS_DBGVCR32_EL2); return; + case CSSELR_EL1: write_sysreg_s(val, SYS_CSSELR_EL1); break; + case SCTLR_EL1: write_sysreg_s(val, SYS_SCTLR_EL12); break; + case ACTLR_EL1: write_sysreg_s(val, SYS_ACTLR_EL1); break; + case CPACR_EL1: write_sysreg_s(val, SYS_CPACR_EL12); break; + case TTBR0_EL1: write_sysreg_s(val, SYS_TTBR0_EL12); break; + case TTBR1_EL1: write_sysreg_s(val, SYS_TTBR1_EL12); break; + case TCR_EL1: write_sysreg_s(val, SYS_TCR_EL12); break; + case ESR_EL1: write_sysreg_s(val, SYS_ESR_EL12); break; + case AFSR0_EL1: write_sysreg_s(val, SYS_AFSR0_EL12); break; + case AFSR1_EL1: write_sysreg_s(val, SYS_AFSR1_EL12); break; + case FAR_EL1: write_sysreg_s(val, SYS_FAR_EL12); break; + case MAIR_EL1: write_sysreg_s(val, SYS_MAIR_EL12); break; + case VBAR_EL1: write_sysreg_s(val, SYS_VBAR_EL12); break; + case CONTEXTIDR_EL1: write_sysreg_s(val, SYS_CONTEXTIDR_EL12);break; + case TPIDR_EL0: write_sysreg_s(val, SYS_TPIDR_EL0); break; + case TPIDRRO_EL0: write_sysreg_s(val, SYS_TPIDRRO_EL0); break; + case TPIDR_EL1: write_sysreg_s(val, SYS_TPIDR_EL1); break; + case AMAIR_EL1: write_sysreg_s(val, SYS_AMAIR_EL12); break; + case CNTKCTL_EL1: write_sysreg_s(val, SYS_CNTKCTL_EL12); break; + case PAR_EL1: write_sysreg_s(val, SYS_PAR_EL1); break; + case DACR32_EL2: write_sysreg_s(val, SYS_DACR32_EL2); break; + case IFSR32_EL2: write_sysreg_s(val, SYS_IFSR32_EL2); break; + case DBGVCR32_EL2: write_sysreg_s(val, SYS_DBGVCR32_EL2); break; + default: return false; } -immediate_write: + return true; +} + +u64 vcpu_read_sys_reg(const struct kvm_vcpu *vcpu, int reg) +{ + u64 val = 0x8badf00d8badf00d; + + if (vcpu->arch.sysregs_loaded_on_cpu && + __vcpu_read_sys_reg_from_cpu(reg, &val)) + return val; + + return __vcpu_sys_reg(vcpu, reg); +} + +void vcpu_write_sys_reg(struct kvm_vcpu *vcpu, u64 val, int reg) +{ + if (vcpu->arch.sysregs_loaded_on_cpu && + __vcpu_write_sys_reg_to_cpu(val, reg)) + return; + __vcpu_sys_reg(vcpu, reg) = val; } From 7ccadf23b8613c946f67e2b3c5e7f436858021aa Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Mon, 27 Jan 2020 11:54:42 +0000 Subject: [PATCH 20/24] KVM: 
arm64: Add missing reset handlers for PMU emulation As we're about to become a bit more harsh when it comes to the lack of reset callbacks, let's add the missing PMU reset handlers. Note that these only cover *CLR registers that were always covered by their *SET counterpart, so there is no semantic change here. Reviewed-by: James Morse Signed-off-by: Marc Zyngier --- arch/arm64/kvm/sys_regs.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c index 50e328ca1419..9d28eabbdf97 100644 --- a/arch/arm64/kvm/sys_regs.c +++ b/arch/arm64/kvm/sys_regs.c @@ -1546,7 +1546,7 @@ static const struct sys_reg_desc sys_reg_descs[] = { { SYS_DESC(SYS_PAR_EL1), NULL, reset_unknown, PAR_EL1 }, { SYS_DESC(SYS_PMINTENSET_EL1), access_pminten, reset_unknown, PMINTENSET_EL1 }, - { SYS_DESC(SYS_PMINTENCLR_EL1), access_pminten, NULL, PMINTENSET_EL1 }, + { SYS_DESC(SYS_PMINTENCLR_EL1), access_pminten, reset_unknown, PMINTENSET_EL1 }, { SYS_DESC(SYS_MAIR_EL1), access_vm_reg, reset_unknown, MAIR_EL1 }, { SYS_DESC(SYS_AMAIR_EL1), access_vm_reg, reset_amair_el1, AMAIR_EL1 }, @@ -1585,8 +1585,8 @@ static const struct sys_reg_desc sys_reg_descs[] = { { SYS_DESC(SYS_PMCR_EL0), access_pmcr, reset_pmcr, PMCR_EL0 }, { SYS_DESC(SYS_PMCNTENSET_EL0), access_pmcnten, reset_unknown, PMCNTENSET_EL0 }, - { SYS_DESC(SYS_PMCNTENCLR_EL0), access_pmcnten, NULL, PMCNTENSET_EL0 }, - { SYS_DESC(SYS_PMOVSCLR_EL0), access_pmovs, NULL, PMOVSSET_EL0 }, + { SYS_DESC(SYS_PMCNTENCLR_EL0), access_pmcnten, reset_unknown, PMCNTENSET_EL0 }, + { SYS_DESC(SYS_PMOVSCLR_EL0), access_pmovs, reset_unknown, PMOVSSET_EL0 }, { SYS_DESC(SYS_PMSWINC_EL0), access_pmswinc, reset_unknown, PMSWINC_EL0 }, { SYS_DESC(SYS_PMSELR_EL0), access_pmselr, reset_unknown, PMSELR_EL0 }, { SYS_DESC(SYS_PMCEID0_EL0), access_pmceid }, From bb44a8dbea259bc1dc2177b4bc90ca4e8fcbf659 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Mon, 27 Jan 2020 11:21:17 +0000 Subject: [PATCH 21/24] KVM: arm64: Move sysreg reset check to boot time Our sysreg reset check has become a bit silly, as it only checks whether a reset callback actually exists for a given sysreg entry, and apply the method if available. Doing the check at each vcpu reset is pretty dumb, as the tables never change. It is thus perfectly possible to do the same checks at boot time. This also allows us to introduce a sparse sys_regs[] array, something that will be required with ARMv8.4-NV. 
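A minimal, self-contained model of such a boot-time table check is sketched below. The reg_desc layout, field names and sample entries are invented for illustration; the point is only the invariant being enforced: any entry that owns shadow state must provide a reset callback, and the table must stay sorted:

#include <stddef.h>
#include <stdio.h>

struct reg_desc {
	unsigned int key;			/* encoded register; must be ascending */
	unsigned int reg;			/* shadow-state index, 0 = none */
	void (*reset)(struct reg_desc *d);	/* may be NULL only if !reg */
};

static void reset_zero(struct reg_desc *d) { (void)d; }

static int check_table(const struct reg_desc *table, size_t n)
{
	for (size_t i = 0; i < n; i++) {
		if (table[i].reg && !table[i].reset) {
			fprintf(stderr, "entry %zu lacks a reset handler\n", i);
			return 1;
		}
		if (i && table[i - 1].key >= table[i].key) {
			fprintf(stderr, "table out of order at %zu\n", i - 1);
			return 1;
		}
	}
	return 0;
}

int main(void)
{
	struct reg_desc table[] = {
		{ .key = 1, .reg = 10, .reset = reset_zero },
		{ .key = 2, .reg = 0,  .reset = NULL },	/* no shadow state: fine */
		{ .key = 3, .reg = 11, .reset = NULL },	/* missing reset: caught */
	};

	/* Done once at init time, not on every vcpu reset. */
	return check_table(table, sizeof(table) / sizeof(table[0]));
}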
Signed-off-by: Marc Zyngier --- arch/arm64/kvm/sys_regs.c | 72 +++++++++++++++++++-------------------- 1 file changed, 35 insertions(+), 37 deletions(-) diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c index 9d28eabbdf97..ad1d57501d6d 100644 --- a/arch/arm64/kvm/sys_regs.c +++ b/arch/arm64/kvm/sys_regs.c @@ -2087,12 +2087,37 @@ static const struct sys_reg_desc cp15_64_regs[] = { { SYS_DESC(SYS_AARCH32_CNTP_CVAL), access_arch_timer }, }; +static int check_sysreg_table(const struct sys_reg_desc *table, unsigned int n, + bool is_32) +{ + unsigned int i; + + for (i = 0; i < n; i++) { + if (!is_32 && table[i].reg && !table[i].reset) { + kvm_err("sys_reg table %p entry %d has lacks reset\n", + table, i); + return 1; + } + + if (i && cmp_sys_reg(&table[i-1], &table[i]) >= 0) { + kvm_err("sys_reg table %p out of order (%d)\n", table, i - 1); + return 1; + } + } + + return 0; +} + /* Target specific emulation tables */ static struct kvm_sys_reg_target_table *target_tables[KVM_ARM_NUM_TARGETS]; void kvm_register_target_sys_reg_table(unsigned int target, struct kvm_sys_reg_target_table *table) { + if (check_sysreg_table(table->table64.table, table->table64.num, false) || + check_sysreg_table(table->table32.table, table->table32.num, true)) + return; + target_tables[target] = table; } @@ -2378,19 +2403,13 @@ static int emulate_sys_reg(struct kvm_vcpu *vcpu, } static void reset_sys_reg_descs(struct kvm_vcpu *vcpu, - const struct sys_reg_desc *table, size_t num, - unsigned long *bmap) + const struct sys_reg_desc *table, size_t num) { unsigned long i; for (i = 0; i < num; i++) - if (table[i].reset) { - int reg = table[i].reg; - + if (table[i].reset) table[i].reset(vcpu, &table[i]); - if (reg > 0 && reg < NR_SYS_REGS) - set_bit(reg, bmap); - } } /** @@ -2846,32 +2865,18 @@ int kvm_arm_copy_sys_reg_indices(struct kvm_vcpu *vcpu, u64 __user *uindices) return write_demux_regids(uindices); } -static int check_sysreg_table(const struct sys_reg_desc *table, unsigned int n) -{ - unsigned int i; - - for (i = 1; i < n; i++) { - if (cmp_sys_reg(&table[i-1], &table[i]) >= 0) { - kvm_err("sys_reg table %p out of order (%d)\n", table, i - 1); - return 1; - } - } - - return 0; -} - void kvm_sys_reg_table_init(void) { unsigned int i; struct sys_reg_desc clidr; /* Make sure tables are unique and in order. */ - BUG_ON(check_sysreg_table(sys_reg_descs, ARRAY_SIZE(sys_reg_descs))); - BUG_ON(check_sysreg_table(cp14_regs, ARRAY_SIZE(cp14_regs))); - BUG_ON(check_sysreg_table(cp14_64_regs, ARRAY_SIZE(cp14_64_regs))); - BUG_ON(check_sysreg_table(cp15_regs, ARRAY_SIZE(cp15_regs))); - BUG_ON(check_sysreg_table(cp15_64_regs, ARRAY_SIZE(cp15_64_regs))); - BUG_ON(check_sysreg_table(invariant_sys_regs, ARRAY_SIZE(invariant_sys_regs))); + BUG_ON(check_sysreg_table(sys_reg_descs, ARRAY_SIZE(sys_reg_descs), false)); + BUG_ON(check_sysreg_table(cp14_regs, ARRAY_SIZE(cp14_regs), true)); + BUG_ON(check_sysreg_table(cp14_64_regs, ARRAY_SIZE(cp14_64_regs), true)); + BUG_ON(check_sysreg_table(cp15_regs, ARRAY_SIZE(cp15_regs), true)); + BUG_ON(check_sysreg_table(cp15_64_regs, ARRAY_SIZE(cp15_64_regs), true)); + BUG_ON(check_sysreg_table(invariant_sys_regs, ARRAY_SIZE(invariant_sys_regs), false)); /* We abuse the reset function to overwrite the table itself. 
*/ for (i = 0; i < ARRAY_SIZE(invariant_sys_regs); i++) @@ -2907,17 +2912,10 @@ void kvm_reset_sys_regs(struct kvm_vcpu *vcpu) { size_t num; const struct sys_reg_desc *table; - DECLARE_BITMAP(bmap, NR_SYS_REGS) = { 0, }; /* Generic chip reset first (so target could override). */ - reset_sys_reg_descs(vcpu, sys_reg_descs, ARRAY_SIZE(sys_reg_descs), bmap); + reset_sys_reg_descs(vcpu, sys_reg_descs, ARRAY_SIZE(sys_reg_descs)); table = get_target_table(vcpu->arch.target, true, &num); - reset_sys_reg_descs(vcpu, table, num, bmap); - - for (num = 1; num < NR_SYS_REGS; num++) { - if (WARN(!test_bit(num, bmap), - "Didn't reset __vcpu_sys_reg(%zi)\n", num)) - break; - } + reset_sys_reg_descs(vcpu, table, num); } From 349c330ced9764667678f4d2804fd4ebc16110c9 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Sun, 12 Apr 2020 18:49:31 +0100 Subject: [PATCH 22/24] KVM: arm64: Don't use empty structures as CPU reset state Keeping empty structure as the vcpu state initializer is slightly wasteful: we only want to set pstate, and zero everything else. Just do that. Signed-off-by: Marc Zyngier --- arch/arm64/kvm/reset.c | 21 +++++++++------------ 1 file changed, 9 insertions(+), 12 deletions(-) diff --git a/arch/arm64/kvm/reset.c b/arch/arm64/kvm/reset.c index 658f3a79617b..865c8aa670bc 100644 --- a/arch/arm64/kvm/reset.c +++ b/arch/arm64/kvm/reset.c @@ -36,15 +36,11 @@ static u32 kvm_ipa_limit; /* * ARMv8 Reset Values */ -static const struct kvm_regs default_regs_reset = { - .regs.pstate = (PSR_MODE_EL1h | PSR_A_BIT | PSR_I_BIT | - PSR_F_BIT | PSR_D_BIT), -}; +#define VCPU_RESET_PSTATE_EL1 (PSR_MODE_EL1h | PSR_A_BIT | PSR_I_BIT | \ + PSR_F_BIT | PSR_D_BIT) -static const struct kvm_regs default_regs_reset32 = { - .regs.pstate = (PSR_AA32_MODE_SVC | PSR_AA32_A_BIT | - PSR_AA32_I_BIT | PSR_AA32_F_BIT), -}; +#define VCPU_RESET_PSTATE_SVC (PSR_AA32_MODE_SVC | PSR_AA32_A_BIT | \ + PSR_AA32_I_BIT | PSR_AA32_F_BIT) static bool cpu_has_32bit_el1(void) { @@ -257,9 +253,9 @@ static int kvm_vcpu_enable_ptrauth(struct kvm_vcpu *vcpu) */ int kvm_reset_vcpu(struct kvm_vcpu *vcpu) { - const struct kvm_regs *cpu_reset; int ret = -EINVAL; bool loaded; + u32 pstate; /* Reset PMU outside of the non-preemptible section */ kvm_pmu_vcpu_reset(vcpu); @@ -290,16 +286,17 @@ int kvm_reset_vcpu(struct kvm_vcpu *vcpu) if (test_bit(KVM_ARM_VCPU_EL1_32BIT, vcpu->arch.features)) { if (!cpu_has_32bit_el1()) goto out; - cpu_reset = &default_regs_reset32; + pstate = VCPU_RESET_PSTATE_SVC; } else { - cpu_reset = &default_regs_reset; + pstate = VCPU_RESET_PSTATE_EL1; } break; } /* Reset core registers */ - memcpy(vcpu_gp_regs(vcpu), cpu_reset, sizeof(*cpu_reset)); + memset(vcpu_gp_regs(vcpu), 0, sizeof(*vcpu_gp_regs(vcpu))); + vcpu_gp_regs(vcpu)->regs.pstate = pstate; /* Reset system registers */ kvm_reset_sys_regs(vcpu); From d9d7d84d9906e1bc886c5e0fc66aaad26008264b Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Tue, 21 Apr 2020 18:32:02 +0100 Subject: [PATCH 23/24] KVM: arm64: Parametrize exception entry with a target EL We currently assume that an exception is delivered to EL1, always. Once we emulate EL2, this no longer will be the case. To prepare for this, add a target_mode parameter. While we're at it, merge the computing of the target PC and PSTATE in a single function that updates both PC and CPSR after saving their previous values in the corresponding ELR/SPSR. This ensures that they are updated in the correct order (a pretty common source of bugs...). 
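The vector-offset selection that ends up in enter_exception64() can be exercised on its own. The sketch below uses the architectural vector offsets and PSTATE mode encodings, but it is a standalone model rather than the kernel function, and it only covers the cases relevant to this patch:

#include <stdio.h>

#define PSR_MODE_EL0t		0x0
#define PSR_MODE_EL1t		0x4
#define PSR_MODE_EL1h		0x5
#define PSR_MODE_THREAD_BIT	(1 << 0)
#define PSR_MODE32_BIT		(1 << 4)

#define CURRENT_EL_SP_EL0_VECTOR	0x000UL
#define CURRENT_EL_SP_ELx_VECTOR	0x200UL
#define LOWER_EL_AARCH64_VECTOR		0x400UL
#define LOWER_EL_AARCH32_VECTOR		0x600UL

/*
 * Pick the vector table quadrant based on the mode we came from and the
 * target mode, as enter_exception64() does before adding the exception
 * type offset.
 */
static unsigned long vector_offset(unsigned long mode, unsigned long target_mode)
{
	if (mode == target_mode)
		return CURRENT_EL_SP_ELx_VECTOR;
	if ((mode | PSR_MODE_THREAD_BIT) == target_mode)
		return CURRENT_EL_SP_EL0_VECTOR;
	if (!(mode & PSR_MODE32_BIT))
		return LOWER_EL_AARCH64_VECTOR;
	return LOWER_EL_AARCH32_VECTOR;
}

int main(void)
{
	printf("EL1h -> EL1h: %#lx\n", vector_offset(PSR_MODE_EL1h, PSR_MODE_EL1h));
	printf("EL1t -> EL1h: %#lx\n", vector_offset(PSR_MODE_EL1t, PSR_MODE_EL1h));
	printf("EL0t -> EL1h: %#lx\n", vector_offset(PSR_MODE_EL0t, PSR_MODE_EL1h));
	return 0;
}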
Reviewed-by: Mark Rutland Signed-off-by: Marc Zyngier --- arch/arm64/include/asm/ptrace.h | 1 + arch/arm64/kvm/inject_fault.c | 75 +++++++++++++++++---------------- 2 files changed, 39 insertions(+), 37 deletions(-) diff --git a/arch/arm64/include/asm/ptrace.h b/arch/arm64/include/asm/ptrace.h index bf57308fcd63..953b6a1ce549 100644 --- a/arch/arm64/include/asm/ptrace.h +++ b/arch/arm64/include/asm/ptrace.h @@ -35,6 +35,7 @@ #define GIC_PRIO_PSR_I_SET (1 << 4) /* Additional SPSR bits not exposed in the UABI */ +#define PSR_MODE_THREAD_BIT (1 << 0) #define PSR_IL_BIT (1 << 20) /* AArch32-specific ptrace requests */ diff --git a/arch/arm64/kvm/inject_fault.c b/arch/arm64/kvm/inject_fault.c index 6aafc2825c1c..e21fdd93027a 100644 --- a/arch/arm64/kvm/inject_fault.c +++ b/arch/arm64/kvm/inject_fault.c @@ -26,28 +26,12 @@ enum exception_type { except_type_serror = 0x180, }; -static u64 get_except_vector(struct kvm_vcpu *vcpu, enum exception_type type) -{ - u64 exc_offset; - - switch (*vcpu_cpsr(vcpu) & (PSR_MODE_MASK | PSR_MODE32_BIT)) { - case PSR_MODE_EL1t: - exc_offset = CURRENT_EL_SP_EL0_VECTOR; - break; - case PSR_MODE_EL1h: - exc_offset = CURRENT_EL_SP_ELx_VECTOR; - break; - case PSR_MODE_EL0t: - exc_offset = LOWER_EL_AArch64_VECTOR; - break; - default: - exc_offset = LOWER_EL_AArch32_VECTOR; - } - - return vcpu_read_sys_reg(vcpu, VBAR_EL1) + exc_offset + type; -} - /* + * This performs the exception entry at a given EL (@target_mode), stashing PC + * and PSTATE into ELR and SPSR respectively, and compute the new PC/PSTATE. + * The EL passed to this function *must* be a non-secure, privileged mode with + * bit 0 being set (PSTATE.SP == 1). + * * When an exception is taken, most PSTATE fields are left unchanged in the * handler. However, some are explicitly overridden (e.g. M[4:0]). Luckily all * of the inherited bits have the same position in the AArch64/AArch32 SPSR_ELx @@ -59,10 +43,35 @@ static u64 get_except_vector(struct kvm_vcpu *vcpu, enum exception_type type) * Here we manipulate the fields in order of the AArch64 SPSR_ELx layout, from * MSB to LSB. 
*/ -static unsigned long get_except64_pstate(struct kvm_vcpu *vcpu) +static void enter_exception64(struct kvm_vcpu *vcpu, unsigned long target_mode, + enum exception_type type) { - unsigned long sctlr = vcpu_read_sys_reg(vcpu, SCTLR_EL1); - unsigned long old, new; + unsigned long sctlr, vbar, old, new, mode; + u64 exc_offset; + + mode = *vcpu_cpsr(vcpu) & (PSR_MODE_MASK | PSR_MODE32_BIT); + + if (mode == target_mode) + exc_offset = CURRENT_EL_SP_ELx_VECTOR; + else if ((mode | PSR_MODE_THREAD_BIT) == target_mode) + exc_offset = CURRENT_EL_SP_EL0_VECTOR; + else if (!(mode & PSR_MODE32_BIT)) + exc_offset = LOWER_EL_AArch64_VECTOR; + else + exc_offset = LOWER_EL_AArch32_VECTOR; + + switch (target_mode) { + case PSR_MODE_EL1h: + vbar = vcpu_read_sys_reg(vcpu, VBAR_EL1); + sctlr = vcpu_read_sys_reg(vcpu, SCTLR_EL1); + vcpu_write_elr_el1(vcpu, *vcpu_pc(vcpu)); + break; + default: + /* Don't do that */ + BUG(); + } + + *vcpu_pc(vcpu) = vbar + exc_offset + type; old = *vcpu_cpsr(vcpu); new = 0; @@ -105,9 +114,10 @@ static unsigned long get_except64_pstate(struct kvm_vcpu *vcpu) new |= PSR_I_BIT; new |= PSR_F_BIT; - new |= PSR_MODE_EL1h; + new |= target_mode; - return new; + *vcpu_cpsr(vcpu) = new; + vcpu_write_spsr(vcpu, old); } static void inject_abt64(struct kvm_vcpu *vcpu, bool is_iabt, unsigned long addr) @@ -116,11 +126,7 @@ static void inject_abt64(struct kvm_vcpu *vcpu, bool is_iabt, unsigned long addr bool is_aarch32 = vcpu_mode_is_32bit(vcpu); u32 esr = 0; - vcpu_write_elr_el1(vcpu, *vcpu_pc(vcpu)); - *vcpu_pc(vcpu) = get_except_vector(vcpu, except_type_sync); - - *vcpu_cpsr(vcpu) = get_except64_pstate(vcpu); - vcpu_write_spsr(vcpu, cpsr); + enter_exception64(vcpu, PSR_MODE_EL1h, except_type_sync); vcpu_write_sys_reg(vcpu, addr, FAR_EL1); @@ -148,14 +154,9 @@ static void inject_abt64(struct kvm_vcpu *vcpu, bool is_iabt, unsigned long addr static void inject_undef64(struct kvm_vcpu *vcpu) { - unsigned long cpsr = *vcpu_cpsr(vcpu); u32 esr = (ESR_ELx_EC_UNKNOWN << ESR_ELx_EC_SHIFT); - vcpu_write_elr_el1(vcpu, *vcpu_pc(vcpu)); - *vcpu_pc(vcpu) = get_except_vector(vcpu, except_type_sync); - - *vcpu_cpsr(vcpu) = get_except64_pstate(vcpu); - vcpu_write_spsr(vcpu, cpsr); + enter_exception64(vcpu, PSR_MODE_EL1h, except_type_sync); /* * Build an unknown exception, depending on the instruction From 8f7f4fe756bd5cfef73cf8234445081385bdbf7d Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Wed, 27 May 2020 11:38:57 +0100 Subject: [PATCH 24/24] KVM: arm64: Drop obsolete comment about sys_reg ordering The general comment about keeping the enum order in sync with the save/restore code has been obsolete for many years now. Just drop it. Note that there are other ordering requirements in the enum, such as the PtrAuth and PMU registers, which are still valid. Reported-by: James Morse Signed-off-by: Marc Zyngier --- arch/arm64/include/asm/kvm_host.h | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index 69a338a390a6..59029e90b557 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -115,12 +115,8 @@ struct kvm_vcpu_fault_info { u64 disr_el1; /* Deferred [SError] Status Register */ }; -/* - * 0 is reserved as an invalid value. - * Order should be kept in sync with the save/restore code. 
- */ enum vcpu_sysreg { - __INVALID_SYSREG__, + __INVALID_SYSREG__, /* 0 is reserved as an invalid value */ MPIDR_EL1, /* MultiProcessor Affinity Register */ CSSELR_EL1, /* Cache Size Selection Register */ SCTLR_EL1, /* System Control Register */