From ab70a73aa45beccab01cc0bb1d920361e3642393 Mon Sep 17 00:00:00 2001
From: Guo Ren
Date: Thu, 30 Jan 2020 17:04:19 +0000
Subject: [PATCH 01/38] riscv: Use flush_icache_mm for flush_icache_user_range

The only call path is:

  __access_remote_vm -> copy_to_user_page -> flush_icache_user_range

It seems OK to use flush_icache_mm instead of flush_icache_all, and
doing so reduces the number of flush_icache_all calls on other harts.

Signed-off-by: Guo Ren
[Palmer: git-am wouldn't apply the patch, I did so manually]
Fixes: 08f051eda33b ("RISC-V: Flush I$ when making a dirty page executable")
Reviewed-by: Palmer Dabbelt
Signed-off-by: Palmer Dabbelt
---
 arch/riscv/include/asm/cacheflush.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/riscv/include/asm/cacheflush.h b/arch/riscv/include/asm/cacheflush.h
index 555b20b11dc3..c8677c75f82c 100644
--- a/arch/riscv/include/asm/cacheflush.h
+++ b/arch/riscv/include/asm/cacheflush.h
@@ -85,7 +85,7 @@ static inline void flush_dcache_page(struct page *page)
  * so instead we just flush the whole thing.
  */
 #define flush_icache_range(start, end) flush_icache_all()
-#define flush_icache_user_range(vma, pg, addr, len) flush_icache_all()
+#define flush_icache_user_range(vma, pg, addr, len) flush_icache_mm(vma->vm_mm, 0)
 
 #ifndef CONFIG_SMP

From 2fab7a15604cfe3605775c5d146a7dfcf97412bb Mon Sep 17 00:00:00 2001
From: Deepa Dinamani
Date: Sun, 5 Jan 2020 12:10:20 -0800
Subject: [PATCH 02/38] riscv: Delete CONFIG_SYSFS_SYSCALL from defconfigs

According to init/Kconfig:

  "sys_sysfs is an obsolete system call no longer supported in libc.
   Note that disabling this option is more secure but might break
   compatibility with some systems."

This syscall is not required for new architectures. Since the config
defaults to 'y', set it to 'n' explicitly.

Signed-off-by: Deepa Dinamani
Signed-off-by: Palmer Dabbelt
---
 arch/riscv/configs/defconfig      | 1 +
 arch/riscv/configs/rv32_defconfig | 1 +
 2 files changed, 2 insertions(+)

diff --git a/arch/riscv/configs/defconfig b/arch/riscv/configs/defconfig
index e2ff95cb3390..58f97b3cb24c 100644
--- a/arch/riscv/configs/defconfig
+++ b/arch/riscv/configs/defconfig
@@ -125,3 +125,4 @@ CONFIG_DEBUG_BLOCK_EXT_DEVT=y
 # CONFIG_FTRACE is not set
 # CONFIG_RUNTIME_TESTING_MENU is not set
 CONFIG_MEMTEST=y
+# CONFIG_SYSFS_SYSCALL is not set
diff --git a/arch/riscv/configs/rv32_defconfig b/arch/riscv/configs/rv32_defconfig
index eb519407c841..f4076b6ac063 100644
--- a/arch/riscv/configs/rv32_defconfig
+++ b/arch/riscv/configs/rv32_defconfig
@@ -122,3 +122,4 @@ CONFIG_DEBUG_BLOCK_EXT_DEVT=y
 # CONFIG_FTRACE is not set
 # CONFIG_RUNTIME_TESTING_MENU is not set
 CONFIG_MEMTEST=y
+# CONFIG_SYSFS_SYSCALL is not set

From aff7783392e0c0152ee01653ce9c61abb4928910 Mon Sep 17 00:00:00 2001
From: Zong Li
Date: Tue, 4 Feb 2020 19:19:47 +0800
Subject: [PATCH 03/38] riscv: force hart_lottery to put in .sdata section

In the PIC code model, zero-initialized data is always placed in the
.bss section, so when the kernel is built as PIE, hart_lottery is not
in the small-data section. That lets more than one hart win the
lottery, because the main hart clears the contents of .bss immediately
after it takes the lottery.
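As an illustration (not part of the patch), a minimal stand-alone
sketch of how an explicit section attribute keeps a zero-initialized
symbol out of .bss; the file and symbol names are made up, and a
riscv64-linux-gnu toolchain is assumed:

  /* lottery.c: build with riscv64-linux-gnu-gcc -fPIE -c lottery.c,
   * then check placement with riscv64-linux-gnu-objdump -t lottery.o.
   */
  int normally_in_bss;                            /* lands in .bss under PIC/PIE */
  int pinned __attribute__((section(".sdata")));  /* pinned into .sdata, so it
                                                     survives the early clearing
                                                     of .bss */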
Signed-off-by: Zong Li
Reviewed-by: Anup Patel
[Palmer: added a comment]
Reviewed-by: Palmer Dabbelt
Signed-off-by: Palmer Dabbelt
---
 arch/riscv/kernel/setup.c | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/arch/riscv/kernel/setup.c b/arch/riscv/kernel/setup.c
index 0a6d415b0a5a..cb836fcc6118 100644
--- a/arch/riscv/kernel/setup.c
+++ b/arch/riscv/kernel/setup.c
@@ -39,8 +39,12 @@ struct screen_info screen_info = {
 };
 #endif
 
-/* The lucky hart to first increment this variable will boot the other cores */
-atomic_t hart_lottery;
+/*
+ * The lucky hart to first increment this variable will boot the other cores.
+ * This is used before the kernel initializes the BSS so it can't be in the
+ * BSS.
+ */
+atomic_t hart_lottery __section(.sdata);
 unsigned long boot_cpu_hartid;
 
 void __init parse_dtb(void)

From 064223b947a8c3d0b35a4ac9ae6e31e3f77657fd Mon Sep 17 00:00:00 2001
From: Palmer Dabbelt
Date: Tue, 18 Feb 2020 13:17:06 -0800
Subject: [PATCH 04/38] RISC-V: Stop putting .sbss in .sdata

I don't know why we were doing this, as it's been there since the
beginning. After d841f729e655 ("riscv: force hart_lottery to put in
.sdata section") my guess would be that it made the kernel boot and we
forgot to fix it more cleanly.

The default .bss segment already contains the .sbss section, so we
don't need to do anything additional to ensure the symbols in .sbss
continue to work.

Tested-by: Zong Li
Signed-off-by: Palmer Dabbelt
---
 arch/riscv/kernel/vmlinux.lds.S | 1 -
 1 file changed, 1 deletion(-)

diff --git a/arch/riscv/kernel/vmlinux.lds.S b/arch/riscv/kernel/vmlinux.lds.S
index 1e0193ded420..a8fb52a00295 100644
--- a/arch/riscv/kernel/vmlinux.lds.S
+++ b/arch/riscv/kernel/vmlinux.lds.S
@@ -64,7 +64,6 @@ SECTIONS
 		*(.sdata*)
 		/* End of data section */
 		_edata = .;
-		*(.sbss*)
 	}
 
 	BSS_SECTION(PAGE_SIZE, PAGE_SIZE, 0)

From 52e7c52d2ded5908e6a4f8a7248e5fa6e0d6809a Mon Sep 17 00:00:00 2001
From: Palmer Dabbelt
Date: Thu, 27 Feb 2020 11:07:28 -0800
Subject: [PATCH 05/38] RISC-V: Stop relying on GCC's register allocator's heuristics

GCC allows users to hint to the register allocator that a variable
should be placed in a register by using a syntax along the lines of

    function(...)
    {
        register long in_REG __asm__("REG");
    }

We've abused this a bit throughout the RISC-V port to access fixed
registers directly as C variables. In practice it's never going to
blow up, because GCC isn't going to allocate these registers, but it's
not a well-defined syntax so we really shouldn't be relying upon it.
Luckily there is a very similar but well-defined syntax that allows us
to still access these registers directly as C variables: simply
declare the register variables globally. For fixed registers this
doesn't change the ABI.

LLVM disallows the ambiguous syntax, so this isn't strictly just a
formatting change.

Signed-off-by: Palmer Dabbelt
---
 arch/riscv/include/asm/current.h | 5 +++--
 arch/riscv/kernel/process.c      | 5 +++--
 arch/riscv/kernel/stacktrace.c   | 7 ++++---
 3 files changed, 10 insertions(+), 7 deletions(-)

diff --git a/arch/riscv/include/asm/current.h b/arch/riscv/include/asm/current.h
index dd973efe5d7c..1de233d8e8de 100644
--- a/arch/riscv/include/asm/current.h
+++ b/arch/riscv/include/asm/current.h
@@ -17,6 +17,8 @@
 
 struct task_struct;
 
+register struct task_struct *riscv_current_is_tp __asm__("tp");
+
 /*
  * This only works because "struct thread_info" is at offset 0 from "struct
  * task_struct".
This constraint seems to be necessary on other architectures @@ -26,8 +28,7 @@ struct task_struct; */ static __always_inline struct task_struct *get_current(void) { - register struct task_struct *tp __asm__("tp"); - return tp; + return riscv_current_is_tp; } #define current get_current() diff --git a/arch/riscv/kernel/process.c b/arch/riscv/kernel/process.c index 817cf7b0974c..610c11e91606 100644 --- a/arch/riscv/kernel/process.c +++ b/arch/riscv/kernel/process.c @@ -22,6 +22,8 @@ #include #include +unsigned long gp_in_global __asm__("gp"); + extern asmlinkage void ret_from_fork(void); extern asmlinkage void ret_from_kernel_thread(void); @@ -107,9 +109,8 @@ int copy_thread_tls(unsigned long clone_flags, unsigned long usp, /* p->thread holds context to be restored by __switch_to() */ if (unlikely(p->flags & PF_KTHREAD)) { /* Kernel thread */ - const register unsigned long gp __asm__ ("gp"); memset(childregs, 0, sizeof(struct pt_regs)); - childregs->gp = gp; + childregs->gp = gp_in_global; /* Supervisor/Machine, irqs on: */ childregs->status = SR_PP | SR_PIE; diff --git a/arch/riscv/kernel/stacktrace.c b/arch/riscv/kernel/stacktrace.c index 0940681d2f68..02087fe539c6 100644 --- a/arch/riscv/kernel/stacktrace.c +++ b/arch/riscv/kernel/stacktrace.c @@ -19,6 +19,8 @@ struct stackframe { unsigned long ra; }; +register unsigned long sp_in_global __asm__("sp"); + void notrace walk_stackframe(struct task_struct *task, struct pt_regs *regs, bool (*fn)(unsigned long, void *), void *arg) { @@ -29,7 +31,7 @@ void notrace walk_stackframe(struct task_struct *task, struct pt_regs *regs, sp = user_stack_pointer(regs); pc = instruction_pointer(regs); } else if (task == NULL || task == current) { - const register unsigned long current_sp __asm__ ("sp"); + const register unsigned long current_sp = sp_in_global; fp = (unsigned long)__builtin_frame_address(0); sp = current_sp; pc = (unsigned long)walk_stackframe; @@ -73,8 +75,7 @@ static void notrace walk_stackframe(struct task_struct *task, sp = user_stack_pointer(regs); pc = instruction_pointer(regs); } else if (task == NULL || task == current) { - const register unsigned long current_sp __asm__ ("sp"); - sp = current_sp; + sp = sp_in_global; pc = (unsigned long)walk_stackframe; } else { /* task blocked in __switch_to */ From fdff9911f266951b14b20e25557278b5b3f0d90d Mon Sep 17 00:00:00 2001 From: Palmer Dabbelt Date: Thu, 27 Feb 2020 11:15:03 -0800 Subject: [PATCH 06/38] RISC-V: Inline the assembly register save/restore macros These are only used once, and when reading the code I've always found them to be more of a headache than a benefit. While they were never worth removing before, LLVM's integrated assembler doesn't support LOCAL so rather that trying to figure out how to refactor the macros it seems saner to just inline them. Reviewed-by: Nick Desaulniers Signed-off-by: Palmer Dabbelt --- arch/riscv/kernel/entry.S | 143 ++++++++++++++++---------------------- 1 file changed, 61 insertions(+), 82 deletions(-) diff --git a/arch/riscv/kernel/entry.S b/arch/riscv/kernel/entry.S index bad4d85b5e91..f2e8e7c8089d 100644 --- a/arch/riscv/kernel/entry.S +++ b/arch/riscv/kernel/entry.S @@ -13,17 +13,11 @@ #include #include - .text - .altmacro - -/* - * Prepares to enter a system call or exception by saving all registers to the - * stack. 
- */ - .macro SAVE_ALL - LOCAL _restore_kernel_tpsp - LOCAL _save_context +#if !IS_ENABLED(CONFIG_PREEMPTION) +.set resume_kernel, restore_all +#endif +ENTRY(handle_exception) /* * If coming from userspace, preserve the user thread pointer and load * the kernel thread pointer. If we came from the kernel, the scratch @@ -90,77 +84,6 @@ _save_context: REG_S s3, PT_BADADDR(sp) REG_S s4, PT_CAUSE(sp) REG_S s5, PT_TP(sp) - .endm - -/* - * Prepares to return from a system call or exception by restoring all - * registers from the stack. - */ - .macro RESTORE_ALL - REG_L a0, PT_STATUS(sp) - /* - * The current load reservation is effectively part of the processor's - * state, in the sense that load reservations cannot be shared between - * different hart contexts. We can't actually save and restore a load - * reservation, so instead here we clear any existing reservation -- - * it's always legal for implementations to clear load reservations at - * any point (as long as the forward progress guarantee is kept, but - * we'll ignore that here). - * - * Dangling load reservations can be the result of taking a trap in the - * middle of an LR/SC sequence, but can also be the result of a taken - * forward branch around an SC -- which is how we implement CAS. As a - * result we need to clear reservations between the last CAS and the - * jump back to the new context. While it is unlikely the store - * completes, implementations are allowed to expand reservations to be - * arbitrarily large. - */ - REG_L a2, PT_EPC(sp) - REG_SC x0, a2, PT_EPC(sp) - - csrw CSR_STATUS, a0 - csrw CSR_EPC, a2 - - REG_L x1, PT_RA(sp) - REG_L x3, PT_GP(sp) - REG_L x4, PT_TP(sp) - REG_L x5, PT_T0(sp) - REG_L x6, PT_T1(sp) - REG_L x7, PT_T2(sp) - REG_L x8, PT_S0(sp) - REG_L x9, PT_S1(sp) - REG_L x10, PT_A0(sp) - REG_L x11, PT_A1(sp) - REG_L x12, PT_A2(sp) - REG_L x13, PT_A3(sp) - REG_L x14, PT_A4(sp) - REG_L x15, PT_A5(sp) - REG_L x16, PT_A6(sp) - REG_L x17, PT_A7(sp) - REG_L x18, PT_S2(sp) - REG_L x19, PT_S3(sp) - REG_L x20, PT_S4(sp) - REG_L x21, PT_S5(sp) - REG_L x22, PT_S6(sp) - REG_L x23, PT_S7(sp) - REG_L x24, PT_S8(sp) - REG_L x25, PT_S9(sp) - REG_L x26, PT_S10(sp) - REG_L x27, PT_S11(sp) - REG_L x28, PT_T3(sp) - REG_L x29, PT_T4(sp) - REG_L x30, PT_T5(sp) - REG_L x31, PT_T6(sp) - - REG_L x2, PT_SP(sp) - .endm - -#if !IS_ENABLED(CONFIG_PREEMPTION) -.set resume_kernel, restore_all -#endif - -ENTRY(handle_exception) - SAVE_ALL /* * Set the scratch register to 0, so that if a recursive exception @@ -298,7 +221,63 @@ resume_userspace: csrw CSR_SCRATCH, tp restore_all: - RESTORE_ALL + REG_L a0, PT_STATUS(sp) + /* + * The current load reservation is effectively part of the processor's + * state, in the sense that load reservations cannot be shared between + * different hart contexts. We can't actually save and restore a load + * reservation, so instead here we clear any existing reservation -- + * it's always legal for implementations to clear load reservations at + * any point (as long as the forward progress guarantee is kept, but + * we'll ignore that here). + * + * Dangling load reservations can be the result of taking a trap in the + * middle of an LR/SC sequence, but can also be the result of a taken + * forward branch around an SC -- which is how we implement CAS. As a + * result we need to clear reservations between the last CAS and the + * jump back to the new context. While it is unlikely the store + * completes, implementations are allowed to expand reservations to be + * arbitrarily large. 
+ */ + REG_L a2, PT_EPC(sp) + REG_SC x0, a2, PT_EPC(sp) + + csrw CSR_STATUS, a0 + csrw CSR_EPC, a2 + + REG_L x1, PT_RA(sp) + REG_L x3, PT_GP(sp) + REG_L x4, PT_TP(sp) + REG_L x5, PT_T0(sp) + REG_L x6, PT_T1(sp) + REG_L x7, PT_T2(sp) + REG_L x8, PT_S0(sp) + REG_L x9, PT_S1(sp) + REG_L x10, PT_A0(sp) + REG_L x11, PT_A1(sp) + REG_L x12, PT_A2(sp) + REG_L x13, PT_A3(sp) + REG_L x14, PT_A4(sp) + REG_L x15, PT_A5(sp) + REG_L x16, PT_A6(sp) + REG_L x17, PT_A7(sp) + REG_L x18, PT_S2(sp) + REG_L x19, PT_S3(sp) + REG_L x20, PT_S4(sp) + REG_L x21, PT_S5(sp) + REG_L x22, PT_S6(sp) + REG_L x23, PT_S7(sp) + REG_L x24, PT_S8(sp) + REG_L x25, PT_S9(sp) + REG_L x26, PT_S10(sp) + REG_L x27, PT_S11(sp) + REG_L x28, PT_T3(sp) + REG_L x29, PT_T4(sp) + REG_L x30, PT_T5(sp) + REG_L x31, PT_T6(sp) + + REG_L x2, PT_SP(sp) + #ifdef CONFIG_RISCV_M_MODE mret #else From abc71bf0a70311ab294f97a7f16e8de03718c05a Mon Sep 17 00:00:00 2001 From: Palmer Dabbelt Date: Thu, 27 Feb 2020 11:16:28 -0800 Subject: [PATCH 07/38] RISC-V: Stop using LOCAL for the uaccess fixups LLVM's integrated assembler doesn't support the LOCAL directive, which we're using when generating our uaccess fixup tables. Luckily the table fragment is small enough that there's only one internal symbol, so using a relative symbol reference doesn't really complicate anything. Signed-off-by: Palmer Dabbelt --- arch/riscv/lib/uaccess.S | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/arch/riscv/lib/uaccess.S b/arch/riscv/lib/uaccess.S index f29d2ba2c0a6..fceaeb18cc64 100644 --- a/arch/riscv/lib/uaccess.S +++ b/arch/riscv/lib/uaccess.S @@ -3,14 +3,12 @@ #include #include - .altmacro .macro fixup op reg addr lbl - LOCAL _epc -_epc: +100: \op \reg, \addr .section __ex_table,"a" .balign RISCV_SZPTR - RISCV_PTR _epc, \lbl + RISCV_PTR 100b, \lbl .previous .endm From 3133287b53ee88444e2294e601d2bad54a798f59 Mon Sep 17 00:00:00 2001 From: Alexandre Ghiti Date: Thu, 20 Feb 2020 01:10:23 -0500 Subject: [PATCH 08/38] riscv: Use p*d_leaf macros to define p*d_huge The newly introduced p*d_leaf macros allow to check if an entry of the page table map to a physical page instead of the next level. To avoid duplication of code, use those macros to determine if a page table entry points to a hugepage. Suggested-by: Paul Walmsley Signed-off-by: Alexandre Ghiti Reviewed-by: Anup Patel Signed-off-by: Palmer Dabbelt --- arch/riscv/mm/hugetlbpage.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/arch/riscv/mm/hugetlbpage.c b/arch/riscv/mm/hugetlbpage.c index 0d4747e9d5b5..a6189ed36c5f 100644 --- a/arch/riscv/mm/hugetlbpage.c +++ b/arch/riscv/mm/hugetlbpage.c @@ -4,14 +4,12 @@ int pud_huge(pud_t pud) { - return pud_present(pud) && - (pud_val(pud) & (_PAGE_READ | _PAGE_WRITE | _PAGE_EXEC)); + return pud_leaf(pud); } int pmd_huge(pmd_t pmd) { - return pmd_present(pmd) && - (pmd_val(pmd) & (_PAGE_READ | _PAGE_WRITE | _PAGE_EXEC)); + return pmd_leaf(pmd); } static __init int setup_hugepagesz(char *opt) From 9f40b6e77d2f888a8c0608036eb124cedb6d2434 Mon Sep 17 00:00:00 2001 From: Atish Patra Date: Mon, 24 Feb 2020 11:34:36 -0800 Subject: [PATCH 09/38] RISC-V: Move all address space definition macros to one place MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit If both CONFIG_KASAN and CONFIG_SPARSEMEM_VMEMMAP are set, we get the following compilation error. 
--------------------------------------------------------------- ./arch/riscv/include/asm/pgtable-64.h: In function ‘pud_page’: ./include/asm-generic/memory_model.h:54:29: error: ‘vmemmap’ undeclared (first use in this function); did you mean ‘mem_map’? #define __pfn_to_page(pfn) (vmemmap + (pfn)) ^~~~~~~ ./include/asm-generic/memory_model.h:82:21: note: in expansion of macro ‘__pfn_to_page’ #define pfn_to_page __pfn_to_page ^~~~~~~~~~~~~ ./arch/riscv/include/asm/pgtable-64.h:70:9: note: in expansion of macro ‘pfn_to_page’ return pfn_to_page(pud_val(pud) >> _PAGE_PFN_SHIFT); --------------------------------------------------------------- Fix the compliation errors by moving all the address space definition macros before including pgtable-64.h. Fixes: 8ad8b72721d0 (riscv: Add KASAN support) Signed-off-by: Atish Patra Reviewed-by: Anup Patel Signed-off-by: Palmer Dabbelt --- arch/riscv/include/asm/pgtable.h | 78 +++++++++++++++++--------------- 1 file changed, 41 insertions(+), 37 deletions(-) diff --git a/arch/riscv/include/asm/pgtable.h b/arch/riscv/include/asm/pgtable.h index e43041519edd..393f2014dfee 100644 --- a/arch/riscv/include/asm/pgtable.h +++ b/arch/riscv/include/asm/pgtable.h @@ -19,6 +19,47 @@ #include #include +#ifdef CONFIG_MMU + +#define VMALLOC_SIZE (KERN_VIRT_SIZE >> 1) +#define VMALLOC_END (PAGE_OFFSET - 1) +#define VMALLOC_START (PAGE_OFFSET - VMALLOC_SIZE) + +#define BPF_JIT_REGION_SIZE (SZ_128M) +#define BPF_JIT_REGION_START (PAGE_OFFSET - BPF_JIT_REGION_SIZE) +#define BPF_JIT_REGION_END (VMALLOC_END) + +/* + * Roughly size the vmemmap space to be large enough to fit enough + * struct pages to map half the virtual address space. Then + * position vmemmap directly below the VMALLOC region. + */ +#define VMEMMAP_SHIFT \ + (CONFIG_VA_BITS - PAGE_SHIFT - 1 + STRUCT_PAGE_MAX_SHIFT) +#define VMEMMAP_SIZE BIT(VMEMMAP_SHIFT) +#define VMEMMAP_END (VMALLOC_START - 1) +#define VMEMMAP_START (VMALLOC_START - VMEMMAP_SIZE) + +/* + * Define vmemmap for pfn_to_page & page_to_pfn calls. Needed if kernel + * is configured with CONFIG_SPARSEMEM_VMEMMAP enabled. + */ +#define vmemmap ((struct page *)VMEMMAP_START) + +#define PCI_IO_SIZE SZ_16M +#define PCI_IO_END VMEMMAP_START +#define PCI_IO_START (PCI_IO_END - PCI_IO_SIZE) + +#define FIXADDR_TOP PCI_IO_START +#ifdef CONFIG_64BIT +#define FIXADDR_SIZE PMD_SIZE +#else +#define FIXADDR_SIZE PGDIR_SIZE +#endif +#define FIXADDR_START (FIXADDR_TOP - FIXADDR_SIZE) + +#endif + #ifdef CONFIG_64BIT #include #else @@ -90,31 +131,6 @@ extern pgd_t swapper_pg_dir[]; #define __S110 PAGE_SHARED_EXEC #define __S111 PAGE_SHARED_EXEC -#define VMALLOC_SIZE (KERN_VIRT_SIZE >> 1) -#define VMALLOC_END (PAGE_OFFSET - 1) -#define VMALLOC_START (PAGE_OFFSET - VMALLOC_SIZE) - -#define BPF_JIT_REGION_SIZE (SZ_128M) -#define BPF_JIT_REGION_START (PAGE_OFFSET - BPF_JIT_REGION_SIZE) -#define BPF_JIT_REGION_END (VMALLOC_END) - -/* - * Roughly size the vmemmap space to be large enough to fit enough - * struct pages to map half the virtual address space. Then - * position vmemmap directly below the VMALLOC region. - */ -#define VMEMMAP_SHIFT \ - (CONFIG_VA_BITS - PAGE_SHIFT - 1 + STRUCT_PAGE_MAX_SHIFT) -#define VMEMMAP_SIZE BIT(VMEMMAP_SHIFT) -#define VMEMMAP_END (VMALLOC_START - 1) -#define VMEMMAP_START (VMALLOC_START - VMEMMAP_SIZE) - -/* - * Define vmemmap for pfn_to_page & page_to_pfn calls. Needed if kernel - * is configured with CONFIG_SPARSEMEM_VMEMMAP enabled. 
- */ -#define vmemmap ((struct page *)VMEMMAP_START) - static inline int pmd_present(pmd_t pmd) { return (pmd_val(pmd) & (_PAGE_PRESENT | _PAGE_PROT_NONE)); @@ -432,18 +448,6 @@ static inline int ptep_clear_flush_young(struct vm_area_struct *vma, #define __pte_to_swp_entry(pte) ((swp_entry_t) { pte_val(pte) }) #define __swp_entry_to_pte(x) ((pte_t) { (x).val }) -#define PCI_IO_SIZE SZ_16M -#define PCI_IO_END VMEMMAP_START -#define PCI_IO_START (PCI_IO_END - PCI_IO_SIZE) - -#define FIXADDR_TOP PCI_IO_START -#ifdef CONFIG_64BIT -#define FIXADDR_SIZE PMD_SIZE -#else -#define FIXADDR_SIZE PGDIR_SIZE -#endif -#define FIXADDR_START (FIXADDR_TOP - FIXADDR_SIZE) - /* * Task size is 0x4000000000 for RV64 or 0x9fc00000 for RV32. * Note that PGDIR_SIZE must evenly divide TASK_SIZE. From d3ab332a5021235a74fd832a49c6a99404920d88 Mon Sep 17 00:00:00 2001 From: Zong Li Date: Tue, 10 Mar 2020 00:55:36 +0800 Subject: [PATCH 10/38] riscv: add ARCH_HAS_SET_MEMORY support Add set_memory_ro/rw/x/nx architecture hooks to change the page attribution. Use own set_memory.h rather than generic set_memory.h (i.e. include/asm-generic/set_memory.h), because we want to add other function prototypes here. Signed-off-by: Zong Li Signed-off-by: Palmer Dabbelt --- arch/riscv/Kconfig | 1 + arch/riscv/include/asm/set_memory.h | 24 +++++ arch/riscv/mm/Makefile | 2 +- arch/riscv/mm/pageattr.c | 150 ++++++++++++++++++++++++++++ 4 files changed, 176 insertions(+), 1 deletion(-) create mode 100644 arch/riscv/include/asm/set_memory.h create mode 100644 arch/riscv/mm/pageattr.c diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig index 73f029eae0cc..3da5c2004191 100644 --- a/arch/riscv/Kconfig +++ b/arch/riscv/Kconfig @@ -59,6 +59,7 @@ config RISCV select HAVE_EBPF_JIT if 64BIT select EDAC_SUPPORT select ARCH_HAS_GIGANTIC_PAGE + select ARCH_HAS_SET_MEMORY select ARCH_WANT_HUGE_PMD_SHARE if 64BIT select SPARSEMEM_STATIC if 32BIT select ARCH_WANT_DEFAULT_TOPDOWN_MMAP_LAYOUT if MMU diff --git a/arch/riscv/include/asm/set_memory.h b/arch/riscv/include/asm/set_memory.h new file mode 100644 index 000000000000..79a810f0f38b --- /dev/null +++ b/arch/riscv/include/asm/set_memory.h @@ -0,0 +1,24 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (C) 2019 SiFive + */ + +#ifndef _ASM_RISCV_SET_MEMORY_H +#define _ASM_RISCV_SET_MEMORY_H + +/* + * Functions to change memory attributes. 
+ */ +#ifdef CONFIG_MMU +int set_memory_ro(unsigned long addr, int numpages); +int set_memory_rw(unsigned long addr, int numpages); +int set_memory_x(unsigned long addr, int numpages); +int set_memory_nx(unsigned long addr, int numpages); +#else +static inline int set_memory_ro(unsigned long addr, int numpages) { return 0; } +static inline int set_memory_rw(unsigned long addr, int numpages) { return 0; } +static inline int set_memory_x(unsigned long addr, int numpages) { return 0; } +static inline int set_memory_nx(unsigned long addr, int numpages) { return 0; } +#endif + +#endif /* _ASM_RISCV_SET_MEMORY_H */ diff --git a/arch/riscv/mm/Makefile b/arch/riscv/mm/Makefile index 50b7af58c566..e0e10b618273 100644 --- a/arch/riscv/mm/Makefile +++ b/arch/riscv/mm/Makefile @@ -7,7 +7,7 @@ endif obj-y += init.o obj-y += extable.o -obj-$(CONFIG_MMU) += fault.o +obj-$(CONFIG_MMU) += fault.o pageattr.o obj-y += cacheflush.o obj-y += context.o diff --git a/arch/riscv/mm/pageattr.c b/arch/riscv/mm/pageattr.c new file mode 100644 index 000000000000..fcd59ef2835b --- /dev/null +++ b/arch/riscv/mm/pageattr.c @@ -0,0 +1,150 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (C) 2019 SiFive + */ + +#include +#include +#include +#include + +struct pageattr_masks { + pgprot_t set_mask; + pgprot_t clear_mask; +}; + +static unsigned long set_pageattr_masks(unsigned long val, struct mm_walk *walk) +{ + struct pageattr_masks *masks = walk->private; + unsigned long new_val = val; + + new_val &= ~(pgprot_val(masks->clear_mask)); + new_val |= (pgprot_val(masks->set_mask)); + + return new_val; +} + +static int pageattr_pgd_entry(pgd_t *pgd, unsigned long addr, + unsigned long next, struct mm_walk *walk) +{ + pgd_t val = READ_ONCE(*pgd); + + if (pgd_leaf(val)) { + val = __pgd(set_pageattr_masks(pgd_val(val), walk)); + set_pgd(pgd, val); + } + + return 0; +} + +static int pageattr_p4d_entry(p4d_t *p4d, unsigned long addr, + unsigned long next, struct mm_walk *walk) +{ + p4d_t val = READ_ONCE(*p4d); + + if (p4d_leaf(val)) { + val = __p4d(set_pageattr_masks(p4d_val(val), walk)); + set_p4d(p4d, val); + } + + return 0; +} + +static int pageattr_pud_entry(pud_t *pud, unsigned long addr, + unsigned long next, struct mm_walk *walk) +{ + pud_t val = READ_ONCE(*pud); + + if (pud_leaf(val)) { + val = __pud(set_pageattr_masks(pud_val(val), walk)); + set_pud(pud, val); + } + + return 0; +} + +static int pageattr_pmd_entry(pmd_t *pmd, unsigned long addr, + unsigned long next, struct mm_walk *walk) +{ + pmd_t val = READ_ONCE(*pmd); + + if (pmd_leaf(val)) { + val = __pmd(set_pageattr_masks(pmd_val(val), walk)); + set_pmd(pmd, val); + } + + return 0; +} + +static int pageattr_pte_entry(pte_t *pte, unsigned long addr, + unsigned long next, struct mm_walk *walk) +{ + pte_t val = READ_ONCE(*pte); + + val = __pte(set_pageattr_masks(pte_val(val), walk)); + set_pte(pte, val); + + return 0; +} + +static int pageattr_pte_hole(unsigned long addr, unsigned long next, + int depth, struct mm_walk *walk) +{ + /* Nothing to do here */ + return 0; +} + +const static struct mm_walk_ops pageattr_ops = { + .pgd_entry = pageattr_pgd_entry, + .p4d_entry = pageattr_p4d_entry, + .pud_entry = pageattr_pud_entry, + .pmd_entry = pageattr_pmd_entry, + .pte_entry = pageattr_pte_entry, + .pte_hole = pageattr_pte_hole, +}; + +static int __set_memory(unsigned long addr, int numpages, pgprot_t set_mask, + pgprot_t clear_mask) +{ + int ret; + unsigned long start = addr; + unsigned long end = start + PAGE_SIZE * numpages; + struct pageattr_masks masks = { 
+ .set_mask = set_mask, + .clear_mask = clear_mask + }; + + if (!numpages) + return 0; + + down_read(&init_mm.mmap_sem); + ret = walk_page_range_novma(&init_mm, start, end, &pageattr_ops, NULL, + &masks); + up_read(&init_mm.mmap_sem); + + flush_tlb_kernel_range(start, end); + + return ret; +} + +int set_memory_ro(unsigned long addr, int numpages) +{ + return __set_memory(addr, numpages, __pgprot(_PAGE_READ), + __pgprot(_PAGE_WRITE)); +} + +int set_memory_rw(unsigned long addr, int numpages) +{ + return __set_memory(addr, numpages, __pgprot(_PAGE_READ | _PAGE_WRITE), + __pgprot(0)); +} + +int set_memory_x(unsigned long addr, int numpages) +{ + return __set_memory(addr, numpages, __pgprot(_PAGE_EXEC), __pgprot(0)); +} + +int set_memory_nx(unsigned long addr, int numpages) +{ + return __set_memory(addr, numpages, __pgprot(0), __pgprot(_PAGE_EXEC)); +} From 395a21ff859c9c2471ea62d7d56af8a85ec333f7 Mon Sep 17 00:00:00 2001 From: Zong Li Date: Tue, 10 Mar 2020 00:55:37 +0800 Subject: [PATCH 11/38] riscv: add ARCH_HAS_SET_DIRECT_MAP support Add set_direct_map_*() functions for setting the direct map alias for the page to its default permissions and to an invalid state that cannot be cached in a TLB. (See d253ca0c ("x86/mm/cpa: Add set_direct_map_*() functions")) Add a similar implementation for RISC-V. Signed-off-by: Zong Li Signed-off-by: Palmer Dabbelt --- arch/riscv/Kconfig | 1 + arch/riscv/include/asm/set_memory.h | 3 +++ arch/riscv/mm/pageattr.c | 24 ++++++++++++++++++++++++ 3 files changed, 28 insertions(+) diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig index 3da5c2004191..34fc7450b9dd 100644 --- a/arch/riscv/Kconfig +++ b/arch/riscv/Kconfig @@ -59,6 +59,7 @@ config RISCV select HAVE_EBPF_JIT if 64BIT select EDAC_SUPPORT select ARCH_HAS_GIGANTIC_PAGE + select ARCH_HAS_SET_DIRECT_MAP select ARCH_HAS_SET_MEMORY select ARCH_WANT_HUGE_PMD_SHARE if 64BIT select SPARSEMEM_STATIC if 32BIT diff --git a/arch/riscv/include/asm/set_memory.h b/arch/riscv/include/asm/set_memory.h index 79a810f0f38b..620d81c372d9 100644 --- a/arch/riscv/include/asm/set_memory.h +++ b/arch/riscv/include/asm/set_memory.h @@ -21,4 +21,7 @@ static inline int set_memory_x(unsigned long addr, int numpages) { return 0; } static inline int set_memory_nx(unsigned long addr, int numpages) { return 0; } #endif +int set_direct_map_invalid_noflush(struct page *page); +int set_direct_map_default_noflush(struct page *page); + #endif /* _ASM_RISCV_SET_MEMORY_H */ diff --git a/arch/riscv/mm/pageattr.c b/arch/riscv/mm/pageattr.c index fcd59ef2835b..7be6cd67e2ef 100644 --- a/arch/riscv/mm/pageattr.c +++ b/arch/riscv/mm/pageattr.c @@ -148,3 +148,27 @@ int set_memory_nx(unsigned long addr, int numpages) { return __set_memory(addr, numpages, __pgprot(0), __pgprot(_PAGE_EXEC)); } + +int set_direct_map_invalid_noflush(struct page *page) +{ + unsigned long start = (unsigned long)page_address(page); + unsigned long end = start + PAGE_SIZE; + struct pageattr_masks masks = { + .set_mask = __pgprot(0), + .clear_mask = __pgprot(_PAGE_PRESENT) + }; + + return walk_page_range(&init_mm, start, end, &pageattr_ops, &masks); +} + +int set_direct_map_default_noflush(struct page *page) +{ + unsigned long start = (unsigned long)page_address(page); + unsigned long end = start + PAGE_SIZE; + struct pageattr_masks masks = { + .set_mask = PAGE_KERNEL, + .clear_mask = __pgprot(0) + }; + + return walk_page_range(&init_mm, start, end, &pageattr_ops, &masks); +} From 5fde3db5eb028b95aeefa1ab192d36800414e8b8 Mon Sep 17 00:00:00 2001 From: Zong Li Date: Tue, 10 
Mar 2020 00:55:38 +0800
Subject: [PATCH 12/38] riscv: add ARCH_SUPPORTS_DEBUG_PAGEALLOC support

ARCH_SUPPORTS_DEBUG_PAGEALLOC provides a hook to map and unmap pages
for debugging purposes. Implement __kernel_map_pages to fill the
poison pattern.

Signed-off-by: Zong Li
Signed-off-by: Palmer Dabbelt
---
 arch/riscv/Kconfig       |  3 +++
 arch/riscv/mm/pageattr.c | 13 +++++++++++++
 2 files changed, 16 insertions(+)

diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig
index 34fc7450b9dd..31fbc6ef7a6d 100644
--- a/arch/riscv/Kconfig
+++ b/arch/riscv/Kconfig
@@ -131,6 +131,9 @@ config ARCH_SELECT_MEMORY_MODEL
 config ARCH_WANT_GENERAL_HUGETLB
 	def_bool y
 
+config ARCH_SUPPORTS_DEBUG_PAGEALLOC
+	def_bool y
+
 config SYS_SUPPORTS_HUGETLBFS
 	def_bool y
 
diff --git a/arch/riscv/mm/pageattr.c b/arch/riscv/mm/pageattr.c
index 7be6cd67e2ef..728759eb530a 100644
--- a/arch/riscv/mm/pageattr.c
+++ b/arch/riscv/mm/pageattr.c
@@ -172,3 +172,16 @@ int set_direct_map_default_noflush(struct page *page)
 
 	return walk_page_range(&init_mm, start, end, &pageattr_ops, &masks);
 }
+
+void __kernel_map_pages(struct page *page, int numpages, int enable)
+{
+	if (!debug_pagealloc_enabled())
+		return;
+
+	if (enable)
+		__set_memory((unsigned long)page_address(page), numpages,
+			     __pgprot(_PAGE_PRESENT), __pgprot(0));
+	else
+		__set_memory((unsigned long)page_address(page), numpages,
+			     __pgprot(0), __pgprot(_PAGE_PRESENT));
+}

From bd3d914d16aaf82412771a2d673299d4b5e3aeda Mon Sep 17 00:00:00 2001
From: Zong Li
Date: Tue, 10 Mar 2020 00:55:39 +0800
Subject: [PATCH 13/38] riscv: move exception table immediately after RO_DATA

Move EXCEPTION_TABLE immediately after RO_DATA. This makes it easy to
set the attributes of all the sections that should be read-only in one
go. Add _data to mark the start of the data section, which has write
permission.

This patch prepares for STRICT_KERNEL_RWX support.

Signed-off-by: Zong Li
Signed-off-by: Palmer Dabbelt
---
 arch/riscv/kernel/vmlinux.lds.S | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/arch/riscv/kernel/vmlinux.lds.S b/arch/riscv/kernel/vmlinux.lds.S
index a8fb52a00295..9e8adca9b20f 100644
--- a/arch/riscv/kernel/vmlinux.lds.S
+++ b/arch/riscv/kernel/vmlinux.lds.S
@@ -58,6 +58,10 @@ SECTIONS
 		*(.srodata*)
 	}
 
+	EXCEPTION_TABLE(0x10)
+
+	_data = .;
+
 	RW_DATA(L1_CACHE_BYTES, PAGE_SIZE, THREAD_SIZE)
 	.sdata : {
 		__global_pointer$ = . + 0x800;
@@ -68,8 +72,6 @@ SECTIONS
 
 	BSS_SECTION(PAGE_SIZE, PAGE_SIZE, 0)
 
-	EXCEPTION_TABLE(0x10)
-
 	.rel.dyn : {
 		*(.rel.dyn*)
 	}

From 00cb41d5ad3189f52a59f42766918557693f94fa Mon Sep 17 00:00:00 2001
From: Zong Li
Date: Tue, 10 Mar 2020 00:55:40 +0800
Subject: [PATCH 14/38] riscv: add alignment for text, rodata and data sections

The kernel mapping tries to optimize itself by using a bigger mapping
size: PMD_SIZE on rv64 and PGDIR_SIZE on rv32. To ensure that the
start addresses of these sections fit the mapping entry size, align
them to the biggest such alignment. Define a macro SECTION_ALIGN
because HPAGE_SIZE, PMD_SIZE, etc. are not visible in the linker
script.

This patch prepares for STRICT_KERNEL_RWX support.
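For reference, a quick stand-alone sanity check (plain C, not kernel
code) that the SECTION_ALIGN constants introduced below match the
mapping sizes named above, assuming 4 KiB pages:

  #include <assert.h>

  #define PAGE_SHIFT 12

  int main(void)
  {
          /* rv64: a PMD entry maps 2^(PAGE_SHIFT + 9) = 2 MiB, i.e. 1 << 21 */
          assert((1UL << 21) == (1UL << (PAGE_SHIFT + 9)));
          /* rv32: a PGD entry maps 2^(PAGE_SHIFT + 10) = 4 MiB, i.e. 1 << 22 */
          assert((1UL << 22) == (1UL << (PAGE_SHIFT + 10)));
          return 0;
  }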
Signed-off-by: Zong Li
Signed-off-by: Palmer Dabbelt
---
 arch/riscv/include/asm/set_memory.h | 13 +++++++++++++
 arch/riscv/kernel/vmlinux.lds.S     |  5 ++++-
 2 files changed, 17 insertions(+), 1 deletion(-)

diff --git a/arch/riscv/include/asm/set_memory.h b/arch/riscv/include/asm/set_memory.h
index 620d81c372d9..4c5bae7ca01c 100644
--- a/arch/riscv/include/asm/set_memory.h
+++ b/arch/riscv/include/asm/set_memory.h
@@ -6,6 +6,7 @@
 #ifndef _ASM_RISCV_SET_MEMORY_H
 #define _ASM_RISCV_SET_MEMORY_H
 
+#ifndef __ASSEMBLY__
 /*
  * Functions to change memory attributes.
  */
@@ -24,4 +25,16 @@ static inline int set_memory_nx(unsigned long addr, int numpages) { return 0; }
 int set_direct_map_invalid_noflush(struct page *page);
 int set_direct_map_default_noflush(struct page *page);
 
+#endif /* __ASSEMBLY__ */
+
+#ifdef CONFIG_ARCH_HAS_STRICT_KERNEL_RWX
+#ifdef CONFIG_64BIT
+#define SECTION_ALIGN (1 << 21)
+#else
+#define SECTION_ALIGN (1 << 22)
+#endif
+#else /* !CONFIG_ARCH_HAS_STRICT_KERNEL_RWX */
+#define SECTION_ALIGN L1_CACHE_BYTES
+#endif /* CONFIG_ARCH_HAS_STRICT_KERNEL_RWX */
+
 #endif /* _ASM_RISCV_SET_MEMORY_H */
diff --git a/arch/riscv/kernel/vmlinux.lds.S b/arch/riscv/kernel/vmlinux.lds.S
index 9e8adca9b20f..4ad3c8eb241d 100644
--- a/arch/riscv/kernel/vmlinux.lds.S
+++ b/arch/riscv/kernel/vmlinux.lds.S
@@ -9,6 +9,7 @@
 #include
 #include
 #include
+#include <asm/set_memory.h>
 
 OUTPUT_ARCH(riscv)
 ENTRY(_start)
@@ -36,6 +37,7 @@ SECTIONS
 	PERCPU_SECTION(L1_CACHE_BYTES)
 	__init_end = .;
 
+	. = ALIGN(SECTION_ALIGN);
 	.text : {
 		_text = .;
 		_stext = .;
@@ -53,13 +55,14 @@ SECTIONS
 
 	/* Start of data section */
 	_sdata = .;
-	RO_DATA(L1_CACHE_BYTES)
+	RO_DATA(SECTION_ALIGN)
 	.srodata : {
 		*(.srodata*)
 	}
 
 	EXCEPTION_TABLE(0x10)
 
+	. = ALIGN(SECTION_ALIGN);
 	_data = .;
 
 	RW_DATA(L1_CACHE_BYTES, PAGE_SIZE, THREAD_SIZE)

From d27c3c90817e4c5ea655714065a725b4abd576f9 Mon Sep 17 00:00:00 2001
From: Zong Li
Date: Tue, 10 Mar 2020 00:55:41 +0800
Subject: [PATCH 15/38] riscv: add STRICT_KERNEL_RWX support

This commit makes the text section non-writable, the rodata section
read-only, and the data section non-executable. The init section is
also changed to non-executable.
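As a side note (an illustration, not part of the patch), the helpers
below all convert a virtual range to a page count as
(end - start) >> PAGE_SHIFT, which is exact only when both bounds are
page-aligned; the section alignment added in the previous patch
guarantees that. A stand-alone sketch of the idiom, with a
hypothetical helper name:

  #include <assert.h>

  #define PAGE_SHIFT 12
  #define PAGE_SIZE  (1UL << PAGE_SHIFT)

  /* Hypothetical helper mirroring the (end - start) >> PAGE_SHIFT idiom. */
  static unsigned long range_to_pages(unsigned long start, unsigned long end)
  {
          assert((start & (PAGE_SIZE - 1)) == 0 && (end & (PAGE_SIZE - 1)) == 0);
          return (end - start) >> PAGE_SHIFT;
  }

  int main(void)
  {
          assert(range_to_pages(0x200000, 0x400000) == 512); /* 2 MiB = 512 pages */
          return 0;
  }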
Signed-off-by: Zong Li
Signed-off-by: Palmer Dabbelt
---
 arch/riscv/Kconfig                  |  1 +
 arch/riscv/include/asm/set_memory.h |  8 ++++++
 arch/riscv/mm/init.c                | 44 +++++++++++++++++++++++++++++
 3 files changed, 53 insertions(+)

diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig
index 31fbc6ef7a6d..dfc9c3ea9f7d 100644
--- a/arch/riscv/Kconfig
+++ b/arch/riscv/Kconfig
@@ -61,6 +61,7 @@ config RISCV
 	select ARCH_HAS_GIGANTIC_PAGE
 	select ARCH_HAS_SET_DIRECT_MAP
 	select ARCH_HAS_SET_MEMORY
+	select ARCH_HAS_STRICT_KERNEL_RWX
 	select ARCH_WANT_HUGE_PMD_SHARE if 64BIT
 	select SPARSEMEM_STATIC if 32BIT
 	select ARCH_WANT_DEFAULT_TOPDOWN_MMAP_LAYOUT if MMU
diff --git a/arch/riscv/include/asm/set_memory.h b/arch/riscv/include/asm/set_memory.h
index 4c5bae7ca01c..c38df4771c09 100644
--- a/arch/riscv/include/asm/set_memory.h
+++ b/arch/riscv/include/asm/set_memory.h
@@ -22,6 +22,14 @@ static inline int set_memory_x(unsigned long addr, int numpages) { return 0; }
 static inline int set_memory_nx(unsigned long addr, int numpages) { return 0; }
 #endif
 
+#ifdef CONFIG_STRICT_KERNEL_RWX
+void set_kernel_text_ro(void);
+void set_kernel_text_rw(void);
+#else
+static inline void set_kernel_text_ro(void) { }
+static inline void set_kernel_text_rw(void) { }
+#endif
+
 int set_direct_map_invalid_noflush(struct page *page);
 int set_direct_map_default_noflush(struct page *page);
 
diff --git a/arch/riscv/mm/init.c b/arch/riscv/mm/init.c
index 965a8cf4829c..0c625a5e98db 100644
--- a/arch/riscv/mm/init.c
+++ b/arch/riscv/mm/init.c
@@ -12,6 +12,7 @@
 #include
 #include
 #include
+#include <linux/set_memory.h>
 
 #include
 #include
@@ -477,6 +478,17 @@ static void __init setup_vm_final(void)
 	csr_write(CSR_SATP, PFN_DOWN(__pa_symbol(swapper_pg_dir)) | SATP_MODE);
 	local_flush_tlb_all();
 }
+
+void free_initmem(void)
+{
+	unsigned long init_begin = (unsigned long)__init_begin;
+	unsigned long init_end = (unsigned long)__init_end;
+
+	/* Make the region non-executable. */
+	set_memory_nx(init_begin, (init_end - init_begin) >> PAGE_SHIFT);
+	free_initmem_default(POISON_FREE_INITMEM);
+}
+
 #else
 asmlinkage void __init setup_vm(uintptr_t dtb_pa)
 {
@@ -488,6 +500,38 @@ static inline void setup_vm_final(void)
 }
 #endif /* CONFIG_MMU */
 
+#ifdef CONFIG_STRICT_KERNEL_RWX
+void set_kernel_text_rw(void)
+{
+	unsigned long text_start = (unsigned long)_text;
+	unsigned long text_end = (unsigned long)_etext;
+
+	set_memory_rw(text_start, (text_end - text_start) >> PAGE_SHIFT);
+}
+
+void set_kernel_text_ro(void)
+{
+	unsigned long text_start = (unsigned long)_text;
+	unsigned long text_end = (unsigned long)_etext;
+
+	set_memory_ro(text_start, (text_end - text_start) >> PAGE_SHIFT);
+}
+
+void mark_rodata_ro(void)
+{
+	unsigned long text_start = (unsigned long)_text;
+	unsigned long text_end = (unsigned long)_etext;
+	unsigned long rodata_start = (unsigned long)__start_rodata;
+	unsigned long data_start = (unsigned long)_data;
+	unsigned long max_low = (unsigned long)(__va(PFN_PHYS(max_low_pfn)));
+
+	set_memory_ro(text_start, (text_end - text_start) >> PAGE_SHIFT);
+	set_memory_ro(rodata_start, (data_start - rodata_start) >> PAGE_SHIFT);
+	set_memory_nx(rodata_start, (data_start - rodata_start) >> PAGE_SHIFT);
+	set_memory_nx(data_start, (max_low - data_start) >> PAGE_SHIFT);
+}
+#endif
+
 void __init paging_init(void)
 {
 	setup_vm_final();

From b42d763a2d412d6ef7c29cb2f1b3e9985e2b1e38 Mon Sep 17 00:00:00 2001
From: Zong Li
Date: Tue, 10 Mar 2020 00:55:42 +0800
Subject: [PATCH 16/38] riscv: add macro to get instruction length

Extract the instruction-length calculation into a macro for common
use.

Signed-off-by: Zong Li
Signed-off-by: Palmer Dabbelt
---
 arch/riscv/include/asm/bug.h | 8 ++++++++
 arch/riscv/kernel/traps.c    | 3 ++-
 2 files changed, 10 insertions(+), 1 deletion(-)

diff --git a/arch/riscv/include/asm/bug.h b/arch/riscv/include/asm/bug.h
index 75604fec1b1b..d6f1ec08d97b 100644
--- a/arch/riscv/include/asm/bug.h
+++ b/arch/riscv/include/asm/bug.h
@@ -19,6 +19,14 @@
 #define __BUG_INSN_32	_UL(0x00100073) /* ebreak */
 #define __BUG_INSN_16	_UL(0x9002) /* c.ebreak */
 
+#define GET_INSN_LENGTH(insn)						\
+({									\
+	unsigned long __len;						\
+	__len = ((insn & __INSN_LENGTH_MASK) == __INSN_LENGTH_32) ?	\
+		4UL : 2UL;						\
+	__len;								\
+})
+
 typedef u32 bug_insn_t;
 
 #ifdef CONFIG_GENERIC_BUG_RELATIVE_POINTERS
diff --git a/arch/riscv/kernel/traps.c b/arch/riscv/kernel/traps.c
index ffb3d94bf0cc..a4d136355f78 100644
--- a/arch/riscv/kernel/traps.c
+++ b/arch/riscv/kernel/traps.c
@@ -118,7 +118,8 @@ static inline unsigned long get_break_insn_length(unsigned long pc)
 
 	if (probe_kernel_address((bug_insn_t *)pc, insn))
 		return 0;
-	return (((insn & __INSN_LENGTH_MASK) == __INSN_LENGTH_32) ? 4UL : 2UL);
+
+	return GET_INSN_LENGTH(insn);
 }
 
 asmlinkage __visible void do_trap_break(struct pt_regs *regs)

From 043cb41a85de1c0e944da61ad7a264960e22c865 Mon Sep 17 00:00:00 2001
From: Zong Li
Date: Tue, 10 Mar 2020 00:55:43 +0800
Subject: [PATCH 17/38] riscv: introduce interfaces to patch kernel code

With strict kernel memory permissions, we can't patch code through a
mapping that lacks write permission. Reserve two holes in the fixmap
area so we can temporarily map the kernel code there and patch the
instructions through that alias. We need two pages because compressed
instructions are supported, so instructions may be aligned to only
2 bytes; a 32-bit instruction at a 2-byte-aligned address can cross a
page boundary.
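To make the two-page requirement concrete, here is a stand-alone
sketch (plain C) mirroring the across_pages test used by
riscv_insn_write() in the diff below:

  #include <assert.h>
  #include <stdbool.h>
  #include <stddef.h>
  #include <stdint.h>

  #define PAGE_SIZE 4096UL
  #define PAGE_MASK (~(PAGE_SIZE - 1))

  static bool across_pages(uintptr_t addr, size_t len)
  {
          /* Offset within the page plus the write length spills over? */
          return ((addr & ~PAGE_MASK) + len) > PAGE_SIZE;
  }

  int main(void)
  {
          assert(across_pages(PAGE_SIZE - 2, 4));   /* 32-bit insn crosses  */
          assert(!across_pages(PAGE_SIZE - 2, 2));  /* compressed insn fits */
          assert(!across_pages(PAGE_SIZE - 4, 4));  /* aligned 32-bit fits  */
          return 0;
  }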
Introduce two interfaces to patch kernel code: riscv_patch_text_nosync: - patch code without synchronization, it's caller's responsibility to synchronize all CPUs if needed. riscv_patch_text: - patch code and always synchronize with stop_machine() Signed-off-by: Zong Li Signed-off-by: Palmer Dabbelt --- arch/riscv/include/asm/fixmap.h | 2 + arch/riscv/include/asm/patch.h | 12 ++++ arch/riscv/kernel/Makefile | 4 +- arch/riscv/kernel/patch.c | 120 ++++++++++++++++++++++++++++++++ 4 files changed, 137 insertions(+), 1 deletion(-) create mode 100644 arch/riscv/include/asm/patch.h create mode 100644 arch/riscv/kernel/patch.c diff --git a/arch/riscv/include/asm/fixmap.h b/arch/riscv/include/asm/fixmap.h index 42d2c42f3cc9..2368d49eb4ef 100644 --- a/arch/riscv/include/asm/fixmap.h +++ b/arch/riscv/include/asm/fixmap.h @@ -27,6 +27,8 @@ enum fixed_addresses { FIX_FDT = FIX_FDT_END + FIX_FDT_SIZE / PAGE_SIZE - 1, FIX_PTE, FIX_PMD, + FIX_TEXT_POKE1, + FIX_TEXT_POKE0, FIX_EARLYCON_MEM_BASE, __end_of_fixed_addresses }; diff --git a/arch/riscv/include/asm/patch.h b/arch/riscv/include/asm/patch.h new file mode 100644 index 000000000000..b5918a6e0615 --- /dev/null +++ b/arch/riscv/include/asm/patch.h @@ -0,0 +1,12 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (C) 2020 SiFive + */ + +#ifndef _ASM_RISCV_PATCH_H +#define _ASM_RISCV_PATCH_H + +int riscv_patch_text_nosync(void *addr, const void *insns, size_t len); +int riscv_patch_text(void *addr, u32 insn); + +#endif /* _ASM_RISCV_PATCH_H */ diff --git a/arch/riscv/kernel/Makefile b/arch/riscv/kernel/Makefile index f40205cb9a22..d189bd3d8501 100644 --- a/arch/riscv/kernel/Makefile +++ b/arch/riscv/kernel/Makefile @@ -4,7 +4,8 @@ # ifdef CONFIG_FTRACE -CFLAGS_REMOVE_ftrace.o = -pg +CFLAGS_REMOVE_ftrace.o = -pg +CFLAGS_REMOVE_patch.o = -pg endif extra-y += head.o @@ -26,6 +27,7 @@ obj-y += traps.o obj-y += riscv_ksyms.o obj-y += stacktrace.o obj-y += cacheinfo.o +obj-y += patch.o obj-$(CONFIG_MMU) += vdso.o vdso/ obj-$(CONFIG_RISCV_M_MODE) += clint.o diff --git a/arch/riscv/kernel/patch.c b/arch/riscv/kernel/patch.c new file mode 100644 index 000000000000..8a4fc65ee022 --- /dev/null +++ b/arch/riscv/kernel/patch.c @@ -0,0 +1,120 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (C) 2020 SiFive + */ + +#include +#include +#include +#include +#include +#include +#include + +struct riscv_insn_patch { + void *addr; + u32 insn; + atomic_t cpu_count; +}; + +#ifdef CONFIG_MMU +static DEFINE_RAW_SPINLOCK(patch_lock); + +static void __kprobes *patch_map(void *addr, int fixmap) +{ + uintptr_t uintaddr = (uintptr_t) addr; + struct page *page; + + if (core_kernel_text(uintaddr)) + page = phys_to_page(__pa_symbol(addr)); + else if (IS_ENABLED(CONFIG_STRICT_MODULE_RWX)) + page = vmalloc_to_page(addr); + else + return addr; + + BUG_ON(!page); + + return (void *)set_fixmap_offset(fixmap, page_to_phys(page) + + (uintaddr & ~PAGE_MASK)); +} + +static void __kprobes patch_unmap(int fixmap) +{ + clear_fixmap(fixmap); +} + +static int __kprobes riscv_insn_write(void *addr, const void *insn, size_t len) +{ + void *waddr = addr; + bool across_pages = (((uintptr_t) addr & ~PAGE_MASK) + len) > PAGE_SIZE; + unsigned long flags = 0; + int ret; + + raw_spin_lock_irqsave(&patch_lock, flags); + + if (across_pages) + patch_map(addr + len, FIX_TEXT_POKE1); + + waddr = patch_map(addr, FIX_TEXT_POKE0); + + ret = probe_kernel_write(waddr, insn, len); + + patch_unmap(FIX_TEXT_POKE0); + + if (across_pages) + patch_unmap(FIX_TEXT_POKE1); + + 
raw_spin_unlock_irqrestore(&patch_lock, flags);
+
+	return ret;
+}
+#else
+static int __kprobes riscv_insn_write(void *addr, const void *insn, size_t len)
+{
+	return probe_kernel_write(addr, insn, len);
+}
+#endif /* CONFIG_MMU */
+
+int __kprobes riscv_patch_text_nosync(void *addr, const void *insns, size_t len)
+{
+	u32 *tp = addr;
+	int ret;
+
+	ret = riscv_insn_write(tp, insns, len);
+
+	if (!ret)
+		flush_icache_range((uintptr_t) tp, (uintptr_t) tp + len);
+
+	return ret;
+}
+
+static int __kprobes riscv_patch_text_cb(void *data)
+{
+	struct riscv_insn_patch *patch = data;
+	int ret = 0;
+
+	if (atomic_inc_return(&patch->cpu_count) == 1) {
+		ret = riscv_patch_text_nosync(patch->addr, &patch->insn,
+					      GET_INSN_LENGTH(patch->insn));
+		atomic_inc(&patch->cpu_count);
+	} else {
+		while (atomic_read(&patch->cpu_count) <= num_online_cpus())
+			cpu_relax();
+		smp_mb();
+	}
+
+	return ret;
+}
+
+int __kprobes riscv_patch_text(void *addr, u32 insn)
+{
+	struct riscv_insn_patch patch = {
+		.addr = addr,
+		.insn = insn,
+		.cpu_count = ATOMIC_INIT(0),
+	};
+
+	return stop_machine_cpuslocked(riscv_patch_text_cb,
+				       &patch, cpu_online_mask);
+}

From 8fdddb2eae731dd6beb89f9b812e5915d1beb744 Mon Sep 17 00:00:00 2001
From: Zong Li
Date: Tue, 10 Mar 2020 00:55:44 +0800
Subject: [PATCH 18/38] riscv: patch code by fixmap mapping

With strict kernel memory permissions, ftrace has to change the
permissions of the text section to patch instructions dynamically.
Use riscv_patch_text_nosync() to patch code instead of
probe_kernel_write().

Signed-off-by: Zong Li
Signed-off-by: Palmer Dabbelt
---
 arch/riscv/kernel/ftrace.c | 13 ++++---------
 1 file changed, 4 insertions(+), 9 deletions(-)

diff --git a/arch/riscv/kernel/ftrace.c b/arch/riscv/kernel/ftrace.c
index c40fdcdeb950..ce69b34ff55d 100644
--- a/arch/riscv/kernel/ftrace.c
+++ b/arch/riscv/kernel/ftrace.c
@@ -8,6 +8,7 @@
 #include
 #include
 #include
+#include <asm/patch.h>
 
 #ifdef CONFIG_DYNAMIC_FTRACE
 static int ftrace_check_current_call(unsigned long hook_pos,
@@ -46,20 +47,14 @@ static int __ftrace_modify_call(unsigned long hook_pos, unsigned long target,
 {
 	unsigned int call[2];
 	unsigned int nops[2] = {NOP4, NOP4};
-	int ret = 0;
 
 	make_call(hook_pos, target, call);
 
-	/* replace the auipc-jalr pair at once */
-	ret = probe_kernel_write((void *)hook_pos, enable ? call : nops,
-				 MCOUNT_INSN_SIZE);
-	/* return must be -EPERM on write error */
-	if (ret)
+	/* Replace the auipc-jalr pair at once. Return -EPERM on write error. */
+	if (riscv_patch_text_nosync
+	    ((void *)hook_pos, enable ? call : nops, MCOUNT_INSN_SIZE))
 		return -EPERM;
 
-	smp_mb();
-	flush_icache_range((void *)hook_pos, (void *)hook_pos + MCOUNT_INSN_SIZE);
-
 	return 0;
 }

From 59c4da8640ccf4721d54d36835706f3eefb521a4 Mon Sep 17 00:00:00 2001
From: Zong Li
Date: Thu, 12 Mar 2020 10:58:35 +0800
Subject: [PATCH 19/38] riscv: Add support to dump the kernel page tables

In a similar manner to arm64, x86, powerpc, etc., this can traverse
all page tables and dump the page table layout with the memory types
and permissions.

Add a debugfs file at /sys/kernel/debug/kernel_page_tables to export
the page table layout to userspace.
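Usage note (not part of the patch): with debugfs mounted, the layout
can be read with "cat /sys/kernel/debug/kernel_page_tables". For
reference, a stand-alone sketch (plain C) of the size-scaling loop
that dump_addr() in the diff below uses to print range sizes:

  #include <stdio.h>

  int main(void)
  {
          static const char units[] = "KMGTPE";
          const char *unit = units;
          unsigned long delta = (2UL << 20) >> 10;  /* a 2 MiB range, in KiB */

          /* Divide by 1024 while the value stays exact, as dump_addr() does. */
          while (!(delta & 1023) && unit[1]) {
                  delta >>= 10;
                  unit++;
          }
          printf("%lu%c\n", delta, *unit);  /* prints "2M" */
          return 0;
  }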
Signed-off-by: Zong Li Tested-by: Alexandre Ghiti Signed-off-by: Palmer Dabbelt --- arch/riscv/Kconfig | 1 + arch/riscv/include/asm/pgtable.h | 10 + arch/riscv/include/asm/ptdump.h | 11 ++ arch/riscv/mm/Makefile | 1 + arch/riscv/mm/ptdump.c | 317 +++++++++++++++++++++++++++++++ 5 files changed, 340 insertions(+) create mode 100644 arch/riscv/include/asm/ptdump.h create mode 100644 arch/riscv/mm/ptdump.c diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig index dfc9c3ea9f7d..b2764a0a0c8c 100644 --- a/arch/riscv/Kconfig +++ b/arch/riscv/Kconfig @@ -29,6 +29,7 @@ config RISCV select GENERIC_SMP_IDLE_THREAD select GENERIC_ATOMIC64 if !64BIT select GENERIC_IOREMAP + select GENERIC_PTDUMP if MMU select HAVE_ARCH_AUDITSYSCALL select HAVE_ARCH_SECCOMP_FILTER select HAVE_ASM_MODVERSIONS diff --git a/arch/riscv/include/asm/pgtable.h b/arch/riscv/include/asm/pgtable.h index 393f2014dfee..9c188ad2e52d 100644 --- a/arch/riscv/include/asm/pgtable.h +++ b/arch/riscv/include/asm/pgtable.h @@ -448,6 +448,16 @@ static inline int ptep_clear_flush_young(struct vm_area_struct *vma, #define __pte_to_swp_entry(pte) ((swp_entry_t) { pte_val(pte) }) #define __swp_entry_to_pte(x) ((pte_t) { (x).val }) +/* + * In the RV64 Linux scheme, we give the user half of the virtual-address space + * and give the kernel the other (upper) half. + */ +#ifdef CONFIG_64BIT +#define KERN_VIRT_START (-(BIT(CONFIG_VA_BITS)) + TASK_SIZE) +#else +#define KERN_VIRT_START FIXADDR_START +#endif + /* * Task size is 0x4000000000 for RV64 or 0x9fc00000 for RV32. * Note that PGDIR_SIZE must evenly divide TASK_SIZE. diff --git a/arch/riscv/include/asm/ptdump.h b/arch/riscv/include/asm/ptdump.h new file mode 100644 index 000000000000..e29af7191909 --- /dev/null +++ b/arch/riscv/include/asm/ptdump.h @@ -0,0 +1,11 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2019 SiFive + */ + +#ifndef _ASM_RISCV_PTDUMP_H +#define _ASM_RISCV_PTDUMP_H + +void ptdump_check_wx(void); + +#endif /* _ASM_RISCV_PTDUMP_H */ diff --git a/arch/riscv/mm/Makefile b/arch/riscv/mm/Makefile index e0e10b618273..363ef01c30b1 100644 --- a/arch/riscv/mm/Makefile +++ b/arch/riscv/mm/Makefile @@ -15,6 +15,7 @@ ifeq ($(CONFIG_MMU),y) obj-$(CONFIG_SMP) += tlbflush.o endif obj-$(CONFIG_HUGETLB_PAGE) += hugetlbpage.o +obj-$(CONFIG_PTDUMP_CORE) += ptdump.o obj-$(CONFIG_KASAN) += kasan_init.o ifdef CONFIG_KASAN diff --git a/arch/riscv/mm/ptdump.c b/arch/riscv/mm/ptdump.c new file mode 100644 index 000000000000..7eab76a93106 --- /dev/null +++ b/arch/riscv/mm/ptdump.c @@ -0,0 +1,317 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (C) 2019 SiFive + */ + +#include +#include +#include +#include + +#include +#include +#include + +#define pt_dump_seq_printf(m, fmt, args...) \ +({ \ + if (m) \ + seq_printf(m, fmt, ##args); \ +}) + +#define pt_dump_seq_puts(m, fmt) \ +({ \ + if (m) \ + seq_printf(m, fmt); \ +}) + +/* + * The page dumper groups page table entries of the same type into a single + * description. It uses pg_state to track the range information while + * iterating over the pte entries. When the continuity is broken it then + * dumps out a description of the range. 
+ */ +struct pg_state { + struct ptdump_state ptdump; + struct seq_file *seq; + const struct addr_marker *marker; + unsigned long start_address; + unsigned long start_pa; + unsigned long last_pa; + int level; + u64 current_prot; + bool check_wx; + unsigned long wx_pages; +}; + +/* Address marker */ +struct addr_marker { + unsigned long start_address; + const char *name; +}; + +static struct addr_marker address_markers[] = { +#ifdef CONFIG_KASAN + {KASAN_SHADOW_START, "Kasan shadow start"}, + {KASAN_SHADOW_END, "Kasan shadow end"}, +#endif + {FIXADDR_START, "Fixmap start"}, + {FIXADDR_TOP, "Fixmap end"}, + {PCI_IO_START, "PCI I/O start"}, + {PCI_IO_END, "PCI I/O end"}, +#ifdef CONFIG_SPARSEMEM_VMEMMAP + {VMEMMAP_START, "vmemmap start"}, + {VMEMMAP_END, "vmemmap end"}, +#endif + {VMALLOC_START, "vmalloc() area"}, + {VMALLOC_END, "vmalloc() end"}, + {PAGE_OFFSET, "Linear mapping"}, + {-1, NULL}, +}; + +/* Page Table Entry */ +struct prot_bits { + u64 mask; + u64 val; + const char *set; + const char *clear; +}; + +static const struct prot_bits pte_bits[] = { + { + .mask = _PAGE_SOFT, + .val = _PAGE_SOFT, + .set = "RSW", + .clear = " ", + }, { + .mask = _PAGE_DIRTY, + .val = _PAGE_DIRTY, + .set = "D", + .clear = ".", + }, { + .mask = _PAGE_ACCESSED, + .val = _PAGE_ACCESSED, + .set = "A", + .clear = ".", + }, { + .mask = _PAGE_GLOBAL, + .val = _PAGE_GLOBAL, + .set = "G", + .clear = ".", + }, { + .mask = _PAGE_USER, + .val = _PAGE_USER, + .set = "U", + .clear = ".", + }, { + .mask = _PAGE_EXEC, + .val = _PAGE_EXEC, + .set = "X", + .clear = ".", + }, { + .mask = _PAGE_WRITE, + .val = _PAGE_WRITE, + .set = "W", + .clear = ".", + }, { + .mask = _PAGE_READ, + .val = _PAGE_READ, + .set = "R", + .clear = ".", + }, { + .mask = _PAGE_PRESENT, + .val = _PAGE_PRESENT, + .set = "V", + .clear = ".", + } +}; + +/* Page Level */ +struct pg_level { + const char *name; + u64 mask; +}; + +static struct pg_level pg_level[] = { + { /* pgd */ + .name = "PGD", + }, { /* p4d */ + .name = (CONFIG_PGTABLE_LEVELS > 4) ? "P4D" : "PGD", + }, { /* pud */ + .name = (CONFIG_PGTABLE_LEVELS > 3) ? "PUD" : "PGD", + }, { /* pmd */ + .name = (CONFIG_PGTABLE_LEVELS > 2) ? 
"PMD" : "PGD", + }, { /* pte */ + .name = "PTE", + }, +}; + +static void dump_prot(struct pg_state *st) +{ + unsigned int i; + + for (i = 0; i < ARRAY_SIZE(pte_bits); i++) { + const char *s; + + if ((st->current_prot & pte_bits[i].mask) == pte_bits[i].val) + s = pte_bits[i].set; + else + s = pte_bits[i].clear; + + if (s) + pt_dump_seq_printf(st->seq, " %s", s); + } +} + +#ifdef CONFIG_64BIT +#define ADDR_FORMAT "0x%016lx" +#else +#define ADDR_FORMAT "0x%08lx" +#endif +static void dump_addr(struct pg_state *st, unsigned long addr) +{ + static const char units[] = "KMGTPE"; + const char *unit = units; + unsigned long delta; + + pt_dump_seq_printf(st->seq, ADDR_FORMAT "-" ADDR_FORMAT " ", + st->start_address, addr); + + pt_dump_seq_printf(st->seq, " " ADDR_FORMAT " ", st->start_pa); + delta = (addr - st->start_address) >> 10; + + while (!(delta & 1023) && unit[1]) { + delta >>= 10; + unit++; + } + + pt_dump_seq_printf(st->seq, "%9lu%c %s", delta, *unit, + pg_level[st->level].name); +} + +static void note_prot_wx(struct pg_state *st, unsigned long addr) +{ + if (!st->check_wx) + return; + + if ((st->current_prot & (_PAGE_WRITE | _PAGE_EXEC)) != + (_PAGE_WRITE | _PAGE_EXEC)) + return; + + WARN_ONCE(1, "riscv/mm: Found insecure W+X mapping at address %p/%pS\n", + (void *)st->start_address, (void *)st->start_address); + + st->wx_pages += (addr - st->start_address) / PAGE_SIZE; +} + +static void note_page(struct ptdump_state *pt_st, unsigned long addr, + int level, unsigned long val) +{ + struct pg_state *st = container_of(pt_st, struct pg_state, ptdump); + u64 pa = PFN_PHYS(pte_pfn(__pte(val))); + u64 prot = 0; + + if (level >= 0) + prot = val & pg_level[level].mask; + + if (st->level == -1) { + st->level = level; + st->current_prot = prot; + st->start_address = addr; + st->start_pa = pa; + st->last_pa = pa; + pt_dump_seq_printf(st->seq, "---[ %s ]---\n", st->marker->name); + } else if (prot != st->current_prot || + level != st->level || addr >= st->marker[1].start_address) { + if (st->current_prot) { + note_prot_wx(st, addr); + dump_addr(st, addr); + dump_prot(st); + pt_dump_seq_puts(st->seq, "\n"); + } + + while (addr >= st->marker[1].start_address) { + st->marker++; + pt_dump_seq_printf(st->seq, "---[ %s ]---\n", + st->marker->name); + } + + st->start_address = addr; + st->start_pa = pa; + st->last_pa = pa; + st->current_prot = prot; + st->level = level; + } else { + st->last_pa = pa; + } +} + +static void ptdump_walk(struct seq_file *s) +{ + struct pg_state st = { + .seq = s, + .marker = address_markers, + .level = -1, + .ptdump = { + .note_page = note_page, + .range = (struct ptdump_range[]) { + {KERN_VIRT_START, ULONG_MAX}, + {0, 0} + } + } + }; + + ptdump_walk_pgd(&st.ptdump, &init_mm, NULL); +} + +void ptdump_check_wx(void) +{ + struct pg_state st = { + .seq = NULL, + .marker = (struct addr_marker[]) { + {0, NULL}, + {-1, NULL}, + }, + .level = -1, + .check_wx = true, + .ptdump = { + .note_page = note_page, + .range = (struct ptdump_range[]) { + {KERN_VIRT_START, ULONG_MAX}, + {0, 0} + } + } + }; + + ptdump_walk_pgd(&st.ptdump, &init_mm, NULL); + + if (st.wx_pages) + pr_warn("Checked W+X mappings: failed, %lu W+X pages found\n", + st.wx_pages); + else + pr_info("Checked W+X mappings: passed, no W+X pages found\n"); +} + +static int ptdump_show(struct seq_file *m, void *v) +{ + ptdump_walk(m); + + return 0; +} + +DEFINE_SHOW_ATTRIBUTE(ptdump); + +static int ptdump_init(void) +{ + unsigned int i, j; + + for (i = 0; i < ARRAY_SIZE(pg_level); i++) + for (j = 0; j < ARRAY_SIZE(pte_bits); j++) 
+ pg_level[i].mask |= pte_bits[j].mask; + + debugfs_create_file("kernel_page_tables", 0400, NULL, NULL, + &ptdump_fops); + + return 0; +} + +device_initcall(ptdump_init); From 88d110382555ac2aef3bca8e4f4c6e5602a22faf Mon Sep 17 00:00:00 2001 From: Zong Li Date: Thu, 12 Mar 2020 10:58:36 +0800 Subject: [PATCH 20/38] riscv: Use macro definition instead of magic number The KERN_VIRT_START defines the start virtual address of kernel space. Use this macro instead of magic number. Signed-off-by: Zong Li Signed-off-by: Palmer Dabbelt --- arch/riscv/include/asm/kasan.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/riscv/include/asm/kasan.h b/arch/riscv/include/asm/kasan.h index eee6e6588b12..b47045cb85ce 100644 --- a/arch/riscv/include/asm/kasan.h +++ b/arch/riscv/include/asm/kasan.h @@ -13,7 +13,7 @@ #define KASAN_SHADOW_SCALE_SHIFT 3 #define KASAN_SHADOW_SIZE (UL(1) << (38 - KASAN_SHADOW_SCALE_SHIFT)) -#define KASAN_SHADOW_START 0xffffffc000000000 /* 2^64 - 2^38 */ +#define KASAN_SHADOW_START KERN_VIRT_START /* 2^64 - 2^38 */ #define KASAN_SHADOW_END (KASAN_SHADOW_START + KASAN_SHADOW_SIZE) #define KASAN_SHADOW_OFFSET (KASAN_SHADOW_END - (1ULL << \ From 8446923ae4d776f42bf088ab99b1f91141ab6370 Mon Sep 17 00:00:00 2001 From: Atish Patra Date: Tue, 17 Mar 2020 18:11:34 -0700 Subject: [PATCH 21/38] RISC-V: Mark existing SBI as 0.1 SBI. As per the new SBI specification, current SBI implementation version is defined as 0.1 and will be removed/replaced in future. Each of the function call in 0.1 is defined as a separate extension which makes easier to replace them one at a time. Rename existing implementation to reflect that. This patch is just a preparatory patch for SBI v0.2 and doesn't introduce any functional changes. Signed-off-by: Atish Patra Reviewed-by: Anup Patel Reviewed-by: Palmer Dabbelt Signed-off-by: Palmer Dabbelt --- arch/riscv/include/asm/sbi.h | 41 +++++++++++++++++++----------------- 1 file changed, 22 insertions(+), 19 deletions(-) diff --git a/arch/riscv/include/asm/sbi.h b/arch/riscv/include/asm/sbi.h index 2570c1e683d3..2a637ebd7a22 100644 --- a/arch/riscv/include/asm/sbi.h +++ b/arch/riscv/include/asm/sbi.h @@ -1,6 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0-only */ /* * Copyright (C) 2015 Regents of the University of California + * Copyright (c) 2020 Western Digital Corporation or its affiliates. 
 */

#ifndef _ASM_RISCV_SBI_H
@@ -9,15 +10,15 @@
 #include <linux/types.h>

 #ifdef CONFIG_RISCV_SBI
-#define SBI_SET_TIMER 0
-#define SBI_CONSOLE_PUTCHAR 1
-#define SBI_CONSOLE_GETCHAR 2
-#define SBI_CLEAR_IPI 3
-#define SBI_SEND_IPI 4
-#define SBI_REMOTE_FENCE_I 5
-#define SBI_REMOTE_SFENCE_VMA 6
-#define SBI_REMOTE_SFENCE_VMA_ASID 7
-#define SBI_SHUTDOWN 8
+#define SBI_EXT_0_1_SET_TIMER 0x0
+#define SBI_EXT_0_1_CONSOLE_PUTCHAR 0x1
+#define SBI_EXT_0_1_CONSOLE_GETCHAR 0x2
+#define SBI_EXT_0_1_CLEAR_IPI 0x3
+#define SBI_EXT_0_1_SEND_IPI 0x4
+#define SBI_EXT_0_1_REMOTE_FENCE_I 0x5
+#define SBI_EXT_0_1_REMOTE_SFENCE_VMA 0x6
+#define SBI_EXT_0_1_REMOTE_SFENCE_VMA_ASID 0x7
+#define SBI_EXT_0_1_SHUTDOWN 0x8

 #define SBI_CALL(which, arg0, arg1, arg2, arg3) ({		\
 	register uintptr_t a0 asm ("a0") = (uintptr_t)(arg0);	\
@@ -43,48 +44,49 @@

 static inline void sbi_console_putchar(int ch)
 {
-	SBI_CALL_1(SBI_CONSOLE_PUTCHAR, ch);
+	SBI_CALL_1(SBI_EXT_0_1_CONSOLE_PUTCHAR, ch);
 }

 static inline int sbi_console_getchar(void)
 {
-	return SBI_CALL_0(SBI_CONSOLE_GETCHAR);
+	return SBI_CALL_0(SBI_EXT_0_1_CONSOLE_GETCHAR);
 }

 static inline void sbi_set_timer(uint64_t stime_value)
 {
 #if __riscv_xlen == 32
-	SBI_CALL_2(SBI_SET_TIMER, stime_value, stime_value >> 32);
+	SBI_CALL_2(SBI_EXT_0_1_SET_TIMER, stime_value,
+		   stime_value >> 32);
 #else
-	SBI_CALL_1(SBI_SET_TIMER, stime_value);
+	SBI_CALL_1(SBI_EXT_0_1_SET_TIMER, stime_value);
 #endif
 }

 static inline void sbi_shutdown(void)
 {
-	SBI_CALL_0(SBI_SHUTDOWN);
+	SBI_CALL_0(SBI_EXT_0_1_SHUTDOWN);
 }

 static inline void sbi_clear_ipi(void)
 {
-	SBI_CALL_0(SBI_CLEAR_IPI);
+	SBI_CALL_0(SBI_EXT_0_1_CLEAR_IPI);
 }

 static inline void sbi_send_ipi(const unsigned long *hart_mask)
 {
-	SBI_CALL_1(SBI_SEND_IPI, hart_mask);
+	SBI_CALL_1(SBI_EXT_0_1_SEND_IPI, hart_mask);
 }

 static inline void sbi_remote_fence_i(const unsigned long *hart_mask)
 {
-	SBI_CALL_1(SBI_REMOTE_FENCE_I, hart_mask);
+	SBI_CALL_1(SBI_EXT_0_1_REMOTE_FENCE_I, hart_mask);
 }

 static inline void sbi_remote_sfence_vma(const unsigned long *hart_mask,
					 unsigned long start,
					 unsigned long size)
 {
-	SBI_CALL_3(SBI_REMOTE_SFENCE_VMA, hart_mask, start, size);
+	SBI_CALL_3(SBI_EXT_0_1_REMOTE_SFENCE_VMA, hart_mask, start, size);
 }

 static inline void sbi_remote_sfence_vma_asid(const unsigned long *hart_mask,
@@ -92,7 +94,8 @@ static inline void sbi_remote_sfence_vma_asid(const unsigned long *hart_mask,
					      unsigned long size,
					      unsigned long asid)
 {
-	SBI_CALL_4(SBI_REMOTE_SFENCE_VMA_ASID, hart_mask, start, size, asid);
+	SBI_CALL_4(SBI_EXT_0_1_REMOTE_SFENCE_VMA_ASID, hart_mask,
+		   start, size, asid);
 }
 #else /* CONFIG_RISCV_SBI */
 /* stubs for code that is only reachable under IS_ENABLED(CONFIG_RISCV_SBI): */

From b9dcd9e415872ae29f87667d23c8a8b946d24611 Mon Sep 17 00:00:00 2001
From: Atish Patra
Date: Tue, 17 Mar 2020 18:11:35 -0700
Subject: [PATCH 22/38] RISC-V: Add basic support for SBI v0.2

SBI v0.2 introduces a base extension which is backward compatible with
v0.1. Implement all helper functions and the minimum required SBI calls
from v0.2 for now. All other base extension functions will be added
later as needed. As the v0.2 calling convention is backward compatible
with v0.1, remove the v0.1 helper functions and just use the v0.2
calling convention.
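As an illustration of the new convention (a sketch only, using the
names introduced by this patch; the example function name is
hypothetical), probing for an extension looks roughly like this:

	/*
	 * Illustrative sketch: the extension ID is passed in a7, the
	 * function ID in a6, and the (error, value) pair comes back in
	 * a0/a1 as a struct sbiret.
	 */
	static long probe_extension_example(int extid)
	{
		struct sbiret ret;

		ret = sbi_ecall(SBI_EXT_BASE, SBI_EXT_BASE_PROBE_EXT,
				extid, 0, 0, 0, 0, 0);
		return ret.error ? ret.error : ret.value;
	}

This mirrors the sbi_probe_extension() helper added below.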
Signed-off-by: Atish Patra Reviewed-by: Anup Patel Reviewed-by: Palmer Dabbelt Signed-off-by: Palmer Dabbelt --- arch/riscv/include/asm/sbi.h | 149 +++++++++++---------- arch/riscv/kernel/sbi.c | 243 ++++++++++++++++++++++++++++++++++- arch/riscv/kernel/setup.c | 5 + 3 files changed, 319 insertions(+), 78 deletions(-) diff --git a/arch/riscv/include/asm/sbi.h b/arch/riscv/include/asm/sbi.h index 2a637ebd7a22..5b7b91c7e7e6 100644 --- a/arch/riscv/include/asm/sbi.h +++ b/arch/riscv/include/asm/sbi.h @@ -10,92 +10,88 @@ #include #ifdef CONFIG_RISCV_SBI -#define SBI_EXT_0_1_SET_TIMER 0x0 -#define SBI_EXT_0_1_CONSOLE_PUTCHAR 0x1 -#define SBI_EXT_0_1_CONSOLE_GETCHAR 0x2 -#define SBI_EXT_0_1_CLEAR_IPI 0x3 -#define SBI_EXT_0_1_SEND_IPI 0x4 -#define SBI_EXT_0_1_REMOTE_FENCE_I 0x5 -#define SBI_EXT_0_1_REMOTE_SFENCE_VMA 0x6 -#define SBI_EXT_0_1_REMOTE_SFENCE_VMA_ASID 0x7 -#define SBI_EXT_0_1_SHUTDOWN 0x8 +enum sbi_ext_id { + SBI_EXT_0_1_SET_TIMER = 0x0, + SBI_EXT_0_1_CONSOLE_PUTCHAR = 0x1, + SBI_EXT_0_1_CONSOLE_GETCHAR = 0x2, + SBI_EXT_0_1_CLEAR_IPI = 0x3, + SBI_EXT_0_1_SEND_IPI = 0x4, + SBI_EXT_0_1_REMOTE_FENCE_I = 0x5, + SBI_EXT_0_1_REMOTE_SFENCE_VMA = 0x6, + SBI_EXT_0_1_REMOTE_SFENCE_VMA_ASID = 0x7, + SBI_EXT_0_1_SHUTDOWN = 0x8, + SBI_EXT_BASE = 0x10, +}; -#define SBI_CALL(which, arg0, arg1, arg2, arg3) ({ \ - register uintptr_t a0 asm ("a0") = (uintptr_t)(arg0); \ - register uintptr_t a1 asm ("a1") = (uintptr_t)(arg1); \ - register uintptr_t a2 asm ("a2") = (uintptr_t)(arg2); \ - register uintptr_t a3 asm ("a3") = (uintptr_t)(arg3); \ - register uintptr_t a7 asm ("a7") = (uintptr_t)(which); \ - asm volatile ("ecall" \ - : "+r" (a0) \ - : "r" (a1), "r" (a2), "r" (a3), "r" (a7) \ - : "memory"); \ - a0; \ -}) +enum sbi_ext_base_fid { + SBI_EXT_BASE_GET_SPEC_VERSION = 0, + SBI_EXT_BASE_GET_IMP_ID, + SBI_EXT_BASE_GET_IMP_VERSION, + SBI_EXT_BASE_PROBE_EXT, + SBI_EXT_BASE_GET_MVENDORID, + SBI_EXT_BASE_GET_MARCHID, + SBI_EXT_BASE_GET_MIMPID, +}; -/* Lazy implementations until SBI is finalized */ -#define SBI_CALL_0(which) SBI_CALL(which, 0, 0, 0, 0) -#define SBI_CALL_1(which, arg0) SBI_CALL(which, arg0, 0, 0, 0) -#define SBI_CALL_2(which, arg0, arg1) SBI_CALL(which, arg0, arg1, 0, 0) -#define SBI_CALL_3(which, arg0, arg1, arg2) \ - SBI_CALL(which, arg0, arg1, arg2, 0) -#define SBI_CALL_4(which, arg0, arg1, arg2, arg3) \ - SBI_CALL(which, arg0, arg1, arg2, arg3) +#define SBI_SPEC_VERSION_DEFAULT 0x1 +#define SBI_SPEC_VERSION_MAJOR_SHIFT 24 +#define SBI_SPEC_VERSION_MAJOR_MASK 0x7f +#define SBI_SPEC_VERSION_MINOR_MASK 0xffffff -static inline void sbi_console_putchar(int ch) +/* SBI return error codes */ +#define SBI_SUCCESS 0 +#define SBI_ERR_FAILURE -1 +#define SBI_ERR_NOT_SUPPORTED -2 +#define SBI_ERR_INVALID_PARAM -3 +#define SBI_ERR_DENIED -4 +#define SBI_ERR_INVALID_ADDRESS -5 + +extern unsigned long sbi_spec_version; +struct sbiret { + long error; + long value; +}; + +int sbi_init(void); +struct sbiret sbi_ecall(int ext, int fid, unsigned long arg0, + unsigned long arg1, unsigned long arg2, + unsigned long arg3, unsigned long arg4, + unsigned long arg5); + +void sbi_console_putchar(int ch); +int sbi_console_getchar(void); +void sbi_set_timer(uint64_t stime_value); +void sbi_shutdown(void); +void sbi_clear_ipi(void); +void sbi_send_ipi(const unsigned long *hart_mask); +void sbi_remote_fence_i(const unsigned long *hart_mask); +void sbi_remote_sfence_vma(const unsigned long *hart_mask, + unsigned long start, + unsigned long size); + +void sbi_remote_sfence_vma_asid(const unsigned long *hart_mask, + 
unsigned long start,
+			       unsigned long size,
+			       unsigned long asid);
+int sbi_probe_extension(int ext);
+
+/* Check if current SBI specification version is 0.1 or not */
+static inline int sbi_spec_is_0_1(void)
 {
-	SBI_CALL_1(SBI_EXT_0_1_CONSOLE_PUTCHAR, ch);
+	return (sbi_spec_version == SBI_SPEC_VERSION_DEFAULT) ? 1 : 0;
 }

-static inline int sbi_console_getchar(void)
+/* Get the major version of SBI */
+static inline unsigned long sbi_major_version(void)
 {
-	return SBI_CALL_0(SBI_EXT_0_1_CONSOLE_GETCHAR);
+	return (sbi_spec_version >> SBI_SPEC_VERSION_MAJOR_SHIFT) &
+		SBI_SPEC_VERSION_MAJOR_MASK;
 }

-static inline void sbi_set_timer(uint64_t stime_value)
+/* Get the minor version of SBI */
+static inline unsigned long sbi_minor_version(void)
 {
-#if __riscv_xlen == 32
-	SBI_CALL_2(SBI_EXT_0_1_SET_TIMER, stime_value,
-		   stime_value >> 32);
-#else
-	SBI_CALL_1(SBI_EXT_0_1_SET_TIMER, stime_value);
-#endif
-}
-
-static inline void sbi_shutdown(void)
-{
-	SBI_CALL_0(SBI_EXT_0_1_SHUTDOWN);
-}
-
-static inline void sbi_clear_ipi(void)
-{
-	SBI_CALL_0(SBI_EXT_0_1_CLEAR_IPI);
-}
-
-static inline void sbi_send_ipi(const unsigned long *hart_mask)
-{
-	SBI_CALL_1(SBI_EXT_0_1_SEND_IPI, hart_mask);
-}
-
-static inline void sbi_remote_fence_i(const unsigned long *hart_mask)
-{
-	SBI_CALL_1(SBI_EXT_0_1_REMOTE_FENCE_I, hart_mask);
-}
-
-static inline void sbi_remote_sfence_vma(const unsigned long *hart_mask,
-					 unsigned long start,
-					 unsigned long size)
-{
-	SBI_CALL_3(SBI_EXT_0_1_REMOTE_SFENCE_VMA, hart_mask, start, size);
-}
-
-static inline void sbi_remote_sfence_vma_asid(const unsigned long *hart_mask,
-					      unsigned long start,
-					      unsigned long size,
-					      unsigned long asid)
-{
-	SBI_CALL_4(SBI_EXT_0_1_REMOTE_SFENCE_VMA_ASID, hart_mask,
-		   start, size, asid);
+	return sbi_spec_version & SBI_SPEC_VERSION_MINOR_MASK;
 }
 #else /* CONFIG_RISCV_SBI */
 /* stubs for code that is only reachable under IS_ENABLED(CONFIG_RISCV_SBI): */
@@ -103,5 +99,6 @@ void sbi_set_timer(uint64_t stime_value);
 void sbi_clear_ipi(void);
 void sbi_send_ipi(const unsigned long *hart_mask);
 void sbi_remote_fence_i(const unsigned long *hart_mask);
+void sbi_init(void);
 #endif /* CONFIG_RISCV_SBI */
 #endif /* _ASM_RISCV_SBI_H */
diff --git a/arch/riscv/kernel/sbi.c b/arch/riscv/kernel/sbi.c
index f6c7c3e82d28..4aee0b49df3c 100644
--- a/arch/riscv/kernel/sbi.c
+++ b/arch/riscv/kernel/sbi.c
@@ -1,17 +1,256 @@
 // SPDX-License-Identifier: GPL-2.0-only
+/*
+ * SBI initialization and all extension implementations.
+ *
+ * Copyright (c) 2020 Western Digital Corporation or its affiliates.
+ */ #include #include #include +/* default SBI version is 0.1 */ +unsigned long sbi_spec_version = SBI_SPEC_VERSION_DEFAULT; +EXPORT_SYMBOL(sbi_spec_version); + +struct sbiret sbi_ecall(int ext, int fid, unsigned long arg0, + unsigned long arg1, unsigned long arg2, + unsigned long arg3, unsigned long arg4, + unsigned long arg5) +{ + struct sbiret ret; + + register uintptr_t a0 asm ("a0") = (uintptr_t)(arg0); + register uintptr_t a1 asm ("a1") = (uintptr_t)(arg1); + register uintptr_t a2 asm ("a2") = (uintptr_t)(arg2); + register uintptr_t a3 asm ("a3") = (uintptr_t)(arg3); + register uintptr_t a4 asm ("a4") = (uintptr_t)(arg4); + register uintptr_t a5 asm ("a5") = (uintptr_t)(arg5); + register uintptr_t a6 asm ("a6") = (uintptr_t)(fid); + register uintptr_t a7 asm ("a7") = (uintptr_t)(ext); + asm volatile ("ecall" + : "+r" (a0), "+r" (a1) + : "r" (a2), "r" (a3), "r" (a4), "r" (a5), "r" (a6), "r" (a7) + : "memory"); + ret.error = a0; + ret.value = a1; + + return ret; +} +EXPORT_SYMBOL(sbi_ecall); + +static int sbi_err_map_linux_errno(int err) +{ + switch (err) { + case SBI_SUCCESS: + return 0; + case SBI_ERR_DENIED: + return -EPERM; + case SBI_ERR_INVALID_PARAM: + return -EINVAL; + case SBI_ERR_INVALID_ADDRESS: + return -EFAULT; + case SBI_ERR_NOT_SUPPORTED: + case SBI_ERR_FAILURE: + default: + return -ENOTSUPP; + }; +} + +/** + * sbi_console_putchar() - Writes given character to the console device. + * @ch: The data to be written to the console. + * + * Return: None + */ +void sbi_console_putchar(int ch) +{ + sbi_ecall(SBI_EXT_0_1_CONSOLE_PUTCHAR, 0, ch, 0, 0, 0, 0, 0); +} +EXPORT_SYMBOL(sbi_console_putchar); + +/** + * sbi_console_getchar() - Reads a byte from console device. + * + * Returns the value read from console. + */ +int sbi_console_getchar(void) +{ + struct sbiret ret; + + ret = sbi_ecall(SBI_EXT_0_1_CONSOLE_GETCHAR, 0, 0, 0, 0, 0, 0, 0); + + return ret.error; +} +EXPORT_SYMBOL(sbi_console_getchar); + +/** + * sbi_set_timer() - Program the timer for next timer event. + * @stime_value: The value after which next timer event should fire. + * + * Return: None + */ +void sbi_set_timer(uint64_t stime_value) +{ +#if __riscv_xlen == 32 + sbi_ecall(SBI_EXT_0_1_SET_TIMER, 0, stime_value, + stime_value >> 32, 0, 0, 0, 0); +#else + sbi_ecall(SBI_EXT_0_1_SET_TIMER, 0, stime_value, 0, 0, 0, 0, 0); +#endif +} +EXPORT_SYMBOL(sbi_set_timer); + +/** + * sbi_shutdown() - Remove all the harts from executing supervisor code. + * + * Return: None + */ +void sbi_shutdown(void) +{ + sbi_ecall(SBI_EXT_0_1_SHUTDOWN, 0, 0, 0, 0, 0, 0, 0); +} +EXPORT_SYMBOL(sbi_shutdown); + +/** + * sbi_clear_ipi() - Clear any pending IPIs for the calling hart. + * + * Return: None + */ +void sbi_clear_ipi(void) +{ + sbi_ecall(SBI_EXT_0_1_CLEAR_IPI, 0, 0, 0, 0, 0, 0, 0); +} + +/** + * sbi_send_ipi() - Send an IPI to any hart. + * @hart_mask: A cpu mask containing all the target harts. + * + * Return: None + */ +void sbi_send_ipi(const unsigned long *hart_mask) +{ + sbi_ecall(SBI_EXT_0_1_SEND_IPI, 0, (unsigned long)hart_mask, + 0, 0, 0, 0, 0); +} +EXPORT_SYMBOL(sbi_send_ipi); + +/** + * sbi_remote_fence_i() - Execute FENCE.I instruction on given remote harts. + * @hart_mask: A cpu mask containing all the target harts. 
+ *
+ * Return: None
+ */
+void sbi_remote_fence_i(const unsigned long *hart_mask)
+{
+	sbi_ecall(SBI_EXT_0_1_REMOTE_FENCE_I, 0, (unsigned long)hart_mask,
+		  0, 0, 0, 0, 0);
+}
+EXPORT_SYMBOL(sbi_remote_fence_i);
+
+/**
+ * sbi_remote_sfence_vma() - Execute SFENCE.VMA instructions on given remote
+ *			     harts for the specified virtual address range.
+ * @hart_mask: A cpu mask containing all the target harts.
+ * @start: Start of the virtual address
+ * @size: Total size of the virtual address range.
+ *
+ * Return: None
+ */
+void sbi_remote_sfence_vma(const unsigned long *hart_mask,
+			   unsigned long start,
+			   unsigned long size)
+{
+	sbi_ecall(SBI_EXT_0_1_REMOTE_SFENCE_VMA, 0,
+		  (unsigned long)hart_mask, start, size, 0, 0, 0);
+}
+EXPORT_SYMBOL(sbi_remote_sfence_vma);
+
+/**
+ * sbi_remote_sfence_vma_asid() - Execute SFENCE.VMA instructions on given
+ * remote harts for a virtual address range belonging to a specific ASID.
+ *
+ * @hart_mask: A cpu mask containing all the target harts.
+ * @start: Start of the virtual address
+ * @size: Total size of the virtual address range.
+ * @asid: The value of address space identifier (ASID).
+ *
+ * Return: None
+ */
+void sbi_remote_sfence_vma_asid(const unsigned long *hart_mask,
+				unsigned long start,
+				unsigned long size,
+				unsigned long asid)
+{
+	sbi_ecall(SBI_EXT_0_1_REMOTE_SFENCE_VMA_ASID, 0,
+		  (unsigned long)hart_mask, start, size, asid, 0, 0);
+}
+EXPORT_SYMBOL(sbi_remote_sfence_vma_asid);
+
+/**
+ * sbi_probe_extension() - Check if an SBI extension ID is supported or not.
+ * @extid: The extension ID to be probed.
+ *
+ * Return: Extension specific nonzero value if yes, -ENOTSUPP otherwise.
+ */
+int sbi_probe_extension(int extid)
+{
+	struct sbiret ret;
+
+	ret = sbi_ecall(SBI_EXT_BASE, SBI_EXT_BASE_PROBE_EXT, extid,
+			0, 0, 0, 0, 0);
+	if (!ret.error)
+		if (ret.value)
+			return ret.value;
+
+	return -ENOTSUPP;
+}
+EXPORT_SYMBOL(sbi_probe_extension);
+
+static long __sbi_base_ecall(int fid)
+{
+	struct sbiret ret;
+
+	ret = sbi_ecall(SBI_EXT_BASE, fid, 0, 0, 0, 0, 0, 0);
+	if (!ret.error)
+		return ret.value;
+	else
+		return sbi_err_map_linux_errno(ret.error);
+}
+
+static inline long sbi_get_spec_version(void)
+{
+	return __sbi_base_ecall(SBI_EXT_BASE_GET_SPEC_VERSION);
+}
+
+static inline long sbi_get_firmware_id(void)
+{
+	return __sbi_base_ecall(SBI_EXT_BASE_GET_IMP_ID);
+}
+
+static inline long sbi_get_firmware_version(void)
+{
+	return __sbi_base_ecall(SBI_EXT_BASE_GET_IMP_VERSION);
+}
+
 static void sbi_power_off(void)
 {
 	sbi_shutdown();
 }

-static int __init sbi_init(void)
+int __init sbi_init(void)
 {
+	int ret;
+
 	pm_power_off = sbi_power_off;
+	ret = sbi_get_spec_version();
+	if (ret > 0)
+		sbi_spec_version = ret;
+
+	pr_info("SBI specification v%lu.%lu detected\n",
+		sbi_major_version(), sbi_minor_version());
+	if (!sbi_spec_is_0_1())
+		pr_info("SBI implementation ID=0x%lx Version=0x%lx\n",
+			sbi_get_firmware_id(), sbi_get_firmware_version());
 	return 0;
 }
-early_initcall(sbi_init);
diff --git a/arch/riscv/kernel/setup.c b/arch/riscv/kernel/setup.c
index cb836fcc6118..07f4e7503223 100644
--- a/arch/riscv/kernel/setup.c
+++ b/arch/riscv/kernel/setup.c
@@ -22,6 +22,7 @@
 #include
 #include
 #include
+#include <asm/sbi.h>
 #include
 #include
 #include
@@ -83,6 +84,10 @@ void __init setup_arch(char **cmdline_p)
 	kasan_init();
 #endif

+#if IS_ENABLED(CONFIG_RISCV_SBI)
+	sbi_init();
+#endif
+
 #ifdef CONFIG_SMP
 	setup_smp();
 #endif

From ecbacc2a3efd90cae34790379cc3e1b4932889d0 Mon Sep 17 00:00:00 2001
From: Atish Patra
Date: Tue, 17 Mar 2020 18:11:36 -0700
Subject:
 [PATCH 23/38] RISC-V: Add SBI v0.2 extension definitions

A few v0.1 SBI calls are being replaced by new SBI calls that follow
the v0.2 calling convention. This patch just defines these new
extensions.

Signed-off-by: Atish Patra
Reviewed-by: Anup Patel
Reviewed-by: Palmer Dabbelt
Signed-off-by: Palmer Dabbelt
---
 arch/riscv/include/asm/sbi.h | 21 +++++++++++++++++++++
 1 file changed, 21 insertions(+)

diff --git a/arch/riscv/include/asm/sbi.h b/arch/riscv/include/asm/sbi.h
index 5b7b91c7e7e6..f68b6ed10a18 100644
--- a/arch/riscv/include/asm/sbi.h
+++ b/arch/riscv/include/asm/sbi.h
@@ -21,6 +21,9 @@ enum sbi_ext_id {
 	SBI_EXT_0_1_REMOTE_SFENCE_VMA_ASID = 0x7,
 	SBI_EXT_0_1_SHUTDOWN = 0x8,
 	SBI_EXT_BASE = 0x10,
+	SBI_EXT_TIME = 0x54494D45,
+	SBI_EXT_IPI = 0x735049,
+	SBI_EXT_RFENCE = 0x52464E43,
 };

 enum sbi_ext_base_fid {
@@ -33,6 +36,24 @@ enum sbi_ext_base_fid {
 	SBI_EXT_BASE_GET_MIMPID,
 };

+enum sbi_ext_time_fid {
+	SBI_EXT_TIME_SET_TIMER = 0,
+};
+
+enum sbi_ext_ipi_fid {
+	SBI_EXT_IPI_SEND_IPI = 0,
+};
+
+enum sbi_ext_rfence_fid {
+	SBI_EXT_RFENCE_REMOTE_FENCE_I = 0,
+	SBI_EXT_RFENCE_REMOTE_SFENCE_VMA,
+	SBI_EXT_RFENCE_REMOTE_SFENCE_VMA_ASID,
+	SBI_EXT_RFENCE_REMOTE_HFENCE_GVMA,
+	SBI_EXT_RFENCE_REMOTE_HFENCE_GVMA_VMID,
+	SBI_EXT_RFENCE_REMOTE_HFENCE_VVMA,
+	SBI_EXT_RFENCE_REMOTE_HFENCE_VVMA_ASID,
+};
+
 #define SBI_SPEC_VERSION_DEFAULT	0x1
 #define SBI_SPEC_VERSION_MAJOR_SHIFT	24
 #define SBI_SPEC_VERSION_MAJOR_MASK	0x7f

From efca13989250c3edebaf8fcaa8ca7c966739c65a Mon Sep 17 00:00:00 2001
From: Atish Patra
Date: Tue, 17 Mar 2020 18:11:37 -0700
Subject: [PATCH 24/38] RISC-V: Introduce a new config for SBI v0.1

We now have SBI v0.2 which is more scalable and extendable to handle
future needs for RISC-V supervisor interfaces. Introduce a new config
and move all SBI v0.1 code under that config. This allows the new
replacement SBI extensions to be implemented cleanly, and the v0.1
extensions to be removed easily in the future. Currently, the config
is enabled by default. Once all M-mode software that only supports
v0.1 is no longer in use, this config option and all relevant code can
be easily removed.

Signed-off-by: Atish Patra
Reviewed-by: Anup Patel
Signed-off-by: Palmer Dabbelt
---
 arch/riscv/Kconfig           |   7 ++
 arch/riscv/include/asm/sbi.h |   2 +
 arch/riscv/kernel/sbi.c      | 140 ++++++++++++++++++++++++++++-------
 3 files changed, 122 insertions(+), 27 deletions(-)

diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig
index b2764a0a0c8c..cac23a5c1104 100644
--- a/arch/riscv/Kconfig
+++ b/arch/riscv/Kconfig
@@ -314,6 +314,13 @@ config SECCOMP
 	  and the task is only allowed to execute a few safe syscalls
 	  defined by each seccomp mode.

+config RISCV_SBI_V01
+	bool "SBI v0.1 support"
+	default y
+	depends on RISCV_SBI
+	help
+	  This config allows the kernel to use SBI v0.1 APIs. This will be
+	  deprecated in the future once legacy M-mode software is no longer in use.
 endmenu

 menu "Boot options"
diff --git a/arch/riscv/include/asm/sbi.h b/arch/riscv/include/asm/sbi.h
index f68b6ed10a18..d712b61f8dbc 100644
--- a/arch/riscv/include/asm/sbi.h
+++ b/arch/riscv/include/asm/sbi.h
@@ -11,6 +11,7 @@

 #ifdef CONFIG_RISCV_SBI
 enum sbi_ext_id {
+#ifdef CONFIG_RISCV_SBI_V01
 	SBI_EXT_0_1_SET_TIMER = 0x0,
 	SBI_EXT_0_1_CONSOLE_PUTCHAR = 0x1,
 	SBI_EXT_0_1_CONSOLE_GETCHAR = 0x2,
@@ -20,6 +21,7 @@ enum sbi_ext_id {
 	SBI_EXT_0_1_REMOTE_SFENCE_VMA = 0x6,
 	SBI_EXT_0_1_REMOTE_SFENCE_VMA_ASID = 0x7,
 	SBI_EXT_0_1_SHUTDOWN = 0x8,
+#endif
 	SBI_EXT_BASE = 0x10,
 	SBI_EXT_TIME = 0x54494D45,
 	SBI_EXT_IPI = 0x735049,
diff --git a/arch/riscv/kernel/sbi.c b/arch/riscv/kernel/sbi.c
index 4aee0b49df3c..1368da62ec82 100644
--- a/arch/riscv/kernel/sbi.c
+++ b/arch/riscv/kernel/sbi.c
@@ -13,6 +13,12 @@
 unsigned long sbi_spec_version = SBI_SPEC_VERSION_DEFAULT;
 EXPORT_SYMBOL(sbi_spec_version);

+static void (*__sbi_set_timer)(uint64_t stime);
+static int (*__sbi_send_ipi)(const unsigned long *hart_mask);
+static int (*__sbi_rfence)(int fid, const unsigned long *hart_mask,
+			   unsigned long start, unsigned long size,
+			   unsigned long arg4, unsigned long arg5);
+
 struct sbiret sbi_ecall(int ext, int fid, unsigned long arg0,
			unsigned long arg1, unsigned long arg2,
			unsigned long arg3, unsigned long arg4,
@@ -57,6 +63,7 @@ static int sbi_err_map_linux_errno(int err)
	};
 }

+#ifdef CONFIG_RISCV_SBI_V01
 /**
  * sbi_console_putchar() - Writes given character to the console device.
  * @ch: The data to be written to the console.
@@ -84,23 +91,6 @@ int sbi_console_getchar(void)
 }
 EXPORT_SYMBOL(sbi_console_getchar);

-/**
- * sbi_set_timer() - Program the timer for next timer event.
- * @stime_value: The value after which next timer event should fire.
- *
- * Return: None
- */
-void sbi_set_timer(uint64_t stime_value)
-{
-#if __riscv_xlen == 32
-	sbi_ecall(SBI_EXT_0_1_SET_TIMER, 0, stime_value,
-		  stime_value >> 32, 0, 0, 0, 0);
-#else
-	sbi_ecall(SBI_EXT_0_1_SET_TIMER, 0, stime_value, 0, 0, 0, 0, 0);
-#endif
-}
-EXPORT_SYMBOL(sbi_set_timer);
-
 /**
  * sbi_shutdown() - Remove all the harts from executing supervisor code.
  *
@@ -110,7 +100,7 @@ void sbi_shutdown(void)
 {
	sbi_ecall(SBI_EXT_0_1_SHUTDOWN, 0, 0, 0, 0, 0, 0, 0);
 }
 EXPORT_SYMBOL(sbi_shutdown);

 /**
  * sbi_clear_ipi() - Clear any pending IPIs for the calling hart.
  *
@@ -121,6 +111,96 @@ void sbi_clear_ipi(void)
 {
	sbi_ecall(SBI_EXT_0_1_CLEAR_IPI, 0, 0, 0, 0, 0, 0, 0);
 }
+
+/**
+ * __sbi_set_timer_v01() - Program the timer for next timer event.
+ * @stime_value: The value after which next timer event should fire.
+ *
+ * Return: None
+ */
+static void __sbi_set_timer_v01(uint64_t stime_value)
+{
+#if __riscv_xlen == 32
+	sbi_ecall(SBI_EXT_0_1_SET_TIMER, 0, stime_value,
+		  stime_value >> 32, 0, 0, 0, 0);
+#else
+	sbi_ecall(SBI_EXT_0_1_SET_TIMER, 0, stime_value, 0, 0, 0, 0, 0);
+#endif
+}
+
+static int __sbi_send_ipi_v01(const unsigned long *hart_mask)
+{
+	sbi_ecall(SBI_EXT_0_1_SEND_IPI, 0, (unsigned long)hart_mask,
+		  0, 0, 0, 0, 0);
+	return 0;
+}
+
+static int __sbi_rfence_v01(int fid, const unsigned long *hart_mask,
+			    unsigned long start, unsigned long size,
+			    unsigned long arg4, unsigned long arg5)
+{
+	int result = 0;
+
+	/* v0.2 function IDs are equivalent to v0.1 extension IDs */
+	switch (fid) {
+	case SBI_EXT_RFENCE_REMOTE_FENCE_I:
+		sbi_ecall(SBI_EXT_0_1_REMOTE_FENCE_I, 0,
+			  (unsigned long)hart_mask, 0, 0, 0, 0, 0);
+		break;
+	case SBI_EXT_RFENCE_REMOTE_SFENCE_VMA:
+		sbi_ecall(SBI_EXT_0_1_REMOTE_SFENCE_VMA, 0,
+			  (unsigned long)hart_mask, start, size,
+			  0, 0, 0);
+		break;
+	case SBI_EXT_RFENCE_REMOTE_SFENCE_VMA_ASID:
+		sbi_ecall(SBI_EXT_0_1_REMOTE_SFENCE_VMA_ASID, 0,
+			  (unsigned long)hart_mask, start, size,
+			  arg4, 0, 0);
+		break;
+	default:
+		pr_err("SBI call [%d] not supported in SBI v0.1\n", fid);
+		result = -EINVAL;
+	}
+
+	return result;
+}
+#else
+static void __sbi_set_timer_v01(uint64_t stime_value)
+{
+	pr_warn("Timer extension is not available in SBI v%lu.%lu\n",
+		sbi_major_version(), sbi_minor_version());
+}
+
+static int __sbi_send_ipi_v01(const unsigned long *hart_mask)
+{
+	pr_warn("IPI extension is not available in SBI v%lu.%lu\n",
+		sbi_major_version(), sbi_minor_version());
+
+	return 0;
+}
+
+static int __sbi_rfence_v01(int fid, const unsigned long *hart_mask,
+			    unsigned long start, unsigned long size,
+			    unsigned long arg4, unsigned long arg5)
+{
+	pr_warn("remote fence extension is not available in SBI v%lu.%lu\n",
+		sbi_major_version(), sbi_minor_version());
+
+	return 0;
+}
+#endif /* CONFIG_RISCV_SBI_V01 */
+
+/**
+ * sbi_set_timer() - Program the timer for next timer event.
+ * @stime_value: The value after which next timer event should fire.
+ *
+ * Return: None
+ */
+void sbi_set_timer(uint64_t stime_value)
+{
+	__sbi_set_timer(stime_value);
+}
+EXPORT_SYMBOL(sbi_set_timer);

 /**
  * sbi_send_ipi() - Send an IPI to any hart.
@@ -130,8 +210,7 @@ void sbi_clear_ipi(void)
  */
 void sbi_send_ipi(const unsigned long *hart_mask)
 {
-	sbi_ecall(SBI_EXT_0_1_SEND_IPI, 0, (unsigned long)hart_mask,
-		  0, 0, 0, 0, 0);
+	__sbi_send_ipi(hart_mask);
 }
 EXPORT_SYMBOL(sbi_send_ipi);

@@ -143,8 +222,8 @@ EXPORT_SYMBOL(sbi_send_ipi);
  */
 void sbi_remote_fence_i(const unsigned long *hart_mask)
 {
-	sbi_ecall(SBI_EXT_0_1_REMOTE_FENCE_I, 0, (unsigned long)hart_mask,
-		  0, 0, 0, 0, 0);
+	__sbi_rfence(SBI_EXT_RFENCE_REMOTE_FENCE_I,
+		     hart_mask, 0, 0, 0, 0);
 }
 EXPORT_SYMBOL(sbi_remote_fence_i);

@@ -161,8 +240,8 @@ void sbi_remote_sfence_vma(const unsigned long *hart_mask,
			   unsigned long start,
			   unsigned long size)
 {
-	sbi_ecall(SBI_EXT_0_1_REMOTE_SFENCE_VMA, 0,
-		  (unsigned long)hart_mask, start, size, 0, 0, 0);
+	__sbi_rfence(SBI_EXT_RFENCE_REMOTE_SFENCE_VMA,
+		     hart_mask, start, size, 0, 0);
 }
 EXPORT_SYMBOL(sbi_remote_sfence_vma);

@@ -182,8 +261,8 @@ void sbi_remote_sfence_vma_asid(const unsigned long *hart_mask,
				unsigned long size,
				unsigned long asid)
 {
-	sbi_ecall(SBI_EXT_0_1_REMOTE_SFENCE_VMA_ASID, 0,
-		  (unsigned long)hart_mask, start, size, asid, 0, 0);
+	__sbi_rfence(SBI_EXT_RFENCE_REMOTE_SFENCE_VMA_ASID,
+		     hart_mask, start, size, asid, 0);
 }
 EXPORT_SYMBOL(sbi_remote_sfence_vma_asid);

@@ -249,8 +328,15 @@ int __init sbi_init(void)

	pr_info("SBI specification v%lu.%lu detected\n",
		sbi_major_version(), sbi_minor_version());
-	if (!sbi_spec_is_0_1())
+
+	if (!sbi_spec_is_0_1()) {
		pr_info("SBI implementation ID=0x%lx Version=0x%lx\n",
			sbi_get_firmware_id(), sbi_get_firmware_version());
+	}
+
+	__sbi_set_timer = __sbi_set_timer_v01;
+	__sbi_send_ipi = __sbi_send_ipi_v01;
+	__sbi_rfence = __sbi_rfence_v01;
+
	return 0;
 }

From 1ef46c231df4b856559ec0234bfcbb41a1180b97 Mon Sep 17 00:00:00 2001
From: Atish Patra
Date: Tue, 17 Mar 2020 18:11:38 -0700
Subject: [PATCH 25/38] RISC-V: Implement new SBI v0.2 extensions

A few v0.1 SBI calls are being replaced by new SBI calls that follow
the v0.2 calling convention. Implement the replacement extensions and
a few additional new SBI function calls that make way for a better SBI
interface in the future.
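For illustration, existing callers do not change; the sketch below
(illustrative only, using names added by this series; the example
function name is hypothetical) shows how a remote fence reaches the
v0.2 implementation once the RFENCE extension has been probed in
sbi_init():

	/*
	 * Illustrative sketch: convert a logical cpumask into a hart
	 * mask and issue a remote FENCE.I. sbi_remote_fence_i()
	 * dispatches through the __sbi_rfence function pointer, which
	 * points at __sbi_rfence_v02() when the RFENCE extension is
	 * available and at __sbi_rfence_v01() otherwise.
	 */
	static void remote_fence_i_example(const struct cpumask *cpus)
	{
		struct cpumask hmask;

		riscv_cpuid_to_hartid_mask(cpus, &hmask);
		sbi_remote_fence_i(cpumask_bits(&hmask));
	}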
Signed-off-by: Atish Patra Reviewed-by: Anup Patel Signed-off-by: Palmer Dabbelt --- arch/riscv/include/asm/sbi.h | 14 ++ arch/riscv/include/asm/smp.h | 7 + arch/riscv/kernel/sbi.c | 253 ++++++++++++++++++++++++++++++++++- 3 files changed, 270 insertions(+), 4 deletions(-) diff --git a/arch/riscv/include/asm/sbi.h b/arch/riscv/include/asm/sbi.h index d712b61f8dbc..29ce2c494386 100644 --- a/arch/riscv/include/asm/sbi.h +++ b/arch/riscv/include/asm/sbi.h @@ -96,6 +96,20 @@ void sbi_remote_sfence_vma_asid(const unsigned long *hart_mask, unsigned long start, unsigned long size, unsigned long asid); +int sbi_remote_hfence_gvma(const unsigned long *hart_mask, + unsigned long start, + unsigned long size); +int sbi_remote_hfence_gvma_vmid(const unsigned long *hart_mask, + unsigned long start, + unsigned long size, + unsigned long vmid); +int sbi_remote_hfence_vvma(const unsigned long *hart_mask, + unsigned long start, + unsigned long size); +int sbi_remote_hfence_vvma_asid(const unsigned long *hart_mask, + unsigned long start, + unsigned long size, + unsigned long asid); int sbi_probe_extension(int ext); /* Check if current SBI specification version is 0.1 or not */ diff --git a/arch/riscv/include/asm/smp.h b/arch/riscv/include/asm/smp.h index a83451d73a4e..023f74fb8b3b 100644 --- a/arch/riscv/include/asm/smp.h +++ b/arch/riscv/include/asm/smp.h @@ -61,5 +61,12 @@ static inline unsigned long cpuid_to_hartid_map(int cpu) return boot_cpu_hartid; } +static inline void riscv_cpuid_to_hartid_mask(const struct cpumask *in, + struct cpumask *out) +{ + cpumask_clear(out); + cpumask_set_cpu(boot_cpu_hartid, out); +} + #endif /* CONFIG_SMP */ #endif /* _ASM_RISCV_SMP_H */ diff --git a/arch/riscv/kernel/sbi.c b/arch/riscv/kernel/sbi.c index 1368da62ec82..1cc0052e1b63 100644 --- a/arch/riscv/kernel/sbi.c +++ b/arch/riscv/kernel/sbi.c @@ -8,6 +8,7 @@ #include #include #include +#include /* default SBI version is 0.1 */ unsigned long sbi_spec_version = SBI_SPEC_VERSION_DEFAULT; @@ -191,6 +192,153 @@ static int __sbi_rfence_v01(int fid, const unsigned long *hart_mask, } #endif /* CONFIG_RISCV_SBI_V01 */ +static void __sbi_set_timer_v02(uint64_t stime_value) +{ +#if __riscv_xlen == 32 + sbi_ecall(SBI_EXT_TIME, SBI_EXT_TIME_SET_TIMER, stime_value, + stime_value >> 32, 0, 0, 0, 0); +#else + sbi_ecall(SBI_EXT_TIME, SBI_EXT_TIME_SET_TIMER, stime_value, 0, + 0, 0, 0, 0); +#endif +} + +static int __sbi_send_ipi_v02(const unsigned long *hart_mask) +{ + unsigned long hartid, hmask_val, hbase; + struct cpumask tmask; + struct sbiret ret = {0}; + int result; + + if (!hart_mask || !(*hart_mask)) { + riscv_cpuid_to_hartid_mask(cpu_online_mask, &tmask); + hart_mask = cpumask_bits(&tmask); + } + + hmask_val = 0; + hbase = 0; + for_each_set_bit(hartid, hart_mask, NR_CPUS) { + if (hmask_val && ((hbase + BITS_PER_LONG) <= hartid)) { + ret = sbi_ecall(SBI_EXT_IPI, SBI_EXT_IPI_SEND_IPI, + hmask_val, hbase, 0, 0, 0, 0); + if (ret.error) + goto ecall_failed; + hmask_val = 0; + hbase = 0; + } + if (!hmask_val) + hbase = hartid; + hmask_val |= 1UL << (hartid - hbase); + } + + if (hmask_val) { + ret = sbi_ecall(SBI_EXT_IPI, SBI_EXT_IPI_SEND_IPI, + hmask_val, hbase, 0, 0, 0, 0); + if (ret.error) + goto ecall_failed; + } + + return 0; + +ecall_failed: + result = sbi_err_map_linux_errno(ret.error); + pr_err("%s: hbase = [%lu] hmask = [0x%lx] failed (error [%d])\n", + __func__, hbase, hmask_val, result); + return result; +} + +static int __sbi_rfence_v02_call(unsigned long fid, unsigned long hmask_val, + unsigned long hbase, unsigned long 
start,
+				   unsigned long size, unsigned long arg4,
+				   unsigned long arg5)
+{
+	struct sbiret ret = {0};
+	int ext = SBI_EXT_RFENCE;
+	int result = 0;
+
+	switch (fid) {
+	case SBI_EXT_RFENCE_REMOTE_FENCE_I:
+		ret = sbi_ecall(ext, fid, hmask_val, hbase, 0, 0, 0, 0);
+		break;
+	case SBI_EXT_RFENCE_REMOTE_SFENCE_VMA:
+		ret = sbi_ecall(ext, fid, hmask_val, hbase, start,
+				size, 0, 0);
+		break;
+	case SBI_EXT_RFENCE_REMOTE_SFENCE_VMA_ASID:
+		ret = sbi_ecall(ext, fid, hmask_val, hbase, start,
+				size, arg4, 0);
+		break;
+
+	case SBI_EXT_RFENCE_REMOTE_HFENCE_GVMA:
+		ret = sbi_ecall(ext, fid, hmask_val, hbase, start,
+				size, 0, 0);
+		break;
+	case SBI_EXT_RFENCE_REMOTE_HFENCE_GVMA_VMID:
+		ret = sbi_ecall(ext, fid, hmask_val, hbase, start,
+				size, arg4, 0);
+		break;
+	case SBI_EXT_RFENCE_REMOTE_HFENCE_VVMA:
+		ret = sbi_ecall(ext, fid, hmask_val, hbase, start,
+				size, 0, 0);
+		break;
+	case SBI_EXT_RFENCE_REMOTE_HFENCE_VVMA_ASID:
+		ret = sbi_ecall(ext, fid, hmask_val, hbase, start,
+				size, arg4, 0);
+		break;
+	default:
+		pr_err("unknown function ID [%lu] for SBI extension [%d]\n",
+		       fid, ext);
+		result = -EINVAL;
+	}
+
+	if (ret.error) {
+		result = sbi_err_map_linux_errno(ret.error);
+		pr_err("%s: hbase = [%lu] hmask = [0x%lx] failed (error [%d])\n",
+		       __func__, hbase, hmask_val, result);
+	}
+
+	return result;
+}
+
+static int __sbi_rfence_v02(int fid, const unsigned long *hart_mask,
+			    unsigned long start, unsigned long size,
+			    unsigned long arg4, unsigned long arg5)
+{
+	unsigned long hmask_val, hartid, hbase;
+	struct cpumask tmask;
+	int result;
+
+	if (!hart_mask || !(*hart_mask)) {
+		riscv_cpuid_to_hartid_mask(cpu_online_mask, &tmask);
+		hart_mask = cpumask_bits(&tmask);
+	}
+
+	hmask_val = 0;
+	hbase = 0;
+	for_each_set_bit(hartid, hart_mask, NR_CPUS) {
+		if (hmask_val && ((hbase + BITS_PER_LONG) <= hartid)) {
+			result = __sbi_rfence_v02_call(fid, hmask_val, hbase,
+						       start, size, arg4, arg5);
+			if (result)
+				return result;
+			hmask_val = 0;
+			hbase = 0;
+		}
+		if (!hmask_val)
+			hbase = hartid;
+		hmask_val |= 1UL << (hartid - hbase);
+	}
+
+	if (hmask_val) {
+		result = __sbi_rfence_v02_call(fid, hmask_val, hbase,
+					       start, size, arg4, arg5);
+		if (result)
+			return result;
+	}
+
+	return 0;
+}
+
 /**
  * sbi_set_timer() - Program the timer for next timer event.
  * @stime_value: The value after which next timer event should fire.
@@ -266,6 +414,85 @@ void sbi_remote_sfence_vma_asid(const unsigned long *hart_mask,
 }
 EXPORT_SYMBOL(sbi_remote_sfence_vma_asid);

+/**
+ * sbi_remote_hfence_gvma() - Execute HFENCE.GVMA instructions on given remote
+ *			      harts for the specified guest physical address range.
+ * @hart_mask: A cpu mask containing all the target harts.
+ * @start: Start of the guest physical address
+ * @size: Total size of the guest physical address range.
+ *
+ * Return: 0 if success, Error otherwise.
+ */
+int sbi_remote_hfence_gvma(const unsigned long *hart_mask,
+			   unsigned long start,
+			   unsigned long size)
+{
+	return __sbi_rfence(SBI_EXT_RFENCE_REMOTE_HFENCE_GVMA,
+			    hart_mask, start, size, 0, 0);
+}
+EXPORT_SYMBOL_GPL(sbi_remote_hfence_gvma);
+
+/**
+ * sbi_remote_hfence_gvma_vmid() - Execute HFENCE.GVMA instructions on given
+ * remote harts for a guest physical address range belonging to a specific VMID.
+ *
+ * @hart_mask: A cpu mask containing all the target harts.
+ * @start: Start of the guest physical address
+ * @size: Total size of the guest physical address range.
+ * @vmid: The value of guest ID (VMID).
+ *
+ * Return: 0 if success, Error otherwise.
+ */
+int sbi_remote_hfence_gvma_vmid(const unsigned long *hart_mask,
+				unsigned long start,
+				unsigned long size,
+				unsigned long vmid)
+{
+	return __sbi_rfence(SBI_EXT_RFENCE_REMOTE_HFENCE_GVMA_VMID,
+			    hart_mask, start, size, vmid, 0);
+}
+EXPORT_SYMBOL(sbi_remote_hfence_gvma_vmid);
+
+/**
+ * sbi_remote_hfence_vvma() - Execute HFENCE.VVMA instructions on given remote
+ *			      harts for the current guest virtual address range.
+ * @hart_mask: A cpu mask containing all the target harts.
+ * @start: Start of the current guest virtual address
+ * @size: Total size of the current guest virtual address range.
+ *
+ * Return: 0 if success, Error otherwise.
+ */
+int sbi_remote_hfence_vvma(const unsigned long *hart_mask,
+			   unsigned long start,
+			   unsigned long size)
+{
+	return __sbi_rfence(SBI_EXT_RFENCE_REMOTE_HFENCE_VVMA,
+			    hart_mask, start, size, 0, 0);
+}
+EXPORT_SYMBOL(sbi_remote_hfence_vvma);
+
+/**
+ * sbi_remote_hfence_vvma_asid() - Execute HFENCE.VVMA instructions on given
+ * remote harts for current guest virtual address range belonging to a specific
+ * ASID.
+ *
+ * @hart_mask: A cpu mask containing all the target harts.
+ * @start: Start of the current guest virtual address
+ * @size: Total size of the current guest virtual address range.
+ * @asid: The value of address space identifier (ASID).
+ *
+ * Return: 0 if success, Error otherwise.
+ */
+int sbi_remote_hfence_vvma_asid(const unsigned long *hart_mask,
+				unsigned long start,
+				unsigned long size,
+				unsigned long asid)
+{
+	return __sbi_rfence(SBI_EXT_RFENCE_REMOTE_HFENCE_VVMA_ASID,
+			    hart_mask, start, size, asid, 0);
+}
+EXPORT_SYMBOL(sbi_remote_hfence_vvma_asid);
+
 /**
  * sbi_probe_extension() - Check if an SBI extension ID is supported or not.
  * @extid: The extension ID to be probed.
@@ -332,11 +559,29 @@ int __init sbi_init(void)

	if (!sbi_spec_is_0_1()) {
		pr_info("SBI implementation ID=0x%lx Version=0x%lx\n",
			sbi_get_firmware_id(), sbi_get_firmware_version());
+		if (sbi_probe_extension(SBI_EXT_TIME) > 0) {
+			__sbi_set_timer = __sbi_set_timer_v02;
+			pr_info("SBI v0.2 TIME extension detected\n");
+		} else {
+			__sbi_set_timer = __sbi_set_timer_v01;
+		}
+		if (sbi_probe_extension(SBI_EXT_IPI) > 0) {
+			__sbi_send_ipi = __sbi_send_ipi_v02;
+			pr_info("SBI v0.2 IPI extension detected\n");
+		} else {
+			__sbi_send_ipi = __sbi_send_ipi_v01;
+		}
+		if (sbi_probe_extension(SBI_EXT_RFENCE) > 0) {
+			__sbi_rfence = __sbi_rfence_v02;
+			pr_info("SBI v0.2 RFENCE extension detected\n");
+		} else {
+			__sbi_rfence = __sbi_rfence_v01;
+		}
+	} else {
+		__sbi_set_timer = __sbi_set_timer_v01;
+		__sbi_send_ipi = __sbi_send_ipi_v01;
+		__sbi_rfence = __sbi_rfence_v01;
	}

-	__sbi_set_timer = __sbi_set_timer_v01;
-	__sbi_send_ipi = __sbi_send_ipi_v01;
-	__sbi_rfence = __sbi_rfence_v01;
-
	return 0;
 }

From e011995e826f85fbe55dc7d4ce649461163d1052 Mon Sep 17 00:00:00 2001
From: Atish Patra
Date: Tue, 17 Mar 2020 18:11:39 -0700
Subject: [PATCH 26/38] RISC-V: Move relocate and a few other functions out of
 __init

The secondary hart booting and relocation code are under the .init
section. As a result, they will be freed once kernel booting is done.
However, the ordered booting protocol and CPU hotplug always require
these functions to be present to bring up harts after the initial
kernel boot.

Move the required functions to a different section and make sure that
they are in memory within the first 2MB offset, as the trampoline page
directory only maps the first 2MB.
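The intended layout after this change is roughly the following (a
sketch of the intent, not literal linker output):

	_start		/* .head.text: image header, relocate and the
			   secondary entry/park code; never freed, and
			   covered by the trampoline's single 2MB mapping */
	__init_begin	/* .init.text/.init.data: _start_kernel and other
			   one-time boot code, freed once booting is done */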
Signed-off-by: Atish Patra
Reviewed-by: Anup Patel
Signed-off-by: Palmer Dabbelt
---
 arch/riscv/kernel/head.S        | 153 +++++++++++++++++---------------
 arch/riscv/kernel/vmlinux.lds.S |   5 +-
 2 files changed, 86 insertions(+), 72 deletions(-)

diff --git a/arch/riscv/kernel/head.S b/arch/riscv/kernel/head.S
index 85f2073e7fe4..173507395a6b 100644
--- a/arch/riscv/kernel/head.S
+++ b/arch/riscv/kernel/head.S
@@ -14,7 +14,7 @@
 #include
 #include

-__INIT
+__HEAD
ENTRY(_start)
	/*
	 * Image header expected by Linux boot-loaders. The image header data
@@ -45,8 +45,85 @@ ENTRY(_start)
	.ascii RISCV_IMAGE_MAGIC2
	.word 0

-.global _start_kernel
-_start_kernel:
+.align 2
+#ifdef CONFIG_MMU
+relocate:
+	/* Relocate return address */
+	li a1, PAGE_OFFSET
+	la a2, _start
+	sub a1, a1, a2
+	add ra, ra, a1
+
+	/* Point stvec to virtual address of instruction after satp write */
+	la a2, 1f
+	add a2, a2, a1
+	csrw CSR_TVEC, a2
+
+	/* Compute satp for kernel page tables, but don't load it yet */
+	srl a2, a0, PAGE_SHIFT
+	li a1, SATP_MODE
+	or a2, a2, a1
+
+	/*
+	 * Load trampoline page directory, which will cause us to trap to
+	 * stvec if VA != PA, or simply fall through if VA == PA.  We need a
+	 * full fence here because setup_vm() just wrote these PTEs and we need
+	 * to ensure the new translations are in use.
+	 */
+	la a0, trampoline_pg_dir
+	srl a0, a0, PAGE_SHIFT
+	or a0, a0, a1
+	sfence.vma
+	csrw CSR_SATP, a0
+.align 2
+1:
+	/* Set trap vector to spin forever to help debug */
+	la a0, .Lsecondary_park
+	csrw CSR_TVEC, a0
+
+	/* Reload the global pointer */
+.option push
+.option norelax
+	la gp, __global_pointer$
+.option pop
+
+	/*
+	 * Switch to kernel page tables.  A full fence is necessary in order to
+	 * avoid using the trampoline translations, which are only correct for
+	 * the first superpage.  Fetching the fence is guaranteed to work
+	 * because that first superpage is translated the same way.
+	 */
+	csrw CSR_SATP, a2
+	sfence.vma
+
+	ret
+#endif /* CONFIG_MMU */
+#ifdef CONFIG_SMP
+	/* Set trap vector to spin forever to help debug */
+	la a3, .Lsecondary_park
+	csrw CSR_TVEC, a3
+
+	slli a3, a0, LGREG
+	.global secondary_start_common
+secondary_start_common:
+
+#ifdef CONFIG_MMU
+	/* Enable virtual memory and relocate to virtual address */
+	la a0, swapper_pg_dir
+	call relocate
+#endif
+	tail smp_callin
+#endif /* CONFIG_SMP */
+
+.Lsecondary_park:
+	/* We lack SMP support or have too many harts, so park this hart */
+	wfi
+	j .Lsecondary_park
+
+END(_start)
+
+	__INIT
+ENTRY(_start_kernel)
	/* Mask all interrupts */
	csrw CSR_IE, zero
	csrw CSR_IP, zero
@@ -134,59 +211,6 @@ clear_bss_done:
	call parse_dtb
	tail start_kernel

-#ifdef CONFIG_MMU
-relocate:
-	/* Relocate return address */
-	li a1, PAGE_OFFSET
-	la a2, _start
-	sub a1, a1, a2
-	add ra, ra, a1
-
-	/* Point stvec to virtual address of intruction after satp write */
-	la a2, 1f
-	add a2, a2, a1
-	csrw CSR_TVEC, a2
-
-	/* Compute satp for kernel page tables, but don't load it yet */
-	srl a2, a0, PAGE_SHIFT
-	li a1, SATP_MODE
-	or a2, a2, a1
-
-	/*
-	 * Load trampoline page directory, which will cause us to trap to
-	 * stvec if VA != PA, or simply fall through if VA == PA.  We need a
-	 * full fence here because setup_vm() just wrote these PTEs and we need
-	 * to ensure the new translations are in use.
- */
-	la a0, trampoline_pg_dir
-	srl a0, a0, PAGE_SHIFT
-	or a0, a0, a1
-	sfence.vma
-	csrw CSR_SATP, a0
-.align 2
-1:
-	/* Set trap vector to spin forever to help debug */
-	la a0, .Lsecondary_park
-	csrw CSR_TVEC, a0
-
-	/* Reload the global pointer */
-.option push
-.option norelax
-	la gp, __global_pointer$
-.option pop
-
-	/*
-	 * Switch to kernel page tables.  A full fence is necessary in order to
-	 * avoid using the trampoline translations, which are only correct for
-	 * the first superpage.  Fetching the fence is guarnteed to work
-	 * because that first superpage is translated the same way.
-	 */
-	csrw CSR_SATP, a2
-	sfence.vma
-
-	ret
-#endif /* CONFIG_MMU */
-
 .Lsecondary_start:
 #ifdef CONFIG_SMP
	/* Set trap vector to spin forever to help debug */
@@ -211,16 +235,10 @@ relocate:
	beqz tp, .Lwait_for_cpu_up
	fence

-#ifdef CONFIG_MMU
-	/* Enable virtual memory and relocate to virtual address */
-	la a0, swapper_pg_dir
-	call relocate
+	tail secondary_start_common
 #endif
-	tail smp_callin
-#endif
-
-END(_start)
+END(_start_kernel)

 #ifdef CONFIG_RISCV_M_MODE
 ENTRY(reset_regs)
@@ -301,13 +319,6 @@ ENTRY(reset_regs)
 END(reset_regs)
 #endif /* CONFIG_RISCV_M_MODE */

-.section ".text", "ax",@progbits
-.align 2
-.Lsecondary_park:
-	/* We lack SMP support or have too many harts, so park this hart */
-	wfi
-	j .Lsecondary_park
-
 __PAGE_ALIGNED_BSS
	/* Empty zero page */
	.balign PAGE_SIZE
diff --git a/arch/riscv/kernel/vmlinux.lds.S b/arch/riscv/kernel/vmlinux.lds.S
index 4ad3c8eb241d..435cd60dca04 100644
--- a/arch/riscv/kernel/vmlinux.lds.S
+++ b/arch/riscv/kernel/vmlinux.lds.S
@@ -11,6 +11,7 @@

 #include
 #include
+#include

 OUTPUT_ARCH(riscv)
 ENTRY(_start)
@@ -21,8 +22,10 @@ SECTIONS
	/* Beginning of code and text segment */
	. = LOAD_OFFSET;
	_start = .;
-	__init_begin = .;
	HEAD_TEXT_SECTION
+	. = ALIGN(PAGE_SIZE);
+
+	__init_begin = .;
	INIT_TEXT_SECTION(PAGE_SIZE)
	INIT_DATA_SECTION(16)
	/* we have to discard exit text and such at runtime, not link time */

From 2875fe0561569f82d0e63658ccf0d11ce7da8922 Mon Sep 17 00:00:00 2001
From: Atish Patra
Date: Tue, 17 Mar 2020 18:11:40 -0700
Subject: [PATCH 27/38] RISC-V: Add cpu_ops and modify default booting method

Currently, all non-booting harts start booting after the booting hart
updates the per-hart stack pointer. This is done in a way that makes it
difficult to implement any other booting method without breaking
backward compatibility.

Define a cpu_ops method that allows other booting methods to be
introduced in the future. Modify the current booting method to be
compatible with cpu_ops.

Signed-off-by: Atish Patra
Reviewed-by: Anup Patel
Signed-off-by: Palmer Dabbelt
---
 arch/riscv/include/asm/cpu_ops.h     | 34 +++++++++++++++++++
 arch/riscv/kernel/Makefile           |  2 ++
 arch/riscv/kernel/cpu_ops.c          | 38 +++++++++++++++++++++
 arch/riscv/kernel/cpu_ops_spinwait.c | 43 +++++++++++++++++++++++
 arch/riscv/kernel/smpboot.c          | 51 ++++++++++++++++------------
 5 files changed, 147 insertions(+), 21 deletions(-)
 create mode 100644 arch/riscv/include/asm/cpu_ops.h
 create mode 100644 arch/riscv/kernel/cpu_ops.c
 create mode 100644 arch/riscv/kernel/cpu_ops_spinwait.c

diff --git a/arch/riscv/include/asm/cpu_ops.h b/arch/riscv/include/asm/cpu_ops.h
new file mode 100644
index 000000000000..5ce81a28e1d9
--- /dev/null
+++ b/arch/riscv/include/asm/cpu_ops.h
@@ -0,0 +1,34 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (c) 2020 Western Digital Corporation or its affiliates.
+ * Based on arch/arm64/include/asm/cpu_ops.h + */ +#ifndef __ASM_CPU_OPS_H +#define __ASM_CPU_OPS_H + +#include +#include +#include + +/** + * struct cpu_operations - Callback operations for hotplugging CPUs. + * + * @name: Name of the boot protocol. + * @cpu_prepare: Early one-time preparation step for a cpu. If there + * is a mechanism for doing so, tests whether it is + * possible to boot the given HART. + * @cpu_start: Boots a cpu into the kernel. + */ +struct cpu_operations { + const char *name; + int (*cpu_prepare)(unsigned int cpu); + int (*cpu_start)(unsigned int cpu, + struct task_struct *tidle); +}; + +extern const struct cpu_operations *cpu_ops[NR_CPUS]; +void __init cpu_set_ops(int cpu); +void cpu_update_secondary_bootdata(unsigned int cpuid, + struct task_struct *tidle); + +#endif /* ifndef __ASM_CPU_OPS_H */ diff --git a/arch/riscv/kernel/Makefile b/arch/riscv/kernel/Makefile index d189bd3d8501..43d49ea36a3f 100644 --- a/arch/riscv/kernel/Makefile +++ b/arch/riscv/kernel/Makefile @@ -34,6 +34,8 @@ obj-$(CONFIG_RISCV_M_MODE) += clint.o obj-$(CONFIG_FPU) += fpu.o obj-$(CONFIG_SMP) += smpboot.o obj-$(CONFIG_SMP) += smp.o +obj-$(CONFIG_SMP) += cpu_ops.o +obj-$(CONFIG_SMP) += cpu_ops_spinwait.o obj-$(CONFIG_MODULES) += module.o obj-$(CONFIG_MODULE_SECTIONS) += module-sections.o diff --git a/arch/riscv/kernel/cpu_ops.c b/arch/riscv/kernel/cpu_ops.c new file mode 100644 index 000000000000..62705908eee5 --- /dev/null +++ b/arch/riscv/kernel/cpu_ops.c @@ -0,0 +1,38 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (c) 2020 Western Digital Corporation or its affiliates. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +const struct cpu_operations *cpu_ops[NR_CPUS] __ro_after_init; + +void *__cpu_up_stack_pointer[NR_CPUS]; +void *__cpu_up_task_pointer[NR_CPUS]; + +extern const struct cpu_operations cpu_ops_spinwait; + +void cpu_update_secondary_bootdata(unsigned int cpuid, + struct task_struct *tidle) +{ + int hartid = cpuid_to_hartid_map(cpuid); + + /* Make sure tidle is updated */ + smp_mb(); + WRITE_ONCE(__cpu_up_stack_pointer[hartid], + task_stack_page(tidle) + THREAD_SIZE); + WRITE_ONCE(__cpu_up_task_pointer[hartid], tidle); +} + +void __init cpu_set_ops(int cpuid) +{ + cpu_ops[cpuid] = &cpu_ops_spinwait; +} diff --git a/arch/riscv/kernel/cpu_ops_spinwait.c b/arch/riscv/kernel/cpu_ops_spinwait.c new file mode 100644 index 000000000000..b2c957bb68c1 --- /dev/null +++ b/arch/riscv/kernel/cpu_ops_spinwait.c @@ -0,0 +1,43 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (c) 2020 Western Digital Corporation or its affiliates. + */ + +#include +#include +#include +#include +#include +#include + +const struct cpu_operations cpu_ops_spinwait; + +static int spinwait_cpu_prepare(unsigned int cpuid) +{ + if (!cpu_ops_spinwait.cpu_start) { + pr_err("cpu start method not defined for CPU [%d]\n", cpuid); + return -ENODEV; + } + return 0; +} + +static int spinwait_cpu_start(unsigned int cpuid, struct task_struct *tidle) +{ + /* + * In this protocol, all cpus boot on their own accord. _start + * selects the first cpu to boot the kernel and causes the remainder + * of the cpus to spin in a loop waiting for their stack pointer to be + * setup by that main cpu. Writing to bootdata + * (i.e __cpu_up_stack_pointer) signals to the spinning cpus that they + * can continue the boot process. 
+ */
+	cpu_update_secondary_bootdata(cpuid, tidle);
+
+	return 0;
+}
+
+const struct cpu_operations cpu_ops_spinwait = {
+	.name		= "spinwait",
+	.cpu_prepare	= spinwait_cpu_prepare,
+	.cpu_start	= spinwait_cpu_start,
+};
diff --git a/arch/riscv/kernel/smpboot.c b/arch/riscv/kernel/smpboot.c
index 8bc01f0ca73b..e89396a2a1af 100644
--- a/arch/riscv/kernel/smpboot.c
+++ b/arch/riscv/kernel/smpboot.c
@@ -25,6 +25,7 @@
 #include
 #include
 #include
+#include
 #include
 #include
 #include
@@ -34,8 +35,6 @@

 #include "head.h"

-void *__cpu_up_stack_pointer[NR_CPUS];
-void *__cpu_up_task_pointer[NR_CPUS];
 static DECLARE_COMPLETION(cpu_running);

 void __init smp_prepare_boot_cpu(void)
@@ -46,6 +45,7 @@ void __init smp_prepare_boot_cpu(void)
 void __init smp_prepare_cpus(unsigned int max_cpus)
 {
	int cpuid;
+	int ret;

	/* This covers non-smp usecase mandated by "nosmp" option */
	if (max_cpus == 0)
@@ -54,6 +54,11 @@ void __init smp_prepare_cpus(unsigned int max_cpus)
	for_each_possible_cpu(cpuid) {
		if (cpuid == smp_processor_id())
			continue;
+		if (cpu_ops[cpuid]->cpu_prepare) {
+			ret = cpu_ops[cpuid]->cpu_prepare(cpuid);
+			if (ret)
+				continue;
+		}
		set_cpu_present(cpuid, true);
	}
 }
@@ -65,6 +70,8 @@ void __init setup_smp(void)
	bool found_boot_cpu = false;
	int cpuid = 1;

+	cpu_set_ops(0);
+
	for_each_of_cpu_node(dn) {
		hart = riscv_of_processor_hartid(dn);
		if (hart < 0)
@@ -92,36 +99,38 @@ void __init setup_smp(void)
			cpuid, nr_cpu_ids);

	for (cpuid = 1; cpuid < nr_cpu_ids; cpuid++) {
-		if (cpuid_to_hartid_map(cpuid) != INVALID_HARTID)
+		if (cpuid_to_hartid_map(cpuid) != INVALID_HARTID) {
+			cpu_set_ops(cpuid);
			set_cpu_possible(cpuid, true);
+		}
	}
 }

+int start_secondary_cpu(int cpu, struct task_struct *tidle)
+{
+	if (cpu_ops[cpu]->cpu_start)
+		return cpu_ops[cpu]->cpu_start(cpu, tidle);
+
+	return -EOPNOTSUPP;
+}
+
 int __cpu_up(unsigned int cpu, struct task_struct *tidle)
 {
	int ret = 0;
-	int hartid = cpuid_to_hartid_map(cpu);
	tidle->thread_info.cpu = cpu;

-	/*
-	 * On RISC-V systems, all harts boot on their own accord.  Our _start
-	 * selects the first hart to boot the kernel and causes the remainder
-	 * of the harts to spin in a loop waiting for their stack pointer to be
-	 * setup by that main hart.  Writing __cpu_up_stack_pointer signals to
-	 * the spinning harts that they can continue the boot process.
-	 */
-	smp_mb();
-	WRITE_ONCE(__cpu_up_stack_pointer[hartid],
-		  task_stack_page(tidle) + THREAD_SIZE);
-	WRITE_ONCE(__cpu_up_task_pointer[hartid], tidle);
-
-	lockdep_assert_held(&cpu_running);
-	wait_for_completion_timeout(&cpu_running,
+	ret = start_secondary_cpu(cpu, tidle);
+	if (!ret) {
+		lockdep_assert_held(&cpu_running);
+		wait_for_completion_timeout(&cpu_running,
					    msecs_to_jiffies(1000));

-	if (!cpu_online(cpu)) {
-		pr_crit("CPU%u: failed to come online\n", cpu);
-		ret = -EIO;
+		if (!cpu_online(cpu)) {
+			pr_crit("CPU%u: failed to come online\n", cpu);
+			ret = -EIO;
+		}
+	} else {
+		pr_crit("CPU%u: failed to start\n", cpu);
	}

	return ret;

From f90b43ce176c129a84237c9d57fae51aeff3e6ec Mon Sep 17 00:00:00 2001
From: Atish Patra
Date: Tue, 17 Mar 2020 18:11:41 -0700
Subject: [PATCH 28/38] RISC-V: Export SBI error to linux error mapping function

Not all SBI extensions will be implemented in sbi.c, to avoid bloating
it. Thus, sbi_err_map_linux_errno() will be used in other files
implementing that specific extension. Export the function so that it
can be used later.
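For illustration (a sketch only), an extension implemented outside
sbi.c, such as the HSM support added later in this series, would use it
as follows:

	/*
	 * Illustrative sketch: start a hart through the HSM extension
	 * and convert the SBI error into a Linux errno. SBI_EXT_HSM and
	 * its function IDs are introduced by a later patch; the example
	 * function name is hypothetical.
	 */
	static int hsm_hart_start_example(unsigned long hartid,
					  unsigned long saddr)
	{
		struct sbiret ret;

		ret = sbi_ecall(SBI_EXT_HSM, SBI_EXT_HSM_HART_START,
				hartid, saddr, 0, 0, 0, 0);
		return ret.error ? sbi_err_map_linux_errno(ret.error) : 0;
	}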
Signed-off-by: Atish Patra
Reviewed-by: Anup Patel
Signed-off-by: Palmer Dabbelt
---
 arch/riscv/include/asm/sbi.h | 2 ++
 arch/riscv/kernel/sbi.c      | 3 ++-
 2 files changed, 4 insertions(+), 1 deletion(-)

diff --git a/arch/riscv/include/asm/sbi.h b/arch/riscv/include/asm/sbi.h
index 29ce2c494386..2bbfd6bada93 100644
--- a/arch/riscv/include/asm/sbi.h
+++ b/arch/riscv/include/asm/sbi.h
@@ -130,6 +130,8 @@ static inline unsigned long sbi_minor_version(void)
 {
	return sbi_spec_version & SBI_SPEC_VERSION_MINOR_MASK;
 }
+
+int sbi_err_map_linux_errno(int err);
 #else /* CONFIG_RISCV_SBI */
 /* stubs for code that is only reachable under IS_ENABLED(CONFIG_RISCV_SBI): */
 void sbi_set_timer(uint64_t stime_value);
diff --git a/arch/riscv/kernel/sbi.c b/arch/riscv/kernel/sbi.c
index 1cc0052e1b63..7c24da59bccf 100644
--- a/arch/riscv/kernel/sbi.c
+++ b/arch/riscv/kernel/sbi.c
@@ -46,7 +46,7 @@ struct sbiret sbi_ecall(int ext, int fid, unsigned long arg0,
 }
 EXPORT_SYMBOL(sbi_ecall);

-static int sbi_err_map_linux_errno(int err)
+int sbi_err_map_linux_errno(int err)
 {
	switch (err) {
	case SBI_SUCCESS:
@@ -63,6 +63,7 @@ static int sbi_err_map_linux_errno(int err)
		return -ENOTSUPP;
	};
 }
+EXPORT_SYMBOL(sbi_err_map_linux_errno);

From db5a79460315bd12dedee5f964cd72f3a534ecb2 Mon Sep 17 00:00:00 2001
From: Atish Patra
Date: Tue, 17 Mar 2020 18:11:42 -0700
Subject: [PATCH 29/38] RISC-V: Add SBI HSM extension definitions

The SBI specification defines an HSM extension that allows the
supervisor to start/stop a hart at any time. The specification is
available at

https://github.com/riscv/riscv-sbi-doc/blob/master/riscv-sbi.adoc

Add those definitions here.

Signed-off-by: Atish Patra
Reviewed-by: Anup Patel
Signed-off-by: Palmer Dabbelt
---
 arch/riscv/include/asm/sbi.h | 14 ++++++++++++++
 1 file changed, 14 insertions(+)

diff --git a/arch/riscv/include/asm/sbi.h b/arch/riscv/include/asm/sbi.h
index 2bbfd6bada93..653edb25d495 100644
--- a/arch/riscv/include/asm/sbi.h
+++ b/arch/riscv/include/asm/sbi.h
@@ -26,6 +26,7 @@ enum sbi_ext_id {
	SBI_EXT_TIME = 0x54494D45,
	SBI_EXT_IPI = 0x735049,
	SBI_EXT_RFENCE = 0x52464E43,
+	SBI_EXT_HSM = 0x48534D,
 };

 enum sbi_ext_base_fid {
@@ -56,6 +57,19 @@ enum sbi_ext_rfence_fid {
	SBI_EXT_RFENCE_REMOTE_HFENCE_VVMA_ASID,
 };

+enum sbi_ext_hsm_fid {
+	SBI_EXT_HSM_HART_START = 0,
+	SBI_EXT_HSM_HART_STOP,
+	SBI_EXT_HSM_HART_STATUS,
+};
+
+enum sbi_hsm_hart_status {
+	SBI_HSM_HART_STATUS_STARTED = 0,
+	SBI_HSM_HART_STATUS_STOPPED,
+	SBI_HSM_HART_STATUS_START_PENDING,
+	SBI_HSM_HART_STATUS_STOP_PENDING,
+};
+
 #define SBI_SPEC_VERSION_DEFAULT	0x1
 #define SBI_SPEC_VERSION_MAJOR_SHIFT	24
 #define SBI_SPEC_VERSION_MAJOR_MASK	0x7f

From cfafe260137418d0265d0df3bb18dc494af2b43e Mon Sep 17 00:00:00 2001
From: Atish Patra
Date: Tue, 17 Mar 2020 18:11:43 -0700
Subject: [PATCH 30/38] RISC-V: Add support for ordered booting method using
 HSM

Currently, all harts have to jump to Linux in RISC-V. This complicates
the multi-stage boot process, as every transient stage also has to
ensure all harts enter that stage and jump to Linux afterwards. It
also obstructs a clean kexec implementation.

The SBI HSM extension provides an alternate solution where only a
single hart needs to boot and enter Linux. The booting hart can bring
up secondary harts one by one afterwards.

Add SBI HSM based cpu_ops that implements an ordered booting method in
RISC-V. This change is also backward compatible with older firmware
not implementing the HSM extension.
If the latest kernel is used with older firmware, it will continue to
use the default spinwait booting method.

Signed-off-by: Atish Patra
Reviewed-by: Anup Patel
Signed-off-by: Palmer Dabbelt
---
 arch/riscv/kernel/Makefile      |  3 ++
 arch/riscv/kernel/cpu_ops.c     | 10 +++-
 arch/riscv/kernel/cpu_ops_sbi.c | 81 +++++++++++++++++++++++++++++++++
 arch/riscv/kernel/head.S        | 26 +++++++++++
 arch/riscv/kernel/smpboot.c     |  2 +-
 arch/riscv/kernel/traps.c       |  2 +-
 6 files changed, 121 insertions(+), 3 deletions(-)
 create mode 100644 arch/riscv/kernel/cpu_ops_sbi.c

diff --git a/arch/riscv/kernel/Makefile b/arch/riscv/kernel/Makefile
index 43d49ea36a3f..674a23cc4223 100644
--- a/arch/riscv/kernel/Makefile
+++ b/arch/riscv/kernel/Makefile
@@ -46,5 +46,8 @@ obj-$(CONFIG_PERF_EVENTS)	+= perf_event.o
 obj-$(CONFIG_PERF_EVENTS)	+= perf_callchain.o
 obj-$(CONFIG_HAVE_PERF_REGS)	+= perf_regs.o
 obj-$(CONFIG_RISCV_SBI)		+= sbi.o
+ifeq ($(CONFIG_RISCV_SBI), y)
+obj-$(CONFIG_SMP) += cpu_ops_sbi.o
+endif

 clean:
diff --git a/arch/riscv/kernel/cpu_ops.c b/arch/riscv/kernel/cpu_ops.c
index 62705908eee5..c4c33bf02369 100644
--- a/arch/riscv/kernel/cpu_ops.c
+++ b/arch/riscv/kernel/cpu_ops.c
@@ -18,6 +18,7 @@ const struct cpu_operations *cpu_ops[NR_CPUS] __ro_after_init;
 void *__cpu_up_stack_pointer[NR_CPUS];
 void *__cpu_up_task_pointer[NR_CPUS];

+extern const struct cpu_operations cpu_ops_sbi;
 extern const struct cpu_operations cpu_ops_spinwait;

 void cpu_update_secondary_bootdata(unsigned int cpuid,
@@ -34,5 +35,12 @@ void cpu_update_secondary_bootdata(unsigned int cpuid,

 void __init cpu_set_ops(int cpuid)
 {
-	cpu_ops[cpuid] = &cpu_ops_spinwait;
+#if IS_ENABLED(CONFIG_RISCV_SBI)
+	if (sbi_probe_extension(SBI_EXT_HSM) > 0) {
+		if (!cpuid)
+			pr_info("SBI v0.2 HSM extension detected\n");
+		cpu_ops[cpuid] = &cpu_ops_sbi;
+	} else
+#endif
+		cpu_ops[cpuid] = &cpu_ops_spinwait;
 }
diff --git a/arch/riscv/kernel/cpu_ops_sbi.c b/arch/riscv/kernel/cpu_ops_sbi.c
new file mode 100644
index 000000000000..66f3cded91f5
--- /dev/null
+++ b/arch/riscv/kernel/cpu_ops_sbi.c
@@ -0,0 +1,81 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * HSM extension and cpu_ops implementation.
+ *
+ * Copyright (c) 2020 Western Digital Corporation or its affiliates.
+ */
+
+#include
+#include
+#include
+#include
+#include
+
+extern char secondary_start_sbi[];
+const struct cpu_operations cpu_ops_sbi;
+
+static int sbi_hsm_hart_start(unsigned long hartid, unsigned long saddr,
+			      unsigned long priv)
+{
+	struct sbiret ret;
+
+	ret = sbi_ecall(SBI_EXT_HSM, SBI_EXT_HSM_HART_START,
+			hartid, saddr, priv, 0, 0, 0);
+	if (ret.error)
+		return sbi_err_map_linux_errno(ret.error);
+	else
+		return 0;
+}
+
+#ifdef CONFIG_HOTPLUG_CPU
+static int sbi_hsm_hart_stop(void)
+{
+	struct sbiret ret;
+
+	ret = sbi_ecall(SBI_EXT_HSM, SBI_EXT_HSM_HART_STOP, 0, 0, 0, 0, 0, 0);
+
+	if (ret.error)
+		return sbi_err_map_linux_errno(ret.error);
+	else
+		return 0;
+}
+
+static int sbi_hsm_hart_get_status(unsigned long hartid)
+{
+	struct sbiret ret;
+
+	ret = sbi_ecall(SBI_EXT_HSM, SBI_EXT_HSM_HART_STATUS,
+			hartid, 0, 0, 0, 0, 0);
+	if (ret.error)
+		return sbi_err_map_linux_errno(ret.error);
+	else
+		return ret.value;
+}
+#endif
+
+static int sbi_cpu_start(unsigned int cpuid, struct task_struct *tidle)
+{
+	int rc;
+	unsigned long boot_addr = __pa_symbol(secondary_start_sbi);
+	int hartid = cpuid_to_hartid_map(cpuid);
+
+	cpu_update_secondary_bootdata(cpuid, tidle);
+	rc = sbi_hsm_hart_start(hartid, boot_addr, 0);
+
+	return rc;
+}
+
+static int sbi_cpu_prepare(unsigned int cpuid)
+{
+	if (!cpu_ops_sbi.cpu_start) {
+		pr_err("cpu start method not defined for CPU [%d]\n", cpuid);
+		return -ENODEV;
+	}
+	return 0;
+}
+
+const struct cpu_operations cpu_ops_sbi = {
+	.name		= "sbi",
+	.cpu_prepare	= sbi_cpu_prepare,
+	.cpu_start	= sbi_cpu_start,
+};
diff --git a/arch/riscv/kernel/head.S b/arch/riscv/kernel/head.S
index 173507395a6b..e5115d5e0b3a 100644
--- a/arch/riscv/kernel/head.S
+++ b/arch/riscv/kernel/head.S
@@ -99,11 +99,37 @@ relocate:
	ret
 #endif /* CONFIG_MMU */
 #ifdef CONFIG_SMP
+	.global secondary_start_sbi
+secondary_start_sbi:
+	/* Mask all interrupts */
+	csrw CSR_IE, zero
+	csrw CSR_IP, zero
+
+	/* Load the global pointer */
+	.option push
+	.option norelax
+	la gp, __global_pointer$
+	.option pop
+
+	/*
+	 * Disable FPU to detect illegal usage of
+	 * floating point in kernel space
+	 */
+	li t0, SR_FS
+	csrc CSR_STATUS, t0
+
	/* Set trap vector to spin forever to help debug */
	la a3, .Lsecondary_park
	csrw CSR_TVEC, a3

	slli a3, a0, LGREG
+	la a4, __cpu_up_stack_pointer
+	la a5, __cpu_up_task_pointer
+	add a4, a3, a4
+	add a5, a3, a5
+	REG_L sp, (a4)
+	REG_L tp, (a5)
+
	.global secondary_start_common
 secondary_start_common:

diff --git a/arch/riscv/kernel/smpboot.c b/arch/riscv/kernel/smpboot.c
index e89396a2a1af..4e9922790f6e 100644
--- a/arch/riscv/kernel/smpboot.c
+++ b/arch/riscv/kernel/smpboot.c
@@ -143,7 +143,7 @@ void __init smp_cpus_done(unsigned int max_cpus)
 /*
  * C entry point for a secondary processor.
  */
-asmlinkage __visible void __init smp_callin(void)
+asmlinkage __visible void smp_callin(void)
 {
	struct mm_struct *mm = &init_mm;

diff --git a/arch/riscv/kernel/traps.c b/arch/riscv/kernel/traps.c
index a4d136355f78..23a57b92cd1d 100644
--- a/arch/riscv/kernel/traps.c
+++ b/arch/riscv/kernel/traps.c
@@ -148,7 +148,7 @@ int is_valid_bugaddr(unsigned long pc)
 }
 #endif /* CONFIG_GENERIC_BUG */

-void __init trap_init(void)
+void trap_init(void)
 {
	/*
	 * Set sup0 scratch register to 0, indicating to exception vector

From f1e58583b9c7ceae7f11646e9edf2561d67f29c9 Mon Sep 17 00:00:00 2001
From: Atish Patra
Date: Tue, 17 Mar 2020 18:11:44 -0700
Subject: [PATCH 31/38] RISC-V: Support cpu hotplug

This patch enables support for cpu hotplug in RISC-V.
It uses the SBI HSM extension to online/offline any hart. As a result,
the harts are returned to firmware once they are offline. If the harts
are brought online afterwards, they re-enter the Linux kernel as if a
secondary hart booted for the first time. All booting requirements are
honored during this process.

Tested on both QEMU and the HiFive Unleashed board. Test results follow.

---------------------------------------------------
Offline cpu 2
---------------------------------------------------
$ echo 0 > /sys/devices/system/cpu/cpu2/online
[   32.828684] CPU2: off
$ cat /proc/cpuinfo
processor : 0
hart      : 0
isa       : rv64imafdcsu
mmu       : sv48

processor : 1
hart      : 1
isa       : rv64imafdcsu
mmu       : sv48

processor : 3
hart      : 3
isa       : rv64imafdcsu
mmu       : sv48

processor : 4
hart      : 4
isa       : rv64imafdcsu
mmu       : sv48

processor : 5
hart      : 5
isa       : rv64imafdcsu
mmu       : sv48

processor : 6
hart      : 6
isa       : rv64imafdcsu
mmu       : sv48

processor : 7
hart      : 7
isa       : rv64imafdcsu
mmu       : sv48

---------------------------------------------------
online cpu 2
---------------------------------------------------
$ echo 1 > /sys/devices/system/cpu/cpu2/online
$ cat /proc/cpuinfo
processor : 0
hart      : 0
isa       : rv64imafdcsu
mmu       : sv48

processor : 1
hart      : 1
isa       : rv64imafdcsu
mmu       : sv48

processor : 2
hart      : 2
isa       : rv64imafdcsu
mmu       : sv48

processor : 3
hart      : 3
isa       : rv64imafdcsu
mmu       : sv48

processor : 4
hart      : 4
isa       : rv64imafdcsu
mmu       : sv48

processor : 5
hart      : 5
isa       : rv64imafdcsu
mmu       : sv48

processor : 6
hart      : 6
isa       : rv64imafdcsu
mmu       : sv48

processor : 7
hart      : 7
isa       : rv64imafdcsu
mmu       : sv48

Signed-off-by: Atish Patra
Reviewed-by: Anup Patel
---
 arch/riscv/Kconfig               | 12 ++++-
 arch/riscv/include/asm/cpu_ops.h | 12 +++++
 arch/riscv/include/asm/smp.h     | 17 +++++++
 arch/riscv/kernel/Makefile       |  1 +
 arch/riscv/kernel/cpu-hotplug.c  | 87 ++++++++++++++++++++++++++++++++
 arch/riscv/kernel/cpu_ops_sbi.c  | 34 +++++++++++++
 arch/riscv/kernel/setup.c        | 19 ++++++-
 7 files changed, 180 insertions(+), 2 deletions(-)
 create mode 100644 arch/riscv/kernel/cpu-hotplug.c

diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig
index cac23a5c1104..bc713666f00a 100644
--- a/arch/riscv/Kconfig
+++ b/arch/riscv/Kconfig
@@ -20,7 +20,6 @@ config RISCV
 	select CLONE_BACKWARDS
 	select COMMON_CLK
 	select GENERIC_CLOCKEVENTS
-	select GENERIC_CPU_DEVICES
 	select GENERIC_IRQ_SHOW
 	select GENERIC_PCI_IOMAP
 	select GENERIC_SCHED_CLOCK
@@ -254,6 +253,17 @@ config NR_CPUS
 	depends on SMP
 	default "8"

+config HOTPLUG_CPU
+	bool "Support for hot-pluggable CPUs"
+	depends on SMP
+	select GENERIC_IRQ_MIGRATION
+	help
+
+	  Say Y here to experiment with turning CPUs off and on. CPUs
+	  can be controlled through /sys/devices/system/cpu.
+
+	  Say N if you want to disable CPU hotplug.
+
 choice
 	prompt "CPU Tuning"
 	default TUNE_GENERIC

diff --git a/arch/riscv/include/asm/cpu_ops.h b/arch/riscv/include/asm/cpu_ops.h
index 5ce81a28e1d9..a8ec3c5c1bd2 100644
--- a/arch/riscv/include/asm/cpu_ops.h
+++ b/arch/riscv/include/asm/cpu_ops.h
@@ -18,12 +18,24 @@
  *		is a mechanism for doing so, tests whether it is
  *		possible to boot the given HART.
  * @cpu_start:	Boots a cpu into the kernel.
+ * @cpu_disable: Prepares a cpu to die. May fail for some
+ *		mechanism-specific reason, which will cause the hot
+ *		unplug to be aborted. Called from the cpu to be killed.
+ * @cpu_stop:	Makes a cpu leave the kernel. Must not fail. Called from
+ *		the cpu being stopped.
+ * @cpu_is_stopped: Ensures a cpu has left the kernel. Called from another
+ *		cpu.
 */
 struct cpu_operations {
 	const char	*name;
 	int		(*cpu_prepare)(unsigned int cpu);
 	int		(*cpu_start)(unsigned int cpu, struct task_struct *tidle);
+#ifdef CONFIG_HOTPLUG_CPU
+	int		(*cpu_disable)(unsigned int cpu);
+	void		(*cpu_stop)(void);
+	int		(*cpu_is_stopped)(unsigned int cpu);
+#endif
 };

 extern const struct cpu_operations *cpu_ops[NR_CPUS];

diff --git a/arch/riscv/include/asm/smp.h b/arch/riscv/include/asm/smp.h
index 023f74fb8b3b..f4c7cfda6b7f 100644
--- a/arch/riscv/include/asm/smp.h
+++ b/arch/riscv/include/asm/smp.h
@@ -43,6 +43,13 @@ void riscv_cpuid_to_hartid_mask(const struct cpumask *in, struct cpumask *out);
  */
 #define raw_smp_processor_id() (current_thread_info()->cpu)

+#if defined CONFIG_HOTPLUG_CPU
+int __cpu_disable(void);
+void __cpu_die(unsigned int cpu);
+void cpu_stop(void);
+#else
+#endif /* CONFIG_HOTPLUG_CPU */
+
 #else

 static inline void show_ipi_stats(struct seq_file *p, int prec)
@@ -69,4 +76,14 @@ static inline void riscv_cpuid_to_hartid_mask(const struct cpumask *in,
 }
 #endif /* CONFIG_SMP */

+
+#if defined(CONFIG_HOTPLUG_CPU) && (CONFIG_SMP)
+bool cpu_has_hotplug(unsigned int cpu);
+#else
+static inline bool cpu_has_hotplug(unsigned int cpu)
+{
+	return false;
+}
+#endif
+
 #endif /* _ASM_RISCV_SMP_H */

diff --git a/arch/riscv/kernel/Makefile b/arch/riscv/kernel/Makefile
index 674a23cc4223..c121cc491eb8 100644
--- a/arch/riscv/kernel/Makefile
+++ b/arch/riscv/kernel/Makefile
@@ -49,5 +49,6 @@ obj-$(CONFIG_RISCV_SBI) += sbi.o
 ifeq ($(CONFIG_RISCV_SBI), y)
 obj-$(CONFIG_SMP) += cpu_ops_sbi.o
 endif
+obj-$(CONFIG_HOTPLUG_CPU) += cpu-hotplug.o

 clean:

diff --git a/arch/riscv/kernel/cpu-hotplug.c b/arch/riscv/kernel/cpu-hotplug.c
new file mode 100644
index 000000000000..df84e0c13db1
--- /dev/null
+++ b/arch/riscv/kernel/cpu-hotplug.c
@@ -0,0 +1,87 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2020 Western Digital Corporation or its affiliates.
+ */
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+void cpu_stop(void);
+void arch_cpu_idle_dead(void)
+{
+	cpu_stop();
+}
+
+bool cpu_has_hotplug(unsigned int cpu)
+{
+	if (cpu_ops[cpu]->cpu_stop)
+		return true;
+
+	return false;
+}
+
+/*
+ * __cpu_disable runs on the processor to be shut down.
+ */
+int __cpu_disable(void)
+{
+	int ret = 0;
+	unsigned int cpu = smp_processor_id();
+
+	if (!cpu_ops[cpu] || !cpu_ops[cpu]->cpu_stop)
+		return -EOPNOTSUPP;
+
+	if (cpu_ops[cpu]->cpu_disable)
+		ret = cpu_ops[cpu]->cpu_disable(cpu);
+
+	if (ret)
+		return ret;
+
+	remove_cpu_topology(cpu);
+	set_cpu_online(cpu, false);
+	irq_migrate_all_off_this_cpu();
+
+	return ret;
+}
+
+/*
+ * Called on the thread which is asking for a CPU to be shut down.
+ */
+void __cpu_die(unsigned int cpu)
+{
+	int ret = 0;
+
+	if (!cpu_wait_death(cpu, 5)) {
+		pr_err("CPU %u: didn't die\n", cpu);
+		return;
+	}
+	pr_notice("CPU%u: off\n", cpu);
+
+	/* Verify from the firmware if the cpu is really stopped */
+	if (cpu_ops[cpu]->cpu_is_stopped)
+		ret = cpu_ops[cpu]->cpu_is_stopped(cpu);
+	if (ret)
+		pr_warn("CPU%d may not have stopped: %d\n", cpu, ret);
+}
+
+/*
+ * Called from the idle thread for the CPU which has been shut down.
+ */
+void cpu_stop(void)
+{
+	idle_task_exit();
+
+	(void)cpu_report_death();
+
+	cpu_ops[smp_processor_id()]->cpu_stop();
+	/* It should never reach here */
+	BUG();
+}

diff --git a/arch/riscv/kernel/cpu_ops_sbi.c b/arch/riscv/kernel/cpu_ops_sbi.c
index 66f3cded91f5..685fae72b7f5 100644
--- a/arch/riscv/kernel/cpu_ops_sbi.c
+++ b/arch/riscv/kernel/cpu_ops_sbi.c
@@ -74,8 +74,42 @@ static int sbi_cpu_prepare(unsigned int cpuid)
 	return 0;
 }

+#ifdef CONFIG_HOTPLUG_CPU
+static int sbi_cpu_disable(unsigned int cpuid)
+{
+	if (!cpu_ops_sbi.cpu_stop)
+		return -EOPNOTSUPP;
+	return 0;
+}
+
+static void sbi_cpu_stop(void)
+{
+	int ret;
+
+	ret = sbi_hsm_hart_stop();
+	pr_crit("Unable to stop the cpu %u (%d)\n", smp_processor_id(), ret);
+}
+
+static int sbi_cpu_is_stopped(unsigned int cpuid)
+{
+	int rc;
+	int hartid = cpuid_to_hartid_map(cpuid);
+
+	rc = sbi_hsm_hart_get_status(hartid);
+
+	if (rc == SBI_HSM_HART_STATUS_STOPPED)
+		return 0;
+	return rc;
+}
+#endif
+
 const struct cpu_operations cpu_ops_sbi = {
 	.name		= "sbi",
 	.cpu_prepare	= sbi_cpu_prepare,
 	.cpu_start	= sbi_cpu_start,
+#ifdef CONFIG_HOTPLUG_CPU
+	.cpu_disable	= sbi_cpu_disable,
+	.cpu_stop	= sbi_cpu_stop,
+	.cpu_is_stopped	= sbi_cpu_is_stopped,
+#endif
 };

diff --git a/arch/riscv/kernel/setup.c b/arch/riscv/kernel/setup.c
index 07f4e7503223..145128a7e560 100644
--- a/arch/riscv/kernel/setup.c
+++ b/arch/riscv/kernel/setup.c
@@ -16,12 +16,13 @@
 #include
 #include
 #include
+#include
 #include
+#include
 #include
 #include
 #include
-#include
 #include
 #include
 #include
@@ -47,6 +48,7 @@ struct screen_info screen_info = {
  */
 atomic_t hart_lottery __section(.sdata);
 unsigned long boot_cpu_hartid;
+static DEFINE_PER_CPU(struct cpu, cpu_devices);

 void __init parse_dtb(void)
 {
@@ -94,3 +96,18 @@ void __init setup_arch(char **cmdline_p)

 	riscv_fill_hwcap();
 }
+
+static int __init topology_init(void)
+{
+	int i;
+
+	for_each_possible_cpu(i) {
+		struct cpu *cpu = &per_cpu(cpu_devices, i);
+
+		cpu->hotpluggable = cpu_has_hotplug(i);
+		register_cpu(cpu, i);
+	}
+
+	return 0;
+}
+subsys_initcall(topology_init);

From 956d705dd279f70d5a222375fa97b637d6e8c43d Mon Sep 17 00:00:00 2001
From: Damien Le Moal
Date: Mon, 16 Mar 2020 09:47:36 +0900
Subject: [PATCH 32/38] riscv: Unaligned load/store handling for M_MODE

Add handlers for unaligned load and store traps that may be generated
by applications. Code heavily inspired by the OpenSBI project.

Handling of the unaligned access traps is suitable for applications
compiled with or without compressed instructions and is independent of
the kernel CONFIG_RISCV_ISA_C option value.
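As an illustration (this fragment is not part of the patch, and the
function name is made up), an access like the following can compile
down to a plain lw on an address that is not 4-byte aligned. On cores
without hardware support for misaligned accesses this traps, and the
handlers added here emulate the access byte by byte instead of
delivering SIGBUS:

#include <stdint.h>

static inline uint32_t read_word_unaligned(const uint8_t *buf)
{
	/* buf + 1 is in general not 4-byte aligned */
	return *(const uint32_t *)(buf + 1);
}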
Signed-off-by: Damien Le Moal Signed-off-by: Anup Patel Signed-off-by: Palmer Dabbelt --- arch/riscv/kernel/Makefile | 2 +- arch/riscv/kernel/traps.c | 27 +- arch/riscv/kernel/traps_misaligned.c | 370 +++++++++++++++++++++++++++ 3 files changed, 395 insertions(+), 4 deletions(-) create mode 100644 arch/riscv/kernel/traps_misaligned.c diff --git a/arch/riscv/kernel/Makefile b/arch/riscv/kernel/Makefile index c121cc491eb8..1bad93f63dba 100644 --- a/arch/riscv/kernel/Makefile +++ b/arch/riscv/kernel/Makefile @@ -30,7 +30,7 @@ obj-y += cacheinfo.o obj-y += patch.o obj-$(CONFIG_MMU) += vdso.o vdso/ -obj-$(CONFIG_RISCV_M_MODE) += clint.o +obj-$(CONFIG_RISCV_M_MODE) += clint.o traps_misaligned.o obj-$(CONFIG_FPU) += fpu.o obj-$(CONFIG_SMP) += smpboot.o obj-$(CONFIG_SMP) += smp.o diff --git a/arch/riscv/kernel/traps.c b/arch/riscv/kernel/traps.c index 23a57b92cd1d..29aef5cbaf65 100644 --- a/arch/riscv/kernel/traps.c +++ b/arch/riscv/kernel/traps.c @@ -97,12 +97,33 @@ DO_ERROR_INFO(do_trap_insn_fault, SIGSEGV, SEGV_ACCERR, "instruction access fault"); DO_ERROR_INFO(do_trap_insn_illegal, SIGILL, ILL_ILLOPC, "illegal instruction"); -DO_ERROR_INFO(do_trap_load_misaligned, - SIGBUS, BUS_ADRALN, "load address misaligned"); DO_ERROR_INFO(do_trap_load_fault, SIGSEGV, SEGV_ACCERR, "load access fault"); +#ifndef CONFIG_RISCV_M_MODE +DO_ERROR_INFO(do_trap_load_misaligned, + SIGBUS, BUS_ADRALN, "Oops - load address misaligned"); DO_ERROR_INFO(do_trap_store_misaligned, - SIGBUS, BUS_ADRALN, "store (or AMO) address misaligned"); + SIGBUS, BUS_ADRALN, "Oops - store (or AMO) address misaligned"); +#else +int handle_misaligned_load(struct pt_regs *regs); +int handle_misaligned_store(struct pt_regs *regs); + +asmlinkage void do_trap_load_misaligned(struct pt_regs *regs) +{ + if (!handle_misaligned_load(regs)) + return; + do_trap_error(regs, SIGBUS, BUS_ADRALN, regs->epc, + "Oops - load address misaligned"); +} + +asmlinkage void do_trap_store_misaligned(struct pt_regs *regs) +{ + if (!handle_misaligned_store(regs)) + return; + do_trap_error(regs, SIGBUS, BUS_ADRALN, regs->epc, + "Oops - store (or AMO) address misaligned"); +} +#endif DO_ERROR_INFO(do_trap_store_fault, SIGSEGV, SEGV_ACCERR, "store (or AMO) access fault"); DO_ERROR_INFO(do_trap_ecall_u, diff --git a/arch/riscv/kernel/traps_misaligned.c b/arch/riscv/kernel/traps_misaligned.c new file mode 100644 index 000000000000..46c4dafe3ba0 --- /dev/null +++ b/arch/riscv/kernel/traps_misaligned.c @@ -0,0 +1,370 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (C) 2020 Western Digital Corporation or its affiliates. 
+ */ +#include +#include +#include +#include +#include + +#include +#include +#include + +#define INSN_MATCH_LB 0x3 +#define INSN_MASK_LB 0x707f +#define INSN_MATCH_LH 0x1003 +#define INSN_MASK_LH 0x707f +#define INSN_MATCH_LW 0x2003 +#define INSN_MASK_LW 0x707f +#define INSN_MATCH_LD 0x3003 +#define INSN_MASK_LD 0x707f +#define INSN_MATCH_LBU 0x4003 +#define INSN_MASK_LBU 0x707f +#define INSN_MATCH_LHU 0x5003 +#define INSN_MASK_LHU 0x707f +#define INSN_MATCH_LWU 0x6003 +#define INSN_MASK_LWU 0x707f +#define INSN_MATCH_SB 0x23 +#define INSN_MASK_SB 0x707f +#define INSN_MATCH_SH 0x1023 +#define INSN_MASK_SH 0x707f +#define INSN_MATCH_SW 0x2023 +#define INSN_MASK_SW 0x707f +#define INSN_MATCH_SD 0x3023 +#define INSN_MASK_SD 0x707f + +#define INSN_MATCH_FLW 0x2007 +#define INSN_MASK_FLW 0x707f +#define INSN_MATCH_FLD 0x3007 +#define INSN_MASK_FLD 0x707f +#define INSN_MATCH_FLQ 0x4007 +#define INSN_MASK_FLQ 0x707f +#define INSN_MATCH_FSW 0x2027 +#define INSN_MASK_FSW 0x707f +#define INSN_MATCH_FSD 0x3027 +#define INSN_MASK_FSD 0x707f +#define INSN_MATCH_FSQ 0x4027 +#define INSN_MASK_FSQ 0x707f + +#define INSN_MATCH_C_LD 0x6000 +#define INSN_MASK_C_LD 0xe003 +#define INSN_MATCH_C_SD 0xe000 +#define INSN_MASK_C_SD 0xe003 +#define INSN_MATCH_C_LW 0x4000 +#define INSN_MASK_C_LW 0xe003 +#define INSN_MATCH_C_SW 0xc000 +#define INSN_MASK_C_SW 0xe003 +#define INSN_MATCH_C_LDSP 0x6002 +#define INSN_MASK_C_LDSP 0xe003 +#define INSN_MATCH_C_SDSP 0xe002 +#define INSN_MASK_C_SDSP 0xe003 +#define INSN_MATCH_C_LWSP 0x4002 +#define INSN_MASK_C_LWSP 0xe003 +#define INSN_MATCH_C_SWSP 0xc002 +#define INSN_MASK_C_SWSP 0xe003 + +#define INSN_MATCH_C_FLD 0x2000 +#define INSN_MASK_C_FLD 0xe003 +#define INSN_MATCH_C_FLW 0x6000 +#define INSN_MASK_C_FLW 0xe003 +#define INSN_MATCH_C_FSD 0xa000 +#define INSN_MASK_C_FSD 0xe003 +#define INSN_MATCH_C_FSW 0xe000 +#define INSN_MASK_C_FSW 0xe003 +#define INSN_MATCH_C_FLDSP 0x2002 +#define INSN_MASK_C_FLDSP 0xe003 +#define INSN_MATCH_C_FSDSP 0xa002 +#define INSN_MASK_C_FSDSP 0xe003 +#define INSN_MATCH_C_FLWSP 0x6002 +#define INSN_MASK_C_FLWSP 0xe003 +#define INSN_MATCH_C_FSWSP 0xe002 +#define INSN_MASK_C_FSWSP 0xe003 + +#define INSN_LEN(insn) ((((insn) & 0x3) < 0x3) ? 2 : 4) + +#if defined(CONFIG_64BIT) +#define LOG_REGBYTES 3 +#define XLEN 64 +#else +#define LOG_REGBYTES 2 +#define XLEN 32 +#endif +#define REGBYTES (1 << LOG_REGBYTES) +#define XLEN_MINUS_16 ((XLEN) - 16) + +#define SH_RD 7 +#define SH_RS1 15 +#define SH_RS2 20 +#define SH_RS2C 2 + +#define RV_X(x, s, n) (((x) >> (s)) & ((1 << (n)) - 1)) +#define RVC_LW_IMM(x) ((RV_X(x, 6, 1) << 2) | \ + (RV_X(x, 10, 3) << 3) | \ + (RV_X(x, 5, 1) << 6)) +#define RVC_LD_IMM(x) ((RV_X(x, 10, 3) << 3) | \ + (RV_X(x, 5, 2) << 6)) +#define RVC_LWSP_IMM(x) ((RV_X(x, 4, 3) << 2) | \ + (RV_X(x, 12, 1) << 5) | \ + (RV_X(x, 2, 2) << 6)) +#define RVC_LDSP_IMM(x) ((RV_X(x, 5, 2) << 3) | \ + (RV_X(x, 12, 1) << 5) | \ + (RV_X(x, 2, 3) << 6)) +#define RVC_SWSP_IMM(x) ((RV_X(x, 9, 4) << 2) | \ + (RV_X(x, 7, 2) << 6)) +#define RVC_SDSP_IMM(x) ((RV_X(x, 10, 3) << 3) | \ + (RV_X(x, 7, 3) << 6)) +#define RVC_RS1S(insn) (8 + RV_X(insn, SH_RD, 3)) +#define RVC_RS2S(insn) (8 + RV_X(insn, SH_RS2C, 3)) +#define RVC_RS2(insn) RV_X(insn, SH_RS2C, 5) + +#define SHIFT_RIGHT(x, y) \ + ((y) < 0 ? 
((x) << -(y)) : ((x) >> (y))) + +#define REG_MASK \ + ((1 << (5 + LOG_REGBYTES)) - (1 << LOG_REGBYTES)) + +#define REG_OFFSET(insn, pos) \ + (SHIFT_RIGHT((insn), (pos) - LOG_REGBYTES) & REG_MASK) + +#define REG_PTR(insn, pos, regs) \ + (ulong *)((ulong)(regs) + REG_OFFSET(insn, pos)) + +#define GET_RM(insn) (((insn) >> 12) & 7) + +#define GET_RS1(insn, regs) (*REG_PTR(insn, SH_RS1, regs)) +#define GET_RS2(insn, regs) (*REG_PTR(insn, SH_RS2, regs)) +#define GET_RS1S(insn, regs) (*REG_PTR(RVC_RS1S(insn), 0, regs)) +#define GET_RS2S(insn, regs) (*REG_PTR(RVC_RS2S(insn), 0, regs)) +#define GET_RS2C(insn, regs) (*REG_PTR(insn, SH_RS2C, regs)) +#define GET_SP(regs) (*REG_PTR(2, 0, regs)) +#define SET_RD(insn, regs, val) (*REG_PTR(insn, SH_RD, regs) = (val)) +#define IMM_I(insn) ((s32)(insn) >> 20) +#define IMM_S(insn) (((s32)(insn) >> 25 << 5) | \ + (s32)(((insn) >> 7) & 0x1f)) +#define MASK_FUNCT3 0x7000 + +#define GET_PRECISION(insn) (((insn) >> 25) & 3) +#define GET_RM(insn) (((insn) >> 12) & 7) +#define PRECISION_S 0 +#define PRECISION_D 1 + +#define STR(x) XSTR(x) +#define XSTR(x) #x + +#define DECLARE_UNPRIVILEGED_LOAD_FUNCTION(type, insn) \ +static inline type load_##type(const type *addr) \ +{ \ + type val; \ + asm (#insn " %0, %1" \ + : "=&r" (val) : "m" (*addr)); \ + return val; \ +} + +#define DECLARE_UNPRIVILEGED_STORE_FUNCTION(type, insn) \ +static inline void store_##type(type *addr, type val) \ +{ \ + asm volatile (#insn " %0, %1\n" \ + : : "r" (val), "m" (*addr)); \ +} + +DECLARE_UNPRIVILEGED_LOAD_FUNCTION(u8, lbu) +DECLARE_UNPRIVILEGED_LOAD_FUNCTION(u16, lhu) +DECLARE_UNPRIVILEGED_LOAD_FUNCTION(s8, lb) +DECLARE_UNPRIVILEGED_LOAD_FUNCTION(s16, lh) +DECLARE_UNPRIVILEGED_LOAD_FUNCTION(s32, lw) +DECLARE_UNPRIVILEGED_STORE_FUNCTION(u8, sb) +DECLARE_UNPRIVILEGED_STORE_FUNCTION(u16, sh) +DECLARE_UNPRIVILEGED_STORE_FUNCTION(u32, sw) +#if defined(CONFIG_64BIT) +DECLARE_UNPRIVILEGED_LOAD_FUNCTION(u32, lwu) +DECLARE_UNPRIVILEGED_LOAD_FUNCTION(u64, ld) +DECLARE_UNPRIVILEGED_STORE_FUNCTION(u64, sd) +DECLARE_UNPRIVILEGED_LOAD_FUNCTION(ulong, ld) +#else +DECLARE_UNPRIVILEGED_LOAD_FUNCTION(u32, lw) +DECLARE_UNPRIVILEGED_LOAD_FUNCTION(ulong, lw) + +static inline u64 load_u64(const u64 *addr) +{ + return load_u32((u32 *)addr) + + ((u64)load_u32((u32 *)addr + 1) << 32); +} + +static inline void store_u64(u64 *addr, u64 val) +{ + store_u32((u32 *)addr, val); + store_u32((u32 *)addr + 1, val >> 32); +} +#endif + +static inline ulong get_insn(ulong mepc) +{ + register ulong __mepc asm ("a2") = mepc; + ulong val, rvc_mask = 3, tmp; + + asm ("and %[tmp], %[addr], 2\n" + "bnez %[tmp], 1f\n" +#if defined(CONFIG_64BIT) + STR(LWU) " %[insn], (%[addr])\n" +#else + STR(LW) " %[insn], (%[addr])\n" +#endif + "and %[tmp], %[insn], %[rvc_mask]\n" + "beq %[tmp], %[rvc_mask], 2f\n" + "sll %[insn], %[insn], %[xlen_minus_16]\n" + "srl %[insn], %[insn], %[xlen_minus_16]\n" + "j 2f\n" + "1:\n" + "lhu %[insn], (%[addr])\n" + "and %[tmp], %[insn], %[rvc_mask]\n" + "bne %[tmp], %[rvc_mask], 2f\n" + "lhu %[tmp], 2(%[addr])\n" + "sll %[tmp], %[tmp], 16\n" + "add %[insn], %[insn], %[tmp]\n" + "2:" + : [insn] "=&r" (val), [tmp] "=&r" (tmp) + : [addr] "r" (__mepc), [rvc_mask] "r" (rvc_mask), + [xlen_minus_16] "i" (XLEN_MINUS_16)); + + return val; +} + +union reg_data { + u8 data_bytes[8]; + ulong data_ulong; + u64 data_u64; +}; + +int handle_misaligned_load(struct pt_regs *regs) +{ + union reg_data val; + unsigned long epc = regs->epc; + unsigned long insn = get_insn(epc); + unsigned long addr = csr_read(mtval); + int i, fp = 
0, shift = 0, len = 0; + + regs->epc = 0; + + if ((insn & INSN_MASK_LW) == INSN_MATCH_LW) { + len = 4; + shift = 8 * (sizeof(unsigned long) - len); +#if defined(CONFIG_64BIT) + } else if ((insn & INSN_MASK_LD) == INSN_MATCH_LD) { + len = 8; + shift = 8 * (sizeof(unsigned long) - len); + } else if ((insn & INSN_MASK_LWU) == INSN_MATCH_LWU) { + len = 4; +#endif + } else if ((insn & INSN_MASK_FLD) == INSN_MATCH_FLD) { + fp = 1; + len = 8; + } else if ((insn & INSN_MASK_FLW) == INSN_MATCH_FLW) { + fp = 1; + len = 4; + } else if ((insn & INSN_MASK_LH) == INSN_MATCH_LH) { + len = 2; + shift = 8 * (sizeof(unsigned long) - len); + } else if ((insn & INSN_MASK_LHU) == INSN_MATCH_LHU) { + len = 2; +#if defined(CONFIG_64BIT) + } else if ((insn & INSN_MASK_C_LD) == INSN_MATCH_C_LD) { + len = 8; + shift = 8 * (sizeof(unsigned long) - len); + insn = RVC_RS2S(insn) << SH_RD; + } else if ((insn & INSN_MASK_C_LDSP) == INSN_MATCH_C_LDSP && + ((insn >> SH_RD) & 0x1f)) { + len = 8; + shift = 8 * (sizeof(unsigned long) - len); +#endif + } else if ((insn & INSN_MASK_C_LW) == INSN_MATCH_C_LW) { + len = 4; + shift = 8 * (sizeof(unsigned long) - len); + insn = RVC_RS2S(insn) << SH_RD; + } else if ((insn & INSN_MASK_C_LWSP) == INSN_MATCH_C_LWSP && + ((insn >> SH_RD) & 0x1f)) { + len = 4; + shift = 8 * (sizeof(unsigned long) - len); + } else if ((insn & INSN_MASK_C_FLD) == INSN_MATCH_C_FLD) { + fp = 1; + len = 8; + insn = RVC_RS2S(insn) << SH_RD; + } else if ((insn & INSN_MASK_C_FLDSP) == INSN_MATCH_C_FLDSP) { + fp = 1; + len = 8; +#if defined(CONFIG_32BIT) + } else if ((insn & INSN_MASK_C_FLW) == INSN_MATCH_C_FLW) { + fp = 1; + len = 4; + insn = RVC_RS2S(insn) << SH_RD; + } else if ((insn & INSN_MASK_C_FLWSP) == INSN_MATCH_C_FLWSP) { + fp = 1; + len = 4; +#endif + } else { + regs->epc = epc; + return -1; + } + + val.data_u64 = 0; + for (i = 0; i < len; i++) + val.data_bytes[i] = load_u8((void *)(addr + i)); + + if (fp) + return -1; + SET_RD(insn, regs, val.data_ulong << shift >> shift); + + regs->epc = epc + INSN_LEN(insn); + + return 0; +} + +int handle_misaligned_store(struct pt_regs *regs) +{ + union reg_data val; + unsigned long epc = regs->epc; + unsigned long insn = get_insn(epc); + unsigned long addr = csr_read(mtval); + int i, len = 0; + + regs->epc = 0; + + val.data_ulong = GET_RS2(insn, regs); + + if ((insn & INSN_MASK_SW) == INSN_MATCH_SW) { + len = 4; +#if defined(CONFIG_64BIT) + } else if ((insn & INSN_MASK_SD) == INSN_MATCH_SD) { + len = 8; +#endif + } else if ((insn & INSN_MASK_SH) == INSN_MATCH_SH) { + len = 2; +#if defined(CONFIG_64BIT) + } else if ((insn & INSN_MASK_C_SD) == INSN_MATCH_C_SD) { + len = 8; + val.data_ulong = GET_RS2S(insn, regs); + } else if ((insn & INSN_MASK_C_SDSP) == INSN_MATCH_C_SDSP && + ((insn >> SH_RD) & 0x1f)) { + len = 8; + val.data_ulong = GET_RS2C(insn, regs); +#endif + } else if ((insn & INSN_MASK_C_SW) == INSN_MATCH_C_SW) { + len = 4; + val.data_ulong = GET_RS2S(insn, regs); + } else if ((insn & INSN_MASK_C_SWSP) == INSN_MATCH_C_SWSP && + ((insn >> SH_RD) & 0x1f)) { + len = 4; + val.data_ulong = GET_RS2C(insn, regs); + } else { + regs->epc = epc; + return -1; + } + + for (i = 0; i < len; i++) + store_u8((void *)(addr + i), val.data_bytes[i]); + + regs->epc = epc + INSN_LEN(insn); + + return 0; +} From 335b139057ef79dbede01dea6e8c3f47c2b88802 Mon Sep 17 00:00:00 2001 From: Damien Le Moal Date: Mon, 16 Mar 2020 09:47:38 +0900 Subject: [PATCH 33/38] riscv: Add SOC early init support Add a mechanism for early SoC initialization for platforms that need additional hardware 
initialization not possible through the regular device tree and drivers
mechanism. With this, a SoC specific initialization function can be
called very early, before DTB parsing is done by parse_dtb() in the
Linux RISC-V kernel setup code.

This can be very useful for early hardware initialization for No-MMU
kernels booted directly in M-mode because it is quite likely that no
other booting stage exists prior to the No-MMU kernel.

Example use of a SoC early initialization is as follows:

static void vendor_abc_early_init(const void *fdt)
{
	/*
	 * some early init code here that can use simple matches
	 * against the flat device tree file.
	 */
}
SOC_EARLY_INIT_DECLARE(vendor_abc, "vendor,abc", vendor_abc_early_init);

This early initialization function is executed only if the flat device
tree for the board has a 'compatible = "vendor,abc"' entry.

Signed-off-by: Damien Le Moal
Signed-off-by: Anup Patel
Signed-off-by: Palmer Dabbelt
---
 arch/riscv/include/asm/soc.h    | 23 +++++++++++++++++++++++
 arch/riscv/kernel/Makefile      |  1 +
 arch/riscv/kernel/head.S        |  1 +
 arch/riscv/kernel/soc.c         | 28 ++++++++++++++++++++++++++++
 arch/riscv/kernel/vmlinux.lds.S |  6 ++++++
 5 files changed, 59 insertions(+)
 create mode 100644 arch/riscv/include/asm/soc.h
 create mode 100644 arch/riscv/kernel/soc.c

diff --git a/arch/riscv/include/asm/soc.h b/arch/riscv/include/asm/soc.h
new file mode 100644
index 000000000000..7cec1968c8b4
--- /dev/null
+++ b/arch/riscv/include/asm/soc.h
@@ -0,0 +1,23 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * Copyright (C) 2020 Western Digital Corporation or its affiliates.
+ */
+
+#ifndef _ASM_RISCV_SOC_H
+#define _ASM_RISCV_SOC_H
+
+#include
+#include
+#include
+
+#define SOC_EARLY_INIT_DECLARE(name, compat, fn)		\
+	static const struct of_device_id __soc_early_init__##name	\
+		__used __section(__soc_early_init_table)	\
+		 = { .compatible = compat, .data = fn }
+
+void soc_early_init(void);
+
+extern unsigned long __soc_early_init_table_start;
+extern unsigned long __soc_early_init_table_end;
+
+#endif

diff --git a/arch/riscv/kernel/Makefile b/arch/riscv/kernel/Makefile
index 1bad93f63dba..86c83081044f 100644
--- a/arch/riscv/kernel/Makefile
+++ b/arch/riscv/kernel/Makefile
@@ -11,6 +11,7 @@ endif

 extra-y += head.o
 extra-y += vmlinux.lds

+obj-y += soc.o
 obj-y += cpu.o
 obj-y += cpufeature.o
 obj-y += entry.o

diff --git a/arch/riscv/kernel/head.S b/arch/riscv/kernel/head.S
index e5115d5e0b3a..98a406474e7d 100644
--- a/arch/riscv/kernel/head.S
+++ b/arch/riscv/kernel/head.S
@@ -234,6 +234,7 @@ clear_bss_done:
 	call kasan_early_init
 #endif
 	/* Start the kernel */
+	call soc_early_init
 	call parse_dtb
 	tail start_kernel

diff --git a/arch/riscv/kernel/soc.c b/arch/riscv/kernel/soc.c
new file mode 100644
index 000000000000..0b3b3dc9ad0f
--- /dev/null
+++ b/arch/riscv/kernel/soc.c
@@ -0,0 +1,28 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Copyright (C) 2020 Western Digital Corporation or its affiliates.
+ */
+#include
+#include
+#include
+#include
+
+/*
+ * This is called extremely early, before parse_dtb(), to allow initializing
+ * SoC hardware before memory or any device driver initialization.
+ */
+void __init soc_early_init(void)
+{
+	void (*early_fn)(const void *fdt);
+	const struct of_device_id *s;
+	const void *fdt = dtb_early_va;
+
+	for (s = (void *)&__soc_early_init_table_start;
+	     (void *)s < (void *)&__soc_early_init_table_end; s++) {
+		if (!fdt_node_check_compatible(fdt, 0, s->compatible)) {
+			early_fn = s->data;
+			early_fn(fdt);
+			return;
+		}
+	}
+}

diff --git a/arch/riscv/kernel/vmlinux.lds.S b/arch/riscv/kernel/vmlinux.lds.S
index 435cd60dca04..0339b6bbe11a 100644
--- a/arch/riscv/kernel/vmlinux.lds.S
+++ b/arch/riscv/kernel/vmlinux.lds.S
@@ -28,6 +28,12 @@ SECTIONS
 	__init_begin = .;
 	INIT_TEXT_SECTION(PAGE_SIZE)
 	INIT_DATA_SECTION(16)
+	. = ALIGN(8);
+	__soc_early_init_table : {
+		__soc_early_init_table_start = .;
+		KEEP(*(__soc_early_init_table))
+		__soc_early_init_table_end = .;
+	}
 	/* we have to discard exit text and such at runtime, not link time */
 	.exit.text :
 	{

From c48c4a4c7eadb76593965e2b956e4e2b23a4e388 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig
Date: Mon, 16 Mar 2020 09:47:39 +0900
Subject: [PATCH 34/38] riscv: Add Kendryte K210 SoC support

Add support for the Kendryte K210 RISC-V SoC. For now, this support
only provides a simple sysctl driver that allows setting up the CPU
and UART clocks.

This support is enabled through the new Kconfig option SOC_KENDRYTE
and defines the config option CONFIG_K210_SYSCTL to enable compilation
of the K210 SoC sysctl driver.

The sysctl driver also registers an early SoC initialization function
to enable general purpose use of the 2MB of SRAM normally reserved for
the SoC AI engine. This initialization function is automatically called
before the early device tree initialization, using the flat device tree
root node compatible property matching the value "kendryte,k210".

Signed-off-by: Christoph Hellwig
Signed-off-by: Damien Le Moal
[Palmer: Add missing endmenu in Kconfig.socs]
Signed-off-by: Palmer Dabbelt
---
 arch/riscv/Kconfig.socs            |  30 ++++
 drivers/soc/Kconfig                |   1 +
 drivers/soc/Makefile               |   1 +
 drivers/soc/kendryte/Kconfig       |  14 ++
 drivers/soc/kendryte/Makefile      |   3 +
 drivers/soc/kendryte/k210-sysctl.c | 248 +++++++++++++++++++++++++++++
 6 files changed, 297 insertions(+)
 create mode 100644 drivers/soc/kendryte/Kconfig
 create mode 100644 drivers/soc/kendryte/Makefile
 create mode 100644 drivers/soc/kendryte/k210-sysctl.c

diff --git a/arch/riscv/Kconfig.socs b/arch/riscv/Kconfig.socs
index d325b67d00df..69071578e181 100644
--- a/arch/riscv/Kconfig.socs
+++ b/arch/riscv/Kconfig.socs
@@ -10,4 +10,34 @@ config SOC_SIFIVE
 	help
 	  This enables support for SiFive SoC platform hardware.

+config SOC_VIRT
+	bool "QEMU Virt Machine"
+	select VIRTIO_PCI
+	select VIRTIO_BALLOON
+	select VIRTIO_MMIO
+	select VIRTIO_CONSOLE
+	select VIRTIO_NET
+	select NET_9P_VIRTIO
+	select VIRTIO_BLK
+	select SCSI_VIRTIO
+	select DRM_VIRTIO_GPU
+	select HW_RANDOM_VIRTIO
+	select RPMSG_CHAR
+	select RPMSG_VIRTIO
+	select CRYPTO_DEV_VIRTIO
+	select VIRTIO_INPUT
+	select POWER_RESET_SYSCON
+	select POWER_RESET_SYSCON_POWEROFF
+	select GOLDFISH
+	select RTC_DRV_GOLDFISH
+	select SIFIVE_PLIC
+	help
+	  This enables support for the QEMU Virt Machine.
+
+config SOC_KENDRYTE
+	bool "Kendryte K210 SoC"
+	depends on !MMU
+	help
+	  This enables support for Kendryte K210 SoC platform hardware.
+
 endmenu

diff --git a/drivers/soc/Kconfig b/drivers/soc/Kconfig
index 1778f8c62861..425ab6f7e375 100644
--- a/drivers/soc/Kconfig
+++ b/drivers/soc/Kconfig
@@ -22,5 +22,6 @@ source "drivers/soc/ux500/Kconfig"
 source "drivers/soc/versatile/Kconfig"
 source "drivers/soc/xilinx/Kconfig"
 source "drivers/soc/zte/Kconfig"
+source "drivers/soc/kendryte/Kconfig"

 endmenu

diff --git a/drivers/soc/Makefile b/drivers/soc/Makefile
index 8b49d782a1ab..af58063bb989 100644
--- a/drivers/soc/Makefile
+++ b/drivers/soc/Makefile
@@ -28,3 +28,4 @@ obj-$(CONFIG_ARCH_U8500) += ux500/
 obj-$(CONFIG_PLAT_VERSATILE) += versatile/
 obj-y += xilinx/
 obj-$(CONFIG_ARCH_ZX) += zte/
+obj-$(CONFIG_SOC_KENDRYTE) += kendryte/

diff --git a/drivers/soc/kendryte/Kconfig b/drivers/soc/kendryte/Kconfig
new file mode 100644
index 000000000000..49785b1b0217
--- /dev/null
+++ b/drivers/soc/kendryte/Kconfig
@@ -0,0 +1,14 @@
+# SPDX-License-Identifier: GPL-2.0
+
+if SOC_KENDRYTE
+
+config K210_SYSCTL
+	bool "Kendryte K210 system controller"
+	default y
+	depends on RISCV
+	help
+	  Enables control of the various K210 clocks, as well as general
+	  purpose use of the extra 2MB of SRAM normally reserved for the
+	  AI engine.
+
+endif

diff --git a/drivers/soc/kendryte/Makefile b/drivers/soc/kendryte/Makefile
new file mode 100644
index 000000000000..002d9ce95c0d
--- /dev/null
+++ b/drivers/soc/kendryte/Makefile
@@ -0,0 +1,3 @@
+# SPDX-License-Identifier: GPL-2.0
+
+obj-$(CONFIG_K210_SYSCTL) += k210-sysctl.o

diff --git a/drivers/soc/kendryte/k210-sysctl.c b/drivers/soc/kendryte/k210-sysctl.c
new file mode 100644
index 000000000000..4608fbca20e1
--- /dev/null
+++ b/drivers/soc/kendryte/k210-sysctl.c
@@ -0,0 +1,248 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Copyright (c) 2019 Christoph Hellwig.
+ * Copyright (c) 2019 Western Digital Corporation or its affiliates.
+ */
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+#define K210_SYSCTL_CLK0_FREQ		26000000UL
+
+/* Registers base address */
+#define K210_SYSCTL_SYSCTL_BASE_ADDR	0x50440000ULL
+
+/* Registers */
+#define K210_SYSCTL_PLL0	0x08
+#define K210_SYSCTL_PLL1	0x0c
+/* clkr: 4bits, clkf1: 6bits, clkod: 4bits, bwadj: 6bits */
+#define PLL_RESET		(1 << 20)
+#define PLL_PWR			(1 << 21)
+#define PLL_INTFB		(1 << 22)
+#define PLL_BYPASS		(1 << 23)
+#define PLL_TEST		(1 << 24)
+#define PLL_OUT_EN		(1 << 25)
+#define PLL_TEST_EN		(1 << 26)
+#define K210_SYSCTL_PLL_LOCK	0x18
+#define PLL0_LOCK1		(1 << 0)
+#define PLL0_LOCK2		(1 << 1)
+#define PLL0_SLIP_CLEAR		(1 << 2)
+#define PLL0_TEST_CLK_OUT	(1 << 3)
+#define PLL1_LOCK1		(1 << 8)
+#define PLL1_LOCK2		(1 << 9)
+#define PLL1_SLIP_CLEAR		(1 << 10)
+#define PLL1_TEST_CLK_OUT	(1 << 11)
+#define PLL2_LOCK1		(1 << 16)
+#define PLL2_LOCK2		(1 << 17)
+#define PLL2_SLIP_CLEAR		(1 << 18)
+#define PLL2_TEST_CLK_OUT	(1 << 19)
+#define K210_SYSCTL_CLKSEL0	0x20
+#define CLKSEL_ACLK		(1 << 0)
+#define K210_SYSCTL_CLKEN_CENT	0x28
+#define CLKEN_CPU		(1 << 0)
+#define CLKEN_SRAM0		(1 << 1)
+#define CLKEN_SRAM1		(1 << 2)
+#define CLKEN_APB0		(1 << 3)
+#define CLKEN_APB1		(1 << 4)
+#define CLKEN_APB2		(1 << 5)
+#define K210_SYSCTL_CLKEN_PERI	0x2c
+#define CLKEN_ROM		(1 << 0)
+#define CLKEN_DMA		(1 << 1)
+#define CLKEN_AI		(1 << 2)
+#define CLKEN_DVP		(1 << 3)
+#define CLKEN_FFT		(1 << 4)
+#define CLKEN_GPIO		(1 << 5)
+#define CLKEN_SPI0		(1 << 6)
+#define CLKEN_SPI1		(1 << 7)
+#define CLKEN_SPI2		(1 << 8)
+#define CLKEN_SPI3		(1 << 9)
+#define CLKEN_I2S0		(1 << 10)
+#define CLKEN_I2S1		(1 << 11)
+#define CLKEN_I2S2		(1 << 12)
+#define CLKEN_I2C0		(1 << 13)
+#define CLKEN_I2C1		(1 << 14)
+#define CLKEN_I2C2		(1 << 15)
+#define CLKEN_UART1		(1 << 16)
+#define CLKEN_UART2		(1 << 17)
+#define CLKEN_UART3		(1 << 18)
+#define CLKEN_AES		(1 << 19)
+#define CLKEN_FPIO		(1 << 20)
+#define CLKEN_TIMER0		(1 << 21)
+#define CLKEN_TIMER1		(1 << 22)
+#define CLKEN_TIMER2		(1 << 23)
+#define CLKEN_WDT0		(1 << 24)
+#define CLKEN_WDT1		(1 << 25)
+#define CLKEN_SHA		(1 << 26)
+#define CLKEN_OTP		(1 << 27)
+#define CLKEN_RTC		(1 << 29)
+
+struct k210_sysctl {
+	void __iomem	*regs;
+	struct clk_hw	hw;
+};
+
+static void k210_set_bits(u32 val, void __iomem *reg)
+{
+	writel(readl(reg) | val, reg);
+}
+
+static void k210_clear_bits(u32 val, void __iomem *reg)
+{
+	writel(readl(reg) & ~val, reg);
+}
+
+static void k210_pll1_enable(void __iomem *regs)
+{
+	u32 val;
+
+	val = readl(regs + K210_SYSCTL_PLL1);
+	val &= ~GENMASK(19, 0);				/* clkr1 = 0 */
+	val |= FIELD_PREP(GENMASK(9, 4), 0x3B);		/* clkf1 = 59 */
+	val |= FIELD_PREP(GENMASK(13, 10), 0x3);	/* clkod1 = 3 */
+	val |= FIELD_PREP(GENMASK(19, 14), 0x3B);	/* bwadj1 = 59 */
+	writel(val, regs + K210_SYSCTL_PLL1);
+
+	k210_clear_bits(PLL_BYPASS, regs + K210_SYSCTL_PLL1);
+	k210_set_bits(PLL_PWR, regs + K210_SYSCTL_PLL1);
+
+	/*
+	 * Reset the pll. The magic NOPs come from the Kendryte reference SDK.
+ */ + k210_clear_bits(PLL_RESET, regs + K210_SYSCTL_PLL1); + k210_set_bits(PLL_RESET, regs + K210_SYSCTL_PLL1); + nop(); + nop(); + k210_clear_bits(PLL_RESET, regs + K210_SYSCTL_PLL1); + + for (;;) { + val = readl(regs + K210_SYSCTL_PLL_LOCK); + if (val & PLL1_LOCK2) + break; + writel(val | PLL1_SLIP_CLEAR, regs + K210_SYSCTL_PLL_LOCK); + } + + k210_set_bits(PLL_OUT_EN, regs + K210_SYSCTL_PLL1); +} + +static unsigned long k210_sysctl_clk_recalc_rate(struct clk_hw *hw, + unsigned long parent_rate) +{ + struct k210_sysctl *s = container_of(hw, struct k210_sysctl, hw); + u32 clksel0, pll0; + u64 pll0_freq, clkr0, clkf0, clkod0; + + /* + * If the clock selector is not set, use the base frequency. + * Otherwise, use PLL0 frequency with a frequency divisor. + */ + clksel0 = readl(s->regs + K210_SYSCTL_CLKSEL0); + if (!(clksel0 & CLKSEL_ACLK)) + return K210_SYSCTL_CLK0_FREQ; + + /* + * Get PLL0 frequency: + * freq = base frequency * clkf0 / (clkr0 * clkod0) + */ + pll0 = readl(s->regs + K210_SYSCTL_PLL0); + clkr0 = 1 + FIELD_GET(GENMASK(3, 0), pll0); + clkf0 = 1 + FIELD_GET(GENMASK(9, 4), pll0); + clkod0 = 1 + FIELD_GET(GENMASK(13, 10), pll0); + pll0_freq = clkf0 * K210_SYSCTL_CLK0_FREQ / (clkr0 * clkod0); + + /* Get the frequency divisor from the clock selector */ + return pll0_freq / (2ULL << FIELD_GET(0x00000006, clksel0)); +} + +static const struct clk_ops k210_sysctl_clk_ops = { + .recalc_rate = k210_sysctl_clk_recalc_rate, +}; + +static const struct clk_init_data k210_clk_init_data = { + .name = "k210-sysctl-pll1", + .ops = &k210_sysctl_clk_ops, +}; + +static int k210_sysctl_probe(struct platform_device *pdev) +{ + struct k210_sysctl *s; + int error; + + pr_info("Kendryte K210 SoC sysctl\n"); + + s = devm_kzalloc(&pdev->dev, sizeof(*s), GFP_KERNEL); + if (!s) + return -ENOMEM; + + s->regs = devm_ioremap_resource(&pdev->dev, + platform_get_resource(pdev, IORESOURCE_MEM, 0)); + if (IS_ERR(s->regs)) + return PTR_ERR(s->regs); + + s->hw.init = &k210_clk_init_data; + error = devm_clk_hw_register(&pdev->dev, &s->hw); + if (error) { + dev_err(&pdev->dev, "failed to register clk"); + return error; + } + + error = devm_of_clk_add_hw_provider(&pdev->dev, of_clk_hw_simple_get, + &s->hw); + if (error) { + dev_err(&pdev->dev, "adding clk provider failed\n"); + return error; + } + + return 0; +} + +static const struct of_device_id k210_sysctl_of_match[] = { + { .compatible = "kendryte,k210-sysctl", }, + {} +}; + +static struct platform_driver k210_sysctl_driver = { + .driver = { + .name = "k210-sysctl", + .of_match_table = k210_sysctl_of_match, + }, + .probe = k210_sysctl_probe, +}; + +static int __init k210_sysctl_init(void) +{ + return platform_driver_register(&k210_sysctl_driver); +} +core_initcall(k210_sysctl_init); + +/* + * This needs to be called very early during initialization, given that + * PLL1 needs to be enabled to be able to use all SRAM. 
+ */
+static void __init k210_soc_early_init(const void *fdt)
+{
+	void __iomem *regs;
+
+	regs = ioremap(K210_SYSCTL_SYSCTL_BASE_ADDR, 0x1000);
+	if (!regs)
+		panic("K210 sysctl ioremap");
+
+	/* Enable PLL1 to make the KPU SRAM usable */
+	k210_pll1_enable(regs);
+
+	k210_set_bits(PLL_OUT_EN, regs + K210_SYSCTL_PLL0);
+
+	k210_set_bits(CLKEN_CPU | CLKEN_SRAM0 | CLKEN_SRAM1,
+		      regs + K210_SYSCTL_CLKEN_CENT);
+	k210_set_bits(CLKEN_ROM | CLKEN_TIMER0 | CLKEN_RTC,
+		      regs + K210_SYSCTL_CLKEN_PERI);
+
+	k210_set_bits(CLKSEL_ACLK, regs + K210_SYSCTL_CLKSEL0);
+
+	iounmap(regs);
+}
+SOC_EARLY_INIT_DECLARE(generic_k210, "kendryte,k210", k210_soc_early_init);

From 8759a42bf1d04232835ed9287860fe6c124f3aac Mon Sep 17 00:00:00 2001
From: Damien Le Moal
Date: Mon, 16 Mar 2020 09:47:40 +0900
Subject: [PATCH 35/38] riscv: Select required drivers for Kendryte SOC

This patch selects the drivers required for the Kendryte K210 SoC.
Since K210 SoC based boards do not provide a device tree, this patch
also enables the BUILTIN_DTB option.

Signed-off-by: Damien Le Moal
Reviewed-by: Anup Patel
Reviewed-by: Palmer Dabbelt
Signed-off-by: Palmer Dabbelt
---
 arch/riscv/Kconfig.socs | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/arch/riscv/Kconfig.socs b/arch/riscv/Kconfig.socs
index 69071578e181..a843100124ae 100644
--- a/arch/riscv/Kconfig.socs
+++ b/arch/riscv/Kconfig.socs
@@ -37,6 +37,10 @@ config SOC_VIRT
 config SOC_KENDRYTE
 	bool "Kendryte K210 SoC"
 	depends on !MMU
+	select BUILTIN_DTB
+	select SERIAL_SIFIVE if TTY
+	select SERIAL_SIFIVE_CONSOLE if TTY
+	select SIFIVE_PLIC
 	help
 	  This enables support for Kendryte K210 SoC platform hardware.

From 5ba568f57f0ae4826beb6aaeecb12e68219b8a0b Mon Sep 17 00:00:00 2001
From: Damien Le Moal
Date: Mon, 16 Mar 2020 09:47:41 +0900
Subject: [PATCH 36/38] riscv: Add Kendryte K210 device tree

Add a generic device tree for Kendryte K210 SoC based boards. This is
for now a very simple device tree describing the core elements of the
SoC. This is suitable (and tested) for the Kendryte KD233 development
board, the Sipeed MAIX M1 Dan Dock board and the Sipeed MAIXDUINO
board.
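As a sketch of how a board would build on this file (the vendor and
board names below are hypothetical, not part of this patch), a board
specific device tree only needs to include k210.dtsi and override what
differs:

/dts-v1/;

#include "k210.dtsi"

/ {
	model = "Vendor ABC K210 board";
	compatible = "vendor,abc-board", "kendryte,k210";
};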
Signed-off-by: Damien Le Moal
Reviewed-by: Sean Anderson
Signed-off-by: Palmer Dabbelt
---
 arch/riscv/boot/dts/Makefile           |   1 +
 arch/riscv/boot/dts/kendryte/Makefile  |   2 +
 arch/riscv/boot/dts/kendryte/k210.dts  |  23 +++++
 arch/riscv/boot/dts/kendryte/k210.dtsi | 123 +++++++++++++++++++++++++
 include/dt-bindings/clock/k210-clk.h   |  20 ++++
 5 files changed, 169 insertions(+)
 create mode 100644 arch/riscv/boot/dts/kendryte/Makefile
 create mode 100644 arch/riscv/boot/dts/kendryte/k210.dts
 create mode 100644 arch/riscv/boot/dts/kendryte/k210.dtsi
 create mode 100644 include/dt-bindings/clock/k210-clk.h

diff --git a/arch/riscv/boot/dts/Makefile b/arch/riscv/boot/dts/Makefile
index dcc3ada78455..557f0b519c8e 100644
--- a/arch/riscv/boot/dts/Makefile
+++ b/arch/riscv/boot/dts/Makefile
@@ -1,2 +1,3 @@
 # SPDX-License-Identifier: GPL-2.0
 subdir-y += sifive
+subdir-y += kendryte

diff --git a/arch/riscv/boot/dts/kendryte/Makefile b/arch/riscv/boot/dts/kendryte/Makefile
new file mode 100644
index 000000000000..815444e69e89
--- /dev/null
+++ b/arch/riscv/boot/dts/kendryte/Makefile
@@ -0,0 +1,2 @@
+# SPDX-License-Identifier: GPL-2.0
+dtb-$(CONFIG_SOC_KENDRYTE) += k210.dtb

diff --git a/arch/riscv/boot/dts/kendryte/k210.dts b/arch/riscv/boot/dts/kendryte/k210.dts
new file mode 100644
index 000000000000..0d1f28fce6b2
--- /dev/null
+++ b/arch/riscv/boot/dts/kendryte/k210.dts
@@ -0,0 +1,23 @@
+// SPDX-License-Identifier: GPL-2.0+
+/*
+ * Copyright (C) 2020 Western Digital Corporation or its affiliates.
+ */
+
+/dts-v1/;
+
+#include "k210.dtsi"
+
+/ {
+	model = "Kendryte K210 generic";
+	compatible = "kendryte,k210";
+
+	chosen {
+		bootargs = "earlycon console=ttySIF0";
+		stdout-path = "serial0";
+	};
+};
+
+&uarths0 {
+	status = "okay";
+};
+

diff --git a/arch/riscv/boot/dts/kendryte/k210.dtsi b/arch/riscv/boot/dts/kendryte/k210.dtsi
new file mode 100644
index 000000000000..c1df56ccb8d5
--- /dev/null
+++ b/arch/riscv/boot/dts/kendryte/k210.dtsi
@@ -0,0 +1,123 @@
+// SPDX-License-Identifier: GPL-2.0+
+/*
+ * Copyright (C) 2019 Sean Anderson
+ * Copyright (C) 2020 Western Digital Corporation or its affiliates.
+ */
+#include
+
+/ {
+	/*
+	 * Although the K210 is a 64-bit CPU, the address bus is only 32-bits
+	 * wide, and the upper half of all addresses is ignored.
+	 */
+	#address-cells = <1>;
+	#size-cells = <1>;
+	compatible = "kendryte,k210";
+
+	aliases {
+		serial0 = &uarths0;
+	};
+
+	/*
+	 * The K210 has an sv39 MMU following the privilege specification v1.9.
+	 * Since this is a non-ratified draft specification, the kernel does not
+	 * support it and the K210 support is enabled only for the !MMU case.
+	 * Be consistent with this by setting the CPUs' MMU type to "none".
+ */ + cpus { + #address-cells = <1>; + #size-cells = <0>; + timebase-frequency = <7800000>; + cpu0: cpu@0 { + device_type = "cpu"; + reg = <0>; + compatible = "kendryte,k210", "sifive,rocket0", "riscv"; + riscv,isa = "rv64imafdc"; + mmu-type = "none"; + i-cache-size = <0x8000>; + i-cache-block-size = <64>; + d-cache-size = <0x8000>; + d-cache-block-size = <64>; + clocks = <&sysctl K210_CLK_CPU>; + clock-frequency = <390000000>; + cpu0_intc: interrupt-controller { + #interrupt-cells = <1>; + interrupt-controller; + compatible = "riscv,cpu-intc"; + }; + }; + cpu1: cpu@1 { + device_type = "cpu"; + reg = <1>; + compatible = "kendryte,k210", "sifive,rocket0", "riscv"; + riscv,isa = "rv64imafdc"; + mmu-type = "none"; + i-cache-size = <0x8000>; + i-cache-block-size = <64>; + d-cache-size = <0x8000>; + d-cache-block-size = <64>; + clocks = <&sysctl K210_CLK_CPU>; + clock-frequency = <390000000>; + cpu1_intc: interrupt-controller { + #interrupt-cells = <1>; + interrupt-controller; + compatible = "riscv,cpu-intc"; + }; + }; + }; + + sram: memory@80000000 { + device_type = "memory"; + reg = <0x80000000 0x400000>, + <0x80400000 0x200000>, + <0x80600000 0x200000>; + reg-names = "sram0", "sram1", "aisram"; + }; + + clocks { + in0: oscillator { + compatible = "fixed-clock"; + #clock-cells = <0>; + clock-frequency = <26000000>; + }; + }; + + soc { + #address-cells = <1>; + #size-cells = <1>; + compatible = "kendryte,k210-soc", "simple-bus"; + ranges; + interrupt-parent = <&plic0>; + + sysctl: sysctl@50440000 { + compatible = "kendryte,k210-sysctl", "simple-mfd"; + reg = <0x50440000 0x1000>; + #clock-cells = <1>; + }; + + clint0: interrupt-controller@2000000 { + compatible = "riscv,clint0"; + reg = <0x2000000 0xC000>; + interrupts-extended = <&cpu0_intc 3>, <&cpu1_intc 3>; + clocks = <&sysctl K210_CLK_ACLK>; + }; + + plic0: interrupt-controller@c000000 { + #interrupt-cells = <1>; + interrupt-controller; + compatible = "kendryte,k210-plic0", "riscv,plic0"; + reg = <0xC000000 0x4000000>; + interrupts-extended = <&cpu0_intc 11>, <&cpu0_intc 0xffffffff>, + <&cpu1_intc 11>, <&cpu1_intc 0xffffffff>; + riscv,ndev = <65>; + riscv,max-priority = <7>; + }; + + uarths0: serial@38000000 { + compatible = "kendryte,k210-uarths", "sifive,uart0"; + reg = <0x38000000 0x1000>; + interrupts = <33>; + clocks = <&sysctl K210_CLK_CPU>; + }; + }; +}; diff --git a/include/dt-bindings/clock/k210-clk.h b/include/dt-bindings/clock/k210-clk.h new file mode 100644 index 000000000000..5a2fd64d1a49 --- /dev/null +++ b/include/dt-bindings/clock/k210-clk.h @@ -0,0 +1,20 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* + * Copyright (C) 2019-20 Sean Anderson + * Copyright (c) 2020 Western Digital Corporation or its affiliates. + */ +#ifndef K210_CLK_H +#define K210_CLK_H + +/* + * Arbitrary identifiers for clocks. + * The structure is: in0 -> pll0 -> aclk -> cpu + * + * Since we use the hardware defaults for now, set all these to the same clock. + */ +#define K210_CLK_PLL0 0 +#define K210_CLK_PLL1 0 +#define K210_CLK_ACLK 0 +#define K210_CLK_CPU 0 + +#endif /* K210_CLK_H */ From aa10eb6bb8a9c12d238e77fdd470db60cd7fb769 Mon Sep 17 00:00:00 2001 From: Damien Le Moal Date: Mon, 16 Mar 2020 09:47:42 +0900 Subject: [PATCH 37/38] riscv: Kendryte K210 default config This patch adds a defconfig file to build No-MMU kernels meant for boards based on the Kendryte K210 SoC. 
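For example (the cross compiler prefix below is only illustrative and
depends on the local toolchain), such a kernel can be configured and
built with:

$ make ARCH=riscv CROSS_COMPILE=riscv64-linux-gnu- nommu_k210_defconfig
$ make ARCH=riscv CROSS_COMPILE=riscv64-linux-gnu-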
Signed-off-by: Damien Le Moal Signed-off-by: Palmer Dabbelt --- arch/riscv/configs/nommu_k210_defconfig | 68 +++++++++++++++++++++++++ 1 file changed, 68 insertions(+) create mode 100644 arch/riscv/configs/nommu_k210_defconfig diff --git a/arch/riscv/configs/nommu_k210_defconfig b/arch/riscv/configs/nommu_k210_defconfig new file mode 100644 index 000000000000..632aa2f95e57 --- /dev/null +++ b/arch/riscv/configs/nommu_k210_defconfig @@ -0,0 +1,68 @@ +# CONFIG_CPU_ISOLATION is not set +CONFIG_LOG_BUF_SHIFT=15 +CONFIG_PRINTK_SAFE_LOG_BUF_SHIFT=12 +CONFIG_BLK_DEV_INITRD=y +CONFIG_INITRAMFS_SOURCE="" +CONFIG_INITRAMFS_FORCE=y +# CONFIG_RD_BZIP2 is not set +# CONFIG_RD_LZMA is not set +# CONFIG_RD_XZ is not set +# CONFIG_RD_LZO is not set +# CONFIG_RD_LZ4 is not set +# CONFIG_BOOT_CONFIG is not set +CONFIG_CC_OPTIMIZE_FOR_SIZE=y +# CONFIG_SYSFS_SYSCALL is not set +# CONFIG_FHANDLE is not set +# CONFIG_BASE_FULL is not set +# CONFIG_EPOLL is not set +# CONFIG_SIGNALFD is not set +# CONFIG_TIMERFD is not set +# CONFIG_EVENTFD is not set +# CONFIG_AIO is not set +# CONFIG_IO_URING is not set +# CONFIG_ADVISE_SYSCALLS is not set +# CONFIG_MEMBARRIER is not set +# CONFIG_KALLSYMS is not set +CONFIG_EMBEDDED=y +# CONFIG_VM_EVENT_COUNTERS is not set +# CONFIG_COMPAT_BRK is not set +CONFIG_SLOB=y +# CONFIG_SLAB_MERGE_DEFAULT is not set +# CONFIG_MMU is not set +CONFIG_SOC_KENDRYTE=y +CONFIG_MAXPHYSMEM_2GB=y +CONFIG_SMP=y +CONFIG_NR_CPUS=2 +CONFIG_CMDLINE="earlycon console=ttySIF0" +CONFIG_CMDLINE_FORCE=y +CONFIG_USE_BUILTIN_DTB=y +CONFIG_BUILTIN_DTB_SOURCE="kendryte/k210" +# CONFIG_BLOCK is not set +CONFIG_BINFMT_FLAT=y +# CONFIG_COREDUMP is not set +CONFIG_DEVTMPFS=y +CONFIG_DEVTMPFS_MOUNT=y +# CONFIG_FW_LOADER is not set +# CONFIG_ALLOW_DEV_COREDUMP is not set +# CONFIG_INPUT_KEYBOARD is not set +# CONFIG_INPUT_MOUSE is not set +# CONFIG_SERIO is not set +# CONFIG_LEGACY_PTYS is not set +# CONFIG_LDISC_AUTOLOAD is not set +# CONFIG_DEVMEM is not set +# CONFIG_HW_RANDOM is not set +# CONFIG_HWMON is not set +# CONFIG_VGA_CONSOLE is not set +# CONFIG_HID is not set +# CONFIG_USB_SUPPORT is not set +# CONFIG_VIRTIO_MENU is not set +# CONFIG_DNOTIFY is not set +# CONFIG_INOTIFY_USER is not set +# CONFIG_MISC_FILESYSTEMS is not set +CONFIG_LSM="[]" +CONFIG_PRINTK_TIME=y +# CONFIG_DEBUG_MISC is not set +# CONFIG_SCHED_DEBUG is not set +# CONFIG_RCU_TRACE is not set +# CONFIG_FTRACE is not set +# CONFIG_RUNTIME_TESTING_MENU is not set From 37809df4b1c88927fe944eb766e0553811c51f64 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 16 Mar 2020 09:47:43 +0900 Subject: [PATCH 38/38] riscv: create a loader.bin boot image for Kendryte SoC Create the loader.bin bootable image file that can be loaded into Kendryte K210 based boards using the kflash.py tool with the command: kflash.py/kflash.py -t arch/riscv/boot/loader.bin Signed-off-by: Christoph Hellwig Signed-off-by: Damien Le Moal Reviewed-by: Anup Patel Reviewed-by: Palmer Dabbelt Signed-off-by: Palmer Dabbelt --- arch/riscv/Makefile | 6 +++--- arch/riscv/boot/Makefile | 3 +++ 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/arch/riscv/Makefile b/arch/riscv/Makefile index b9009a2fbaf5..0e94ff8ec1f7 100644 --- a/arch/riscv/Makefile +++ b/arch/riscv/Makefile @@ -83,12 +83,12 @@ PHONY += vdso_install vdso_install: $(Q)$(MAKE) $(build)=arch/riscv/kernel/vdso $@ -ifeq ($(CONFIG_RISCV_M_MODE),y) -KBUILD_IMAGE := $(boot)/loader +ifeq ($(CONFIG_RISCV_M_MODE)$(CONFIG_SOC_KENDRYTE),yy) +KBUILD_IMAGE := $(boot)/loader.bin else KBUILD_IMAGE := 
$(boot)/Image.gz endif -BOOT_TARGETS := Image Image.gz loader +BOOT_TARGETS := Image Image.gz loader loader.bin all: $(notdir $(KBUILD_IMAGE)) diff --git a/arch/riscv/boot/Makefile b/arch/riscv/boot/Makefile index 36db8145f9f4..3530c59b3ea7 100644 --- a/arch/riscv/boot/Makefile +++ b/arch/riscv/boot/Makefile @@ -41,6 +41,9 @@ $(obj)/Image.lzma: $(obj)/Image FORCE $(obj)/Image.lzo: $(obj)/Image FORCE $(call if_changed,lzo) +$(obj)/loader.bin: $(obj)/loader FORCE + $(call if_changed,objcopy) + install: $(CONFIG_SHELL) $(srctree)/$(src)/install.sh $(KERNELRELEASE) \ $(obj)/Image System.map "$(INSTALL_PATH)"