From f630c1bdfbf8fe423325beaf60027cfc7fd7c610 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Thu, 15 Sep 2011 11:45:15 +0100 Subject: [PATCH 1/5] ARM: 7091/1: errata: D-cache line maintenance operation by MVA may not succeed This patch implements a workaround for erratum 764369 affecting Cortex-A9 MPCore with two or more processors (all current revisions). Under certain timing circumstances, a data cache line maintenance operation by MVA targeting an Inner Shareable memory region may fail to proceed up to either the Point of Coherency or to the Point of Unification of the system. This workaround adds a DSB instruction before the relevant cache maintenance functions and sets a specific bit in the diagnostic control register of the SCU. Cc: Signed-off-by: Will Deacon Signed-off-by: Catalin Marinas Tested-by: Mark Rutland Signed-off-by: Russell King --- arch/arm/Kconfig | 14 ++++++++++++++ arch/arm/kernel/smp_scu.c | 10 ++++++++++ arch/arm/mm/cache-v7.S | 20 ++++++++++++++++++++ 3 files changed, 44 insertions(+) diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig index 3269576dbfa8..3146ed3f6eca 100644 --- a/arch/arm/Kconfig +++ b/arch/arm/Kconfig @@ -1283,6 +1283,20 @@ config ARM_ERRATA_364296 processor into full low interrupt latency mode. ARM11MPCore is not affected. +config ARM_ERRATA_764369 + bool "ARM errata: Data cache line maintenance operation by MVA may not succeed" + depends on CPU_V7 && SMP + help + This option enables the workaround for erratum 764369 + affecting Cortex-A9 MPCore with two or more processors (all + current revisions). Under certain timing circumstances, a data + cache line maintenance operation by MVA targeting an Inner + Shareable memory region may fail to proceed up to either the + Point of Coherency or to the Point of Unification of the + system. This workaround adds a DSB instruction before the + relevant cache maintenance functions and sets a specific bit + in the diagnostic control register of the SCU. + endmenu source "arch/arm/common/Kconfig" diff --git a/arch/arm/kernel/smp_scu.c b/arch/arm/kernel/smp_scu.c index 79ed5e7f204a..7fcddb75c877 100644 --- a/arch/arm/kernel/smp_scu.c +++ b/arch/arm/kernel/smp_scu.c @@ -13,6 +13,7 @@ #include #include +#include #define SCU_CTRL 0x00 #define SCU_CONFIG 0x04 @@ -37,6 +38,15 @@ void __init scu_enable(void __iomem *scu_base) { u32 scu_ctrl; +#ifdef CONFIG_ARM_ERRATA_764369 + /* Cortex-A9 only */ + if ((read_cpuid(CPUID_ID) & 0xff0ffff0) == 0x410fc090) { + scu_ctrl = __raw_readl(scu_base + 0x30); + if (!(scu_ctrl & 1)) + __raw_writel(scu_ctrl | 0x1, scu_base + 0x30); + } +#endif + scu_ctrl = __raw_readl(scu_base + SCU_CTRL); /* already enabled? */ if (scu_ctrl & 1) diff --git a/arch/arm/mm/cache-v7.S b/arch/arm/mm/cache-v7.S index 3b24bfa3b828..07c4bc8ea0a4 100644 --- a/arch/arm/mm/cache-v7.S +++ b/arch/arm/mm/cache-v7.S @@ -174,6 +174,10 @@ ENTRY(v7_coherent_user_range) dcache_line_size r2, r3 sub r3, r2, #1 bic r12, r0, r3 +#ifdef CONFIG_ARM_ERRATA_764369 + ALT_SMP(W(dsb)) + ALT_UP(W(nop)) +#endif 1: USER( mcr p15, 0, r12, c7, c11, 1 ) @ clean D line to the point of unification add r12, r12, r2 @@ -223,6 +227,10 @@ ENTRY(v7_flush_kern_dcache_area) add r1, r0, r1 sub r3, r2, #1 bic r0, r0, r3 +#ifdef CONFIG_ARM_ERRATA_764369 + ALT_SMP(W(dsb)) + ALT_UP(W(nop)) +#endif 1: mcr p15, 0, r0, c7, c14, 1 @ clean & invalidate D line / unified line add r0, r0, r2 @@ -247,6 +255,10 @@ v7_dma_inv_range: sub r3, r2, #1 tst r0, r3 bic r0, r0, r3 +#ifdef CONFIG_ARM_ERRATA_764369 + ALT_SMP(W(dsb)) + ALT_UP(W(nop)) +#endif mcrne p15, 0, r0, c7, c14, 1 @ clean & invalidate D / U line tst r1, r3 @@ -270,6 +282,10 @@ v7_dma_clean_range: dcache_line_size r2, r3 sub r3, r2, #1 bic r0, r0, r3 +#ifdef CONFIG_ARM_ERRATA_764369 + ALT_SMP(W(dsb)) + ALT_UP(W(nop)) +#endif 1: mcr p15, 0, r0, c7, c10, 1 @ clean D / U line add r0, r0, r2 @@ -288,6 +304,10 @@ ENTRY(v7_dma_flush_range) dcache_line_size r2, r3 sub r3, r2, #1 bic r0, r0, r3 +#ifdef CONFIG_ARM_ERRATA_764369 + ALT_SMP(W(dsb)) + ALT_UP(W(nop)) +#endif 1: mcr p15, 0, r0, c7, c14, 1 @ clean & invalidate D / U line add r0, r0, r2 From 4dfa33868d9f8595717047eb058322e0999691cc Mon Sep 17 00:00:00 2001 From: Russell King Date: Sat, 17 Sep 2011 12:50:27 +0100 Subject: [PATCH 2/5] ARM: nommu: fix warning with checksyscalls.sh CALL scripts/checksyscalls.sh :46:1: warning: "__IGNORE_migrate_pages" redefined In file included from :2: arch/arm/include/asm/unistd.h:482:1: warning: this is the location of the previous definition This is caused because we define __IGNORE_migrate_pages to be 1, but in the case of nommu, it's defined to be empty. Fix this by just defining the __IGNORE_ symbols to be empty. Signed-off-by: Russell King --- arch/arm/include/asm/unistd.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm/include/asm/unistd.h b/arch/arm/include/asm/unistd.h index 2c04ed5efeb5..c60a2944f95b 100644 --- a/arch/arm/include/asm/unistd.h +++ b/arch/arm/include/asm/unistd.h @@ -478,8 +478,8 @@ /* * Unimplemented (or alternatively implemented) syscalls */ -#define __IGNORE_fadvise64_64 1 -#define __IGNORE_migrate_pages 1 +#define __IGNORE_fadvise64_64 +#define __IGNORE_migrate_pages #endif /* __KERNEL__ */ #endif /* __ASM_ARM_UNISTD_H */ From 6760b109603c794e4bd281c0014fef069c019b6a Mon Sep 17 00:00:00 2001 From: Russell King Date: Tue, 20 Sep 2011 23:35:15 +0100 Subject: [PATCH 3/5] ARM: fix vmlinux.lds.S discarding sections We are seeing linker errors caused by sections being discarded, despite the linker script trying to keep them. The result is (eg): `.exit.text' referenced in section `.alt.smp.init' of drivers/built-in.o: defined in discarded section `.exit.text' of drivers/built-in.o `.exit.text' referenced in section `.alt.smp.init' of net/built-in.o: defined in discarded section `.exit.text' of net/built-in.o This is the relevent part of the linker script (reformatted to make it clearer): | SECTIONS | { | /* | * unwind exit sections must be discarded before the rest of the | * unwind sections get included. | */ | /DISCARD/ : { | *(.ARM.exidx.exit.text) | *(.ARM.extab.exit.text) | } | ... | .exit.text : { | *(.exit.text) | *(.memexit.text) | } | ... | /DISCARD/ : { | *(.exit.text) | *(.memexit.text) | *(.exit.data) | *(.memexit.data) | *(.memexit.rodata) | *(.exitcall.exit) | *(.discard) | *(.discard.*) | } | } Now, this is what the linker manual says about discarded output sections: | The special output section name `/DISCARD/' may be used to discard | input sections. Any input sections which are assigned to an output | section named `/DISCARD/' are not included in the output file. No questions, no exceptions. It doesn't say "unless they are listed before the /DISCARD/ section." Now, this is what asn-generic/vmlinux.lds.S says: | /* | * Default discarded sections. | * | * Some archs want to discard exit text/data at runtime rather than | * link time due to cross-section references such as alt instructions, | * bug table, eh_frame, etc. DISCARDS must be the last of output | * section definitions so that such archs put those in earlier section | * definitions. | */ And guess what - the list _always_ includes .exit.text etc. Now, what's actually happening is that the linker is reading the script, and it finds the first /DISCARD/ output section at the beginning of the script. It continues reading the script, and finds the 'DISCARD' macro at the end, which having been postprocessed results in another /DISCARD/ output section. As the linker already contains the earlier /DISCARD/ output section, it adds it to that existing section, so it effectively is placed at the start. This can be seen by using the -M option to ld: | Linker script and memory map | | 0xc037c080 jiffies = jiffies_64 | | /DISCARD/ | *(.ARM.exidx.exit.text) | *(.ARM.extab.exit.text) | *(.exit.text) | *(.memexit.text) | *(.exit.data) | *(.memexit.data) | *(.memexit.rodata) | *(.exitcall.exit) | *(.discard) | *(.discard.*) | | 0xc0008000 . = 0xc0008000 | | .head.text 0xc0008000 0x1d0 | 0xc0008000 _text = . | *(.head.text) | .head.text 0xc0008000 0x1d0 arch/arm/kernel/head.o | 0xc0008000 stext | | .text 0xc0008200 0x2d78d0 | 0xc0008200 _stext = . | 0xc0008200 __exception_text_start = . | *(.exception.text) | .exception.text | ... As you can see, all the discarded sections are grouped together - and as a result of it being the first output section, they all appear before any other section. The result is that not only is the unwind information discarded (as intended), but also the .exit.text, despite us wanting to have the .exit.text preserved. We can't move the unwind information elsewhere, because it'll then be included even when we do actually discard the .exit.text (and similar) sections. So, work around this by avoiding the generic DISCARDS macro, and instead conditionalize the sections to be discarded ourselves. This avoids the ambiguity in how the linker assigns input sections to output sections, making our script less dependent on undocumented linker behaviour. Reported-by: Rob Herring Tested-by: Mark Brown Signed-off-by: Russell King --- arch/arm/kernel/vmlinux.lds.S | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/arch/arm/kernel/vmlinux.lds.S b/arch/arm/kernel/vmlinux.lds.S index bf977f8514f6..4e66f62b8d41 100644 --- a/arch/arm/kernel/vmlinux.lds.S +++ b/arch/arm/kernel/vmlinux.lds.S @@ -23,8 +23,10 @@ #if defined(CONFIG_SMP_ON_UP) && !defined(CONFIG_DEBUG_SPINLOCK) #define ARM_EXIT_KEEP(x) x +#define ARM_EXIT_DISCARD(x) #else #define ARM_EXIT_KEEP(x) +#define ARM_EXIT_DISCARD(x) x #endif OUTPUT_ARCH(arm) @@ -39,6 +41,11 @@ jiffies = jiffies_64 + 4; SECTIONS { /* + * XXX: The linker does not define how output sections are + * assigned to input sections when there are multiple statements + * matching the same input section name. There is no documented + * order of matching. + * * unwind exit sections must be discarded before the rest of the * unwind sections get included. */ @@ -47,6 +54,9 @@ SECTIONS *(.ARM.extab.exit.text) ARM_CPU_DISCARD(*(.ARM.exidx.cpuexit.text)) ARM_CPU_DISCARD(*(.ARM.extab.cpuexit.text)) + ARM_EXIT_DISCARD(EXIT_TEXT) + ARM_EXIT_DISCARD(EXIT_DATA) + EXIT_CALL #ifndef CONFIG_HOTPLUG *(.ARM.exidx.devexit.text) *(.ARM.extab.devexit.text) @@ -58,6 +68,8 @@ SECTIONS #ifndef CONFIG_SMP_ON_UP *(.alt.smp.init) #endif + *(.discard) + *(.discard.*) } #ifdef CONFIG_XIP_KERNEL @@ -279,9 +291,6 @@ SECTIONS STABS_DEBUG .comment 0 : { *(.comment) } - - /* Default discards */ - DISCARDS } /* From d8e89b47e00ee80e920761145144640aac4cf71a Mon Sep 17 00:00:00 2001 From: Russell King Date: Thu, 22 Sep 2011 10:32:25 +0100 Subject: [PATCH 4/5] ARM: dma-mapping: free allocated page if unable to map If the attempt to map a page for DMA fails (eg, because we're out of mapping space) then we must not hold on to the page we allocated for DMA - doing so will result in a memory leak. Cc: Reported-by: Bryan Phillippe Tested-by: Bryan Phillippe Signed-off-by: Russell King --- arch/arm/mm/dma-mapping.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/arm/mm/dma-mapping.c b/arch/arm/mm/dma-mapping.c index 0a0a1e7c20d2..c3ff82f92d9c 100644 --- a/arch/arm/mm/dma-mapping.c +++ b/arch/arm/mm/dma-mapping.c @@ -324,6 +324,8 @@ __dma_alloc(struct device *dev, size_t size, dma_addr_t *handle, gfp_t gfp, if (addr) *handle = pfn_to_dma(dev, page_to_pfn(page)); + else + __dma_free_buffer(page, size); return addr; } From df77abcafc8dc881b6c9347548651777088e4b27 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Fri, 23 Sep 2011 14:34:12 +0100 Subject: [PATCH 5/5] ARM: 7099/1: futex: preserve oldval in SMP __futex_atomic_op The SMP implementation of __futex_atomic_op clobbers oldval with the status flag from the exclusive store. This causes it to always read as zero when performing the FUTEX_OP_CMP_* operation. This patch updates the ARM __futex_atomic_op implementations to take a tmp argument, allowing us to store the strex status flag without overwriting the register containing oldval. Cc: stable@kernel.org Reported-by: Minho Ban Reviewed-by: Nicolas Pitre Signed-off-by: Will Deacon Signed-off-by: Russell King --- arch/arm/include/asm/futex.h | 34 +++++++++++++++++----------------- 1 file changed, 17 insertions(+), 17 deletions(-) diff --git a/arch/arm/include/asm/futex.h b/arch/arm/include/asm/futex.h index 8c73900da9ed..253cc86318bf 100644 --- a/arch/arm/include/asm/futex.h +++ b/arch/arm/include/asm/futex.h @@ -25,17 +25,17 @@ #ifdef CONFIG_SMP -#define __futex_atomic_op(insn, ret, oldval, uaddr, oparg) \ +#define __futex_atomic_op(insn, ret, oldval, tmp, uaddr, oparg) \ smp_mb(); \ __asm__ __volatile__( \ - "1: ldrex %1, [%2]\n" \ + "1: ldrex %1, [%3]\n" \ " " insn "\n" \ - "2: strex %1, %0, [%2]\n" \ - " teq %1, #0\n" \ + "2: strex %2, %0, [%3]\n" \ + " teq %2, #0\n" \ " bne 1b\n" \ " mov %0, #0\n" \ - __futex_atomic_ex_table("%4") \ - : "=&r" (ret), "=&r" (oldval) \ + __futex_atomic_ex_table("%5") \ + : "=&r" (ret), "=&r" (oldval), "=&r" (tmp) \ : "r" (uaddr), "r" (oparg), "Ir" (-EFAULT) \ : "cc", "memory") @@ -73,14 +73,14 @@ futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr, #include #include -#define __futex_atomic_op(insn, ret, oldval, uaddr, oparg) \ +#define __futex_atomic_op(insn, ret, oldval, tmp, uaddr, oparg) \ __asm__ __volatile__( \ - "1: " T(ldr) " %1, [%2]\n" \ + "1: " T(ldr) " %1, [%3]\n" \ " " insn "\n" \ - "2: " T(str) " %0, [%2]\n" \ + "2: " T(str) " %0, [%3]\n" \ " mov %0, #0\n" \ - __futex_atomic_ex_table("%4") \ - : "=&r" (ret), "=&r" (oldval) \ + __futex_atomic_ex_table("%5") \ + : "=&r" (ret), "=&r" (oldval), "=&r" (tmp) \ : "r" (uaddr), "r" (oparg), "Ir" (-EFAULT) \ : "cc", "memory") @@ -117,7 +117,7 @@ futex_atomic_op_inuser (int encoded_op, u32 __user *uaddr) int cmp = (encoded_op >> 24) & 15; int oparg = (encoded_op << 8) >> 20; int cmparg = (encoded_op << 20) >> 20; - int oldval = 0, ret; + int oldval = 0, ret, tmp; if (encoded_op & (FUTEX_OP_OPARG_SHIFT << 28)) oparg = 1 << oparg; @@ -129,19 +129,19 @@ futex_atomic_op_inuser (int encoded_op, u32 __user *uaddr) switch (op) { case FUTEX_OP_SET: - __futex_atomic_op("mov %0, %3", ret, oldval, uaddr, oparg); + __futex_atomic_op("mov %0, %4", ret, oldval, tmp, uaddr, oparg); break; case FUTEX_OP_ADD: - __futex_atomic_op("add %0, %1, %3", ret, oldval, uaddr, oparg); + __futex_atomic_op("add %0, %1, %4", ret, oldval, tmp, uaddr, oparg); break; case FUTEX_OP_OR: - __futex_atomic_op("orr %0, %1, %3", ret, oldval, uaddr, oparg); + __futex_atomic_op("orr %0, %1, %4", ret, oldval, tmp, uaddr, oparg); break; case FUTEX_OP_ANDN: - __futex_atomic_op("and %0, %1, %3", ret, oldval, uaddr, ~oparg); + __futex_atomic_op("and %0, %1, %4", ret, oldval, tmp, uaddr, ~oparg); break; case FUTEX_OP_XOR: - __futex_atomic_op("eor %0, %1, %3", ret, oldval, uaddr, oparg); + __futex_atomic_op("eor %0, %1, %4", ret, oldval, tmp, uaddr, oparg); break; default: ret = -ENOSYS;