kernel_optimize_test/arch/sparc64/kernel/trampoline.S
David S. Miller b445e26cbf [SPARC64]: Avoid membar instructions in delay slots.
In particular, avoid membar instructions in the delay
slot of a jmpl instruction.

UltraSPARC-I, II, IIi, and IIe have a bug, documented in
the UltraSPARC-IIi User's Manual, Appendix K, Erratum 51

The long and short of it is that if the IMU unit misses
on a branch or jmpl, and there is a store buffer synchronizing
membar in the delay slot, the chip can stop fetching instructions.

If interrupts are enabled or some other trap is enabled, the
chip will unwedge itself, but performance will suffer.

We already had a workaround for this bug in a few spots, but
it's better to have the entire tree sanitized for this rule.

Signed-off-by: David S. Miller <davem@davemloft.net>
2005-06-27 15:42:04 -07:00

370 lines
8.0 KiB
ArmAsm

/* $Id: trampoline.S,v 1.26 2002/02/09 19:49:30 davem Exp $
* trampoline.S: Jump start slave processors on sparc64.
*
* Copyright (C) 1997 David S. Miller (davem@caip.rutgers.edu)
*/
#include <asm/head.h>
#include <asm/asi.h>
#include <asm/lsu.h>
#include <asm/dcr.h>
#include <asm/dcu.h>
#include <asm/pstate.h>
#include <asm/page.h>
#include <asm/pgtable.h>
#include <asm/spitfire.h>
#include <asm/processor.h>
#include <asm/thread_info.h>
#include <asm/mmu.h>
.data
.align 8
call_method:
.asciz "call-method"
.align 8
itlb_load:
.asciz "SUNW,itlb-load"
.align 8
dtlb_load:
.asciz "SUNW,dtlb-load"
.text
.align 8
.globl sparc64_cpu_startup, sparc64_cpu_startup_end
sparc64_cpu_startup:
flushw
BRANCH_IF_CHEETAH_BASE(g1,g5,cheetah_startup)
BRANCH_IF_CHEETAH_PLUS_OR_FOLLOWON(g1,g5,cheetah_plus_startup)
ba,pt %xcc, spitfire_startup
nop
cheetah_plus_startup:
/* Preserve OBP chosen DCU and DCR register settings. */
ba,pt %xcc, cheetah_generic_startup
nop
cheetah_startup:
mov DCR_BPE | DCR_RPE | DCR_SI | DCR_IFPOE | DCR_MS, %g1
wr %g1, %asr18
sethi %uhi(DCU_ME|DCU_RE|DCU_HPE|DCU_SPE|DCU_SL|DCU_WE), %g5
or %g5, %ulo(DCU_ME|DCU_RE|DCU_HPE|DCU_SPE|DCU_SL|DCU_WE), %g5
sllx %g5, 32, %g5
or %g5, DCU_DM | DCU_IM | DCU_DC | DCU_IC, %g5
stxa %g5, [%g0] ASI_DCU_CONTROL_REG
membar #Sync
cheetah_generic_startup:
mov TSB_EXTENSION_P, %g3
stxa %g0, [%g3] ASI_DMMU
stxa %g0, [%g3] ASI_IMMU
membar #Sync
mov TSB_EXTENSION_S, %g3
stxa %g0, [%g3] ASI_DMMU
membar #Sync
mov TSB_EXTENSION_N, %g3
stxa %g0, [%g3] ASI_DMMU
stxa %g0, [%g3] ASI_IMMU
membar #Sync
/* Disable STICK_INT interrupts. */
sethi %hi(0x80000000), %g5
sllx %g5, 32, %g5
wr %g5, %asr25
ba,pt %xcc, startup_continue
nop
spitfire_startup:
mov (LSU_CONTROL_IC | LSU_CONTROL_DC | LSU_CONTROL_IM | LSU_CONTROL_DM), %g1
stxa %g1, [%g0] ASI_LSU_CONTROL
membar #Sync
startup_continue:
wrpr %g0, 15, %pil
sethi %hi(0x80000000), %g2
sllx %g2, 32, %g2
wr %g2, 0, %tick_cmpr
/* Call OBP by hand to lock KERNBASE into i/d tlbs.
* We lock 2 consequetive entries if we are 'bigkernel'.
*/
mov %o0, %l0
sethi %hi(prom_entry_lock), %g2
1: ldstub [%g2 + %lo(prom_entry_lock)], %g1
membar #StoreLoad | #StoreStore
brnz,pn %g1, 1b
nop
sethi %hi(p1275buf), %g2
or %g2, %lo(p1275buf), %g2
ldx [%g2 + 0x10], %l2
mov %sp, %l1
add %l2, -(192 + 128), %sp
flushw
sethi %hi(call_method), %g2
or %g2, %lo(call_method), %g2
stx %g2, [%sp + 2047 + 128 + 0x00]
mov 5, %g2
stx %g2, [%sp + 2047 + 128 + 0x08]
mov 1, %g2
stx %g2, [%sp + 2047 + 128 + 0x10]
sethi %hi(itlb_load), %g2
or %g2, %lo(itlb_load), %g2
stx %g2, [%sp + 2047 + 128 + 0x18]
sethi %hi(mmu_ihandle_cache), %g2
lduw [%g2 + %lo(mmu_ihandle_cache)], %g2
stx %g2, [%sp + 2047 + 128 + 0x20]
sethi %hi(KERNBASE), %g2
stx %g2, [%sp + 2047 + 128 + 0x28]
sethi %hi(kern_locked_tte_data), %g2
ldx [%g2 + %lo(kern_locked_tte_data)], %g2
stx %g2, [%sp + 2047 + 128 + 0x30]
mov 15, %g2
BRANCH_IF_ANY_CHEETAH(g1,g5,1f)
mov 63, %g2
1:
stx %g2, [%sp + 2047 + 128 + 0x38]
sethi %hi(p1275buf), %g2
or %g2, %lo(p1275buf), %g2
ldx [%g2 + 0x08], %o1
call %o1
add %sp, (2047 + 128), %o0
sethi %hi(bigkernel), %g2
lduw [%g2 + %lo(bigkernel)], %g2
cmp %g2, 0
be,pt %icc, do_dtlb
nop
sethi %hi(call_method), %g2
or %g2, %lo(call_method), %g2
stx %g2, [%sp + 2047 + 128 + 0x00]
mov 5, %g2
stx %g2, [%sp + 2047 + 128 + 0x08]
mov 1, %g2
stx %g2, [%sp + 2047 + 128 + 0x10]
sethi %hi(itlb_load), %g2
or %g2, %lo(itlb_load), %g2
stx %g2, [%sp + 2047 + 128 + 0x18]
sethi %hi(mmu_ihandle_cache), %g2
lduw [%g2 + %lo(mmu_ihandle_cache)], %g2
stx %g2, [%sp + 2047 + 128 + 0x20]
sethi %hi(KERNBASE + 0x400000), %g2
stx %g2, [%sp + 2047 + 128 + 0x28]
sethi %hi(kern_locked_tte_data), %g2
ldx [%g2 + %lo(kern_locked_tte_data)], %g2
sethi %hi(0x400000), %g1
add %g2, %g1, %g2
stx %g2, [%sp + 2047 + 128 + 0x30]
mov 14, %g2
BRANCH_IF_ANY_CHEETAH(g1,g5,1f)
mov 62, %g2
1:
stx %g2, [%sp + 2047 + 128 + 0x38]
sethi %hi(p1275buf), %g2
or %g2, %lo(p1275buf), %g2
ldx [%g2 + 0x08], %o1
call %o1
add %sp, (2047 + 128), %o0
do_dtlb:
sethi %hi(call_method), %g2
or %g2, %lo(call_method), %g2
stx %g2, [%sp + 2047 + 128 + 0x00]
mov 5, %g2
stx %g2, [%sp + 2047 + 128 + 0x08]
mov 1, %g2
stx %g2, [%sp + 2047 + 128 + 0x10]
sethi %hi(dtlb_load), %g2
or %g2, %lo(dtlb_load), %g2
stx %g2, [%sp + 2047 + 128 + 0x18]
sethi %hi(mmu_ihandle_cache), %g2
lduw [%g2 + %lo(mmu_ihandle_cache)], %g2
stx %g2, [%sp + 2047 + 128 + 0x20]
sethi %hi(KERNBASE), %g2
stx %g2, [%sp + 2047 + 128 + 0x28]
sethi %hi(kern_locked_tte_data), %g2
ldx [%g2 + %lo(kern_locked_tte_data)], %g2
stx %g2, [%sp + 2047 + 128 + 0x30]
mov 15, %g2
BRANCH_IF_ANY_CHEETAH(g1,g5,1f)
mov 63, %g2
1:
stx %g2, [%sp + 2047 + 128 + 0x38]
sethi %hi(p1275buf), %g2
or %g2, %lo(p1275buf), %g2
ldx [%g2 + 0x08], %o1
call %o1
add %sp, (2047 + 128), %o0
sethi %hi(bigkernel), %g2
lduw [%g2 + %lo(bigkernel)], %g2
cmp %g2, 0
be,pt %icc, do_unlock
nop
sethi %hi(call_method), %g2
or %g2, %lo(call_method), %g2
stx %g2, [%sp + 2047 + 128 + 0x00]
mov 5, %g2
stx %g2, [%sp + 2047 + 128 + 0x08]
mov 1, %g2
stx %g2, [%sp + 2047 + 128 + 0x10]
sethi %hi(dtlb_load), %g2
or %g2, %lo(dtlb_load), %g2
stx %g2, [%sp + 2047 + 128 + 0x18]
sethi %hi(mmu_ihandle_cache), %g2
lduw [%g2 + %lo(mmu_ihandle_cache)], %g2
stx %g2, [%sp + 2047 + 128 + 0x20]
sethi %hi(KERNBASE + 0x400000), %g2
stx %g2, [%sp + 2047 + 128 + 0x28]
sethi %hi(kern_locked_tte_data), %g2
ldx [%g2 + %lo(kern_locked_tte_data)], %g2
sethi %hi(0x400000), %g1
add %g2, %g1, %g2
stx %g2, [%sp + 2047 + 128 + 0x30]
mov 14, %g2
BRANCH_IF_ANY_CHEETAH(g1,g5,1f)
mov 62, %g2
1:
stx %g2, [%sp + 2047 + 128 + 0x38]
sethi %hi(p1275buf), %g2
or %g2, %lo(p1275buf), %g2
ldx [%g2 + 0x08], %o1
call %o1
add %sp, (2047 + 128), %o0
do_unlock:
sethi %hi(prom_entry_lock), %g2
stb %g0, [%g2 + %lo(prom_entry_lock)]
membar #StoreStore | #StoreLoad
mov %l1, %sp
flushw
mov %l0, %o0
wrpr %g0, (PSTATE_PRIV | PSTATE_PEF), %pstate
wr %g0, 0, %fprs
/* XXX Buggy PROM... */
srl %o0, 0, %o0
ldx [%o0], %g6
wr %g0, ASI_P, %asi
mov PRIMARY_CONTEXT, %g7
stxa %g0, [%g7] ASI_DMMU
membar #Sync
mov SECONDARY_CONTEXT, %g7
stxa %g0, [%g7] ASI_DMMU
membar #Sync
mov 1, %g5
sllx %g5, THREAD_SHIFT, %g5
sub %g5, (STACKFRAME_SZ + STACK_BIAS), %g5
add %g6, %g5, %sp
mov 0, %fp
wrpr %g0, 0, %wstate
wrpr %g0, 0, %tl
/* Setup the trap globals, then we can resurface. */
rdpr %pstate, %o1
mov %g6, %o2
wrpr %o1, PSTATE_AG, %pstate
sethi %hi(sparc64_ttable_tl0), %g5
wrpr %g5, %tba
mov %o2, %g6
wrpr %o1, PSTATE_MG, %pstate
#define KERN_HIGHBITS ((_PAGE_VALID|_PAGE_SZ4MB)^0xfffff80000000000)
#define KERN_LOWBITS (_PAGE_CP | _PAGE_CV | _PAGE_P | _PAGE_W)
mov TSB_REG, %g1
stxa %g0, [%g1] ASI_DMMU
membar #Sync
mov TLB_SFSR, %g1
sethi %uhi(KERN_HIGHBITS), %g2
or %g2, %ulo(KERN_HIGHBITS), %g2
sllx %g2, 32, %g2
or %g2, KERN_LOWBITS, %g2
BRANCH_IF_ANY_CHEETAH(g3,g7,9f)
ba,pt %xcc, 1f
nop
9:
sethi %uhi(VPTE_BASE_CHEETAH), %g3
or %g3, %ulo(VPTE_BASE_CHEETAH), %g3
ba,pt %xcc, 2f
sllx %g3, 32, %g3
1:
sethi %uhi(VPTE_BASE_SPITFIRE), %g3
or %g3, %ulo(VPTE_BASE_SPITFIRE), %g3
sllx %g3, 32, %g3
2:
clr %g7
#undef KERN_HIGHBITS
#undef KERN_LOWBITS
wrpr %o1, 0x0, %pstate
ldx [%g6 + TI_TASK], %g4
wrpr %g0, 0, %wstate
call init_irqwork_curcpu
nop
BRANCH_IF_CHEETAH_PLUS_OR_FOLLOWON(g2,g3,1f)
ba,pt %xcc, 2f
nop
1: /* Start using proper page size encodings in ctx register. */
sethi %uhi(CTX_CHEETAH_PLUS_NUC), %g3
mov PRIMARY_CONTEXT, %g1
sllx %g3, 32, %g3
sethi %hi(CTX_CHEETAH_PLUS_CTX0), %g2
or %g3, %g2, %g3
stxa %g3, [%g1] ASI_DMMU
membar #Sync
2:
rdpr %pstate, %o1
or %o1, PSTATE_IE, %o1
wrpr %o1, 0, %pstate
call prom_set_trap_table
sethi %hi(sparc64_ttable_tl0), %o0
call smp_callin
nop
call cpu_idle
mov 0, %o0
call cpu_panic
nop
1: b,a,pt %xcc, 1b
.align 8
sparc64_cpu_startup_end: