forked from luck/tmp_suning_uos_patched
a0776ec8e9
It's not efficient to use a per-cpu variable just to store how many physical stack registers a cpu has. Ever since the first incarnation of ia64 up to the upcoming Montecito processor, that variable has been "glued" to 96. Having a variable in memory means that the kernel is burning an extra cacheline access on every syscall and kernel exit path. Such a "static" value is better served by the instruction patching utility that exists today. Convert ia64_phys_stacked_size_p8 into dynamic insn patching. This also has the pleasant side effect of eliminating access to the per-cpu area while psr.ic=0 in the kernel exit path. (fixable for per-cpu DTC work, but why bother?) There are some concerns about the default value that the instruction encodes in the kernel image. It should not be a concern. The reasons are: (1) cpu_init() is called at CPU initialization. In there, we find out the physical stack register size from PAL and patch two instructions in the kernel exit code. The code in question cannot be executed before the patching is done. (2) The current implementation stores zero in ia64_phys_stacked_size_p8, and that's the value the current kernel exit path loads. With the new code, it is equivalent to storing a register size of 96 in ia64_phys_stacked_size_p8, thus creating a better safety net. Given that (1) above can never fail, having (2) is just a bonus. All in all, this patch allows one less memory reference in the kernel exit path, thus reducing syscall and interrupt return latency, and avoids polluting potentially useful data in the CPU cache. Signed-off-by: Ken Chen <kenneth.w.chen@intel.com> Signed-off-by: Tony Luck <tony.luck@intel.com>
136 lines
3.2 KiB
C
136 lines
3.2 KiB
C
#ifndef _ASM_IA64_ASMMACRO_H
|
|
#define _ASM_IA64_ASMMACRO_H
|
|
|
|
/*
|
|
* Copyright (C) 2000-2001, 2003-2004 Hewlett-Packard Co
|
|
* David Mosberger-Tang <davidm@hpl.hp.com>
|
|
*/
|
|
|
|
|
|
/*
 * ENTRY(name): open an assembly procedure called `name`.
 *
 * Expands to three statements: `.align 32`, `.proc name`, and the label
 * `name:`. No `.global` is emitted here; GLOBAL_ENTRY() adds that.
 * NOTE(review): presumably each use is closed by a matching END(name) - confirm at call sites.
 */
#define ENTRY(name) \
|
|
.align 32; \
|
|
.proc name; \
|
|
name:
|
|
|
|
/*
 * ENTRY_MIN_ALIGN(name): same expansion as ENTRY(name) except that the
 * alignment directive is `.align 16` instead of `.align 32`.
 *
 * Emits `.align 16`, `.proc name`, and the label `name:`.
 */
#define ENTRY_MIN_ALIGN(name) \
|
|
.align 16; \
|
|
.proc name; \
|
|
name:
|
|
|
|
/*
 * GLOBAL_ENTRY(name): emit `.global name` and then everything ENTRY(name)
 * emits (`.align 32`, `.proc name`, `name:`).
 */
#define GLOBAL_ENTRY(name) \
|
|
.global name; \
|
|
ENTRY(name)
|
|
|
|
/*
 * END(name): emit `.endp name`, the closing counterpart of the `.proc name`
 * directive produced by ENTRY(), ENTRY_MIN_ALIGN() and GLOBAL_ENTRY().
 */
#define END(name) \
|
|
.endp name
|
|
|
|
/*
|
|
* Helper macros to make unwind directives more readable:
|
|
*/
|
|
|
|
/*
 * prologue_gr:
 *
 * ASM_UNW_PRLG_RP, _PFS, _PSP and _PR are four distinct single-bit values
 * (0x8, 0x4, 0x2, 0x1).
 * NOTE(review): presumably OR-ed together to form the mask operand of an
 * unwind prologue directive - confirm at call sites.
 *
 * ASM_UNW_PRLG_GRSAVE(ninputs) evaluates to 32 + ninputs.
 * NOTE(review): looks like the number of the first general register that
 * follows `ninputs` input registers counted from r32 - confirm.
 */
|
|
#define ASM_UNW_PRLG_RP 0x8
|
|
#define ASM_UNW_PRLG_PFS 0x4
|
|
#define ASM_UNW_PRLG_PSP 0x2
|
|
#define ASM_UNW_PRLG_PR 0x1
|
|
#define ASM_UNW_PRLG_GRSAVE(ninputs) (32+(ninputs))
|
|
|
|
/*
|
|
* Helper macros for accessing user memory.
|
|
*
|
|
* When adding any new .section/.previous entries here, make sure to
|
|
* also add it to the DISCARD section in arch/ia64/kernel/gate.lds.S or
|
|
* unpleasant things will happen.
|
|
*/
|
|
|
|
/*
 * Name the "__ex_table" section with attribute string "a", then switch
 * straight back with `.previous`; nothing is emitted into the section here.
 * The EX() and EXCLR() macros below add data to it through .xdata4.
 */
.section "__ex_table", "a" // declare section & section attributes
|
|
.previous
|
|
|
|
/*
 * EX(y, x...): emit the instruction text `x` tagged with local label 99 and
 * add an entry for it to "__ex_table".
 *
 * The .xdata4 directive writes two expressions into the table: `99f-.`
 * (referring forward to the tag placed on `x`) and `y-.` (referring to `y`).
 * NOTE(review): `y` is presumably the label at which execution resumes when
 * `x` faults - confirm against the code that consumes __ex_table.
 */
# define EX(y,x...) \
|
|
.xdata4 "__ex_table", 99f-., y-.; \
|
|
[99:] x
|
|
/*
 * EXCLR(y, x...): identical to EX(y, x...) except that the second table
 * expression is `y-.+4` instead of `y-.`.
 *
 * Emits `x` tagged with local label 99 and writes `99f-.` and `y-.+4` into
 * "__ex_table" via .xdata4.
 * NOTE(review): the extra +4 presumably acts as a flag for the exception
 * table consumer (the name suggests "clear") - confirm against that code.
 */
# define EXCLR(y,x...) \
|
|
.xdata4 "__ex_table", 99f-., y-.+4; \
|
|
[99:] x
|
|
|
|
/*
|
|
* Tag MCA recoverable instruction ranges.
|
|
*/
|
|
|
|
/*
 * Name the "__mca_table" section with attribute string "a", then switch
 * straight back with `.previous`; nothing is emitted into the section here.
 * MCA_RECOVER_RANGE() below adds data to it through .xdata4.
 */
.section "__mca_table", "a" // declare section & section attributes
|
|
.previous
|
|
|
|
/*
 * MCA_RECOVER_RANGE(y): add an entry to "__mca_table" that pairs the label
 * `y` with the place where this macro is used.
 *
 * The .xdata4 directive writes `y-.` followed by `99f-.`; the macro then
 * ends with the bare tag `[99:]`, so label 99 lands on whatever code comes
 * immediately after the macro invocation.
 * NOTE(review): which of the two values is the start and which the end of
 * the recoverable range is decided by the table consumer - confirm there.
 */
# define MCA_RECOVER_RANGE(y) \
|
|
.xdata4 "__mca_table", y-., 99f-.; \
|
|
[99:]
|
|
|
|
/*
|
|
* Mark instructions that need a load of a virtual address patched to be
|
|
* a load of a physical address. We use this either in critical performance
|
|
* path (ivt.S - TLB miss processing) or in places where it might not be
|
|
* safe to use a "tpa" instruction (mca_asm.S - error recovery).
|
|
*/
|
|
/*
 * Name the ".data.patch.vtop" section with attribute string "a", then switch
 * straight back with `.previous`; nothing is emitted into the section here.
 * LOAD_PHYSICAL() below adds data to it through .xdata4.
 */
.section ".data.patch.vtop", "a" // declare section & section attributes
|
|
.previous
|
|
|
|
/*
 * LOAD_PHYSICAL(pr, reg, obj): emit `(pr) movl reg = obj` tagged with local
 * label 1 and record that instruction in ".data.patch.vtop".
 *
 *   pr  - qualifying predicate placed in front of the movl
 *   reg - destination register of the movl
 *   obj - symbol/expression loaded by the movl
 *
 * The table entry is the single expression `1b-.`, referring back to the
 * tagged movl.
 */
#define LOAD_PHYSICAL(pr, reg, obj) \
|
|
[1:](pr)movl reg = obj; \
|
|
.xdata4 ".data.patch.vtop", 1b-.
|
|
|
|
/*
|
|
* For now, we always put in the McKinley E9 workaround. On CPUs that don't need it,
|
|
* we'll patch out the work-around bundles with NOPs, so their impact is minimal.
|
|
*/
|
|
/* Defined unconditionally, so the #ifdef branch below is always the one used. */
#define DO_MCKINLEY_E9_WORKAROUND
|
|
|
|
#ifdef DO_MCKINLEY_E9_WORKAROUND
|
|
/* Name the patch-list section, then return to the previous section. */
.section ".data.patch.mckinley_e9", "a"
|
|
.previous
|
|
/*
 * workaround for Itanium 2 Errata 9:
 *
 * FSYS_RETURN expands to two explicit .mib bundles.
 *   Bundle 1 (label 1): its location is recorded in
 *     ".data.patch.mckinley_e9" as `1f-.`; it copies ar.pfs into r16 and
 *     does a br.call to bundle 2, using b7 as the link register.
 *   Bundle 2 (label 2): writes r16 back into ar.pfs and returns with
 *     br.ret through b6.
 * The expansion therefore overwrites r16 and b7.
 */
# define FSYS_RETURN \
|
|
.xdata4 ".data.patch.mckinley_e9", 1f-.; \
|
|
1:{ .mib; \
|
|
nop.m 0; \
|
|
mov r16=ar.pfs; \
|
|
br.call.sptk.many b7=2f;; \
|
|
}; \
|
|
2:{ .mib; \
|
|
nop.m 0; \
|
|
mov ar.pfs=r16; \
|
|
br.ret.sptk.many b6;; \
|
|
}
|
|
#else
|
|
/* Without the workaround FSYS_RETURN is just the return branch through b6. */
# define FSYS_RETURN br.ret.sptk.many b6
|
|
#endif
|
|
|
|
/*
|
|
* If physical stack register size is different from IA64_NUM_PHYS_STACK_REG,
|
|
* dynamically patch the kernel for correct size.
|
|
*/
|
|
/* Name the patch-list section, then return to the previous section. */
.section ".data.patch.phys_stack_reg", "a"
|
|
.previous
|
|
/*
 * LOAD_PHYS_STACK_REG_SIZE(reg): emit
 *     adds reg = IA64_NUM_PHYS_STACK_REG*8+8, r0
 * tagged with local label 1, and record that instruction in
 * ".data.patch.phys_stack_reg" as the single expression `1b-.`.
 *
 * The immediate built from IA64_NUM_PHYS_STACK_REG is the value assembled
 * into the image; the recorded location lets the instruction be found
 * again for patching.
 */
#define LOAD_PHYS_STACK_REG_SIZE(reg) \
|
|
[1:] adds reg=IA64_NUM_PHYS_STACK_REG*8+8,r0; \
|
|
.xdata4 ".data.patch.phys_stack_reg", 1b-.
|
|
|
|
/*
|
|
* Up until early 2004, use of .align within a function caused bad unwind info.
|
|
* TEXT_ALIGN(n) expands into ".align n" if a fixed GAS is available or into nothing
|
|
* otherwise.
|
|
*/
|
|
/*
 * HAVE_WORKING_TEXT_ALIGN is not defined in this header.
 * NOTE(review): presumably supplied by the build when the assembler is new enough - confirm.
 */
#ifdef HAVE_WORKING_TEXT_ALIGN
|
|
/* Pass the request through as a real `.align n`. */
# define TEXT_ALIGN(n) .align n
|
|
#else
|
|
/* Expand to nothing, so no alignment directive is emitted. */
# define TEXT_ALIGN(n)
|
|
#endif
|
|
|
|
/*
 * dv_serialize_data / dv_serialize_instruction: expand to the assembler
 * directives `.serialize.data` / `.serialize.instruction` when
 * HAVE_SERIALIZE_DIRECTIVE is defined, and to nothing otherwise.
 * HAVE_SERIALIZE_DIRECTIVE is not defined in this header.
 * NOTE(review): presumably supplied by the build when the assembler supports these directives - confirm.
 */
#ifdef HAVE_SERIALIZE_DIRECTIVE
|
|
# define dv_serialize_data .serialize.data
|
|
# define dv_serialize_instruction .serialize.instruction
|
|
#else
|
|
/* Directives unavailable: both names vanish from the assembled source. */
# define dv_serialize_data
|
|
# define dv_serialize_instruction
|
|
#endif
|
|
|
|
#endif /* _ASM_IA64_ASMMACRO_H */
|