kernel_optimize_test/include/asm-i386/pda.h
Jeremy Fitzhardinge 464d1a78fb [PATCH] i386: Convert i386 PDA code to use %fs
Convert the PDA code to use %fs rather than %gs as the segment for
per-processor data.  This is because some processors show a small but
measurable performance gain for reloading a NULL segment selector (as %fs
generally is in user-space) versus a non-NULL one (as %gs generally is).

On modern processors the difference is very small, perhaps undetectable.
Some old AMD "K6 3D+" processors are noticeably slower when %fs is used
rather than %gs; I have no idea why this might be, but I think they're
sufficiently rare that it doesn't matter much.

This patch also fixes the math emulator, which had not been adjusted to
match the changed struct pt_regs.

[frederik.deweerdt@gmail.com: fixit with gdb]
[mingo@elte.hu: Fix KVM too]

Signed-off-by: Jeremy Fitzhardinge <jeremy@xensource.com>
Signed-off-by: Andi Kleen <ak@suse.de>
Cc: Ian Campbell <Ian.Campbell@XenSource.com>
Acked-by: Ingo Molnar <mingo@elte.hu>
Acked-by: Zachary Amsden <zach@vmware.com>
Cc: Eric Dumazet <dada1@cosmosbay.com>
Signed-off-by: Frederik Deweerdt <frederik.deweerdt@gmail.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
2007-02-13 13:26:20 +01:00

101 lines
2.8 KiB
C

/*
Per-processor Data Areas
Jeremy Fitzhardinge <jeremy@goop.org> 2006
Based on asm-x86_64/pda.h by Andi Kleen.
*/
#ifndef _I386_PDA_H
#define _I386_PDA_H
#include <linux/stddef.h>
#include <linux/types.h>
/*
 * One per-processor data area.  The %fs-relative accessor macros in
 * this header locate members by their offset within this structure.
 */
struct i386_pda {
	struct i386_pda		*_pda;		/* self pointer */
	int			cpu_number;
	struct task_struct	*pcurrent;	/* the current process */
	struct pt_regs		*irq_regs;
};
/* Array of PDA pointers; defined elsewhere.  cpu_pda(i) yields entry i. */
extern struct i386_pda *_cpu_pda[];
#define cpu_pda(i) (_cpu_pda[i])
/* Byte offset of member 'field' within struct i386_pda. */
#define pda_offset(field) offsetof(struct i386_pda, field)
/* Called from the default: arm of the size switches in pda_to_op() and
   pda_from_op(), i.e. when a field is not 1, 2 or 4 bytes wide.
   NOTE(review): presumably never defined, so that such a use fails at
   link time - confirm. */
extern void __bad_pda_field(void);
/* This variable is never instantiated.  It is only used as a stand-in
   for the real per-cpu PDA memory, so that gcc can understand what
   memory operations the inline asms below are performing.  This
   eliminates the need to make the asms volatile or to give them memory
   clobbers, so gcc can readily analyse them. */
extern struct i386_pda _proxy_pda;
/*
 * pda_to_op(op, field, val) - apply instruction 'op' with source operand
 * 'val' to a PDA field addressed through %fs.
 *
 *   op    - mnemonic as a string literal without size suffix (the users
 *           in this header pass "mov", "add", "sub" and "or"); the
 *           b/w/l suffix is chosen from sizeof(field).
 *   field - member name of struct i386_pda.
 *   val   - value operand; converted to the field's type.
 *
 * The instruction's destination is %fs:<offset of field>.  The
 * _proxy_pda.field "+m" operand is not referenced by the instruction
 * text; it only tells gcc which object is read and written.
 * Field sizes other than 1, 2 or 4 bytes end up in __bad_pda_field().
 */
#define pda_to_op(op,field,val)						\
	do {								\
		typedef typeof(_proxy_pda.field) T__;			\
		/* dead code: makes gcc type-check val against field */	\
		if (0) { T__ tmp__; tmp__ = (val); }			\
		switch (sizeof(_proxy_pda.field)) {			\
		case 1:							\
			/* "q", not "r": the operand must live in a  */	\
			/* register that has an 8-bit sub-register    */	\
			asm(op "b %1,%%fs:%c2"				\
			    : "+m" (_proxy_pda.field)			\
			    : "qi" ((T__)(val)),			\
			      "i" (pda_offset(field)));			\
			break;						\
		case 2:							\
			asm(op "w %1,%%fs:%c2"				\
			    : "+m" (_proxy_pda.field)			\
			    : "ri" ((T__)(val)),			\
			      "i" (pda_offset(field)));			\
			break;						\
		case 4:							\
			asm(op "l %1,%%fs:%c2"				\
			    : "+m" (_proxy_pda.field)			\
			    : "ri" ((T__)(val)),			\
			      "i" (pda_offset(field)));			\
			break;						\
		default:						\
			__bad_pda_field();				\
		}							\
	} while (0)
/*
 * pda_from_op(op, field) - run instruction 'op' with a PDA field,
 * addressed through %fs, as source and yield the resulting value.
 *
 *   op    - mnemonic as a string literal without size suffix (read_pda()
 *           passes "mov"); the b/w/l suffix is chosen from sizeof(field).
 *   field - member name of struct i386_pda.
 *
 * Evaluates to a value of the field's type.  The _proxy_pda.field "m"
 * input is not referenced by the instruction text; it only tells gcc
 * which object the asm reads.
 * Field sizes other than 1, 2 or 4 bytes end up in __bad_pda_field().
 */
#define pda_from_op(op,field)						\
	({								\
		typeof(_proxy_pda.field) ret__;				\
		switch (sizeof(_proxy_pda.field)) {			\
		case 1:							\
			/* "=q", not "=r": the result register must  */	\
			/* have an 8-bit sub-register                 */	\
			asm(op "b %%fs:%c1,%0"				\
			    : "=q" (ret__)				\
			    : "i" (pda_offset(field)),			\
			      "m" (_proxy_pda.field));			\
			break;						\
		case 2:							\
			asm(op "w %%fs:%c1,%0"				\
			    : "=r" (ret__)				\
			    : "i" (pda_offset(field)),			\
			      "m" (_proxy_pda.field));			\
			break;						\
		case 4:							\
			asm(op "l %%fs:%c1,%0"				\
			    : "=r" (ret__)				\
			    : "i" (pda_offset(field)),			\
			      "m" (_proxy_pda.field));			\
			break;						\
		default:						\
			__bad_pda_field();				\
		}							\
		ret__;							\
	})
/*
 * pda_addr(field) - address of 'field' inside the PDA.
 *
 * Reads the PDA's self pointer (_pda) with read_pda(), then adds the
 * member's byte offset.  The result has type "pointer to field type".
 */
#define pda_addr(field)							\
	({								\
		unsigned char *base__ =					\
			(unsigned char *)read_pda(_pda);		\
		(typeof(_proxy_pda.field) *)(base__ + pda_offset(field)); \
	})
/* Field accessors built on pda_from_op()/pda_to_op(): each one passes
   the instruction mnemonic to use ("mov", "add", "sub" or "or"). */
/* Yield the value of a PDA field. */
#define read_pda(field) pda_from_op("mov",field)
/* Store val into a PDA field. */
#define write_pda(field,val) pda_to_op("mov",field,val)
/* Add val to a PDA field in place. */
#define add_pda(field,val) pda_to_op("add",field,val)
/* Subtract val from a PDA field in place. */
#define sub_pda(field,val) pda_to_op("sub",field,val)
/* Bitwise-OR val into a PDA field in place. */
#define or_pda(field,val) pda_to_op("or",field,val)
#endif /* _I386_PDA_H */