forked from luck/tmp_suning_uos_patched
[IA64] fsys_getcpu for IA64
On a 1.6GHz Montecito Tiger4, the following performance data was measured with a kernel built from defconfig, which has NUMA configured: Fastest sys_getcpu: 502 itc counts. Fastest fsys_getcpu: 28 itc counts. fsys_getcpu performance is largely determined by whether its data (cpu_to_node_map etc.) is in cache; in the cold-cache case fsys_getcpu can take up to ~150 itc counts. Signed-off-by: Fenghua Yu <fenghua.yu@intel.com> Signed-off-by: Tony Luck <tony.luck@intel.com>
This commit is contained in:
parent
ddbad07630
commit
3bc207d2b7
@ -35,6 +35,7 @@ void foo(void)
|
|||||||
BLANK();
|
BLANK();
|
||||||
|
|
||||||
DEFINE(TI_FLAGS, offsetof(struct thread_info, flags));
|
DEFINE(TI_FLAGS, offsetof(struct thread_info, flags));
|
||||||
|
DEFINE(TI_CPU, offsetof(struct thread_info, cpu));
|
||||||
DEFINE(TI_PRE_COUNT, offsetof(struct thread_info, preempt_count));
|
DEFINE(TI_PRE_COUNT, offsetof(struct thread_info, preempt_count));
|
||||||
|
|
||||||
BLANK();
|
BLANK();
|
||||||
|
@ -10,6 +10,8 @@
|
|||||||
* probably broke it along the way... ;-)
|
* probably broke it along the way... ;-)
|
||||||
* 13-Jul-04 clameter Implement fsys_clock_gettime and revise fsys_gettimeofday to make
|
* 13-Jul-04 clameter Implement fsys_clock_gettime and revise fsys_gettimeofday to make
|
||||||
* it capable of using memory based clocks without falling back to C code.
|
* it capable of using memory based clocks without falling back to C code.
|
||||||
|
* 08-Feb-07 Fenghua Yu Implement fsys_getcpu.
|
||||||
|
*
|
||||||
*/
|
*/
|
||||||
|
|
||||||
#include <asm/asmmacro.h>
|
#include <asm/asmmacro.h>
|
||||||
@ -505,6 +507,59 @@ EX(.fail_efault, (p15) st8 [r34]=r3)
|
|||||||
#endif
|
#endif
|
||||||
END(fsys_rt_sigprocmask)
|
END(fsys_rt_sigprocmask)
|
||||||
|
|
||||||
|
/*
 * fsys_getcpu: light-weight handler for getcpu(cpu, node, <unused>).
 *
 * In:	r32 = user pointer that receives the CPU number  (may be NULL)
 *	r33 = user pointer that receives the node number (may be NULL)
 *	r16 = base register; thread_info is addressed at r16+IA64_TASK_SIZE
 *	      (presumably the current task pointer -- confirm against the
 *	      fsyscall entry code, which is outside this block)
 * Out:	r8  = 0 on the successful path below
 *
 * fsys_getcpu doesn't use the third parameter in this implementation.  It
 * reads current_thread_info()->cpu and, with CONFIG_NUMA, the corresponding
 * node in cpu_to_node_map; without CONFIG_NUMA the node is reported as 0.
 *
 * A NULL cpu or node pointer means "caller is not interested in this value":
 * that pointer is neither probed nor written.  Without this, a NULL argument
 * would fault in probe.w.fault and the call would fail via .fail_efault.
 *
 * Exits other than FSYS_RETURN:
 *	.fail_einval		- an argument register is NaT
 *	.fail_efault		- a non-NULL pointer failed the probe or the store
 *	fsys_fallback_syscall	- some TIF_ALLWORK_MASK bit is set in
 *				  thread_info->flags
 */
ENTRY(fsys_getcpu)
	.prologue
	.altrp b6
	.body
	;;
	add r2=TI_FLAGS+IA64_TASK_SIZE,r16	// r2 = &thread_info->flags
	tnat.nz p6,p0 = r32			// guard against NaT argument
	add r3=TI_CPU+IA64_TASK_SIZE,r16	// r3 = &thread_info->cpu
	;;
	ld4 r3=[r3]				// M r3 = thread_info->cpu
	ld4 r2=[r2]				// M r2 = thread_info->flags
(p6)	br.cond.spnt.few .fail_einval		// B
	;;
	tnat.nz p7,p0 = r33			// I guard against NaT argument
(p7)	br.cond.spnt.few .fail_einval		// B
	;;
	// p6/p7 are free again once the NaT branches are behind us; reuse them
	// as "pointer is non-NULL" predicates for the probes and stores below.
	cmp.ne p6,p0=r32,r0			// p6 = (cpu pointer != NULL)
	cmp.ne p7,p0=r33,r0			// p7 = (node pointer != NULL)
	;;
#ifdef CONFIG_NUMA
	movl r17=cpu_to_node_map
	;;
EX(.fail_efault, (p6) probe.w.fault r32, 3)	// M This takes 5 cycles
EX(.fail_efault, (p7) probe.w.fault r33, 3)	// M This takes 5 cycles
	shladd r18=r3,1,r17			// r18 = &cpu_to_node_map[cpu] (2-byte entries)
	;;
	ld2 r20=[r18]				// r20 = cpu_to_node_map[cpu]
	and r2 = TIF_ALLWORK_MASK,r2
	;;
	cmp.ne p8,p0=0,r2			// any pending work flagged?
(p8)	br.spnt.many fsys_fallback_syscall	// yes: take the fallback path
	;;
EX(.fail_efault, (p6) st4 [r32] = r3)		// *cpu = thread_info->cpu
	// NOTE(review): only 2 bytes are stored for the node while the cpu
	// store above is 4 bytes wide; if the user's node variable is 32 bits
	// its upper half is left untouched -- confirm whether st4 is intended.
EX(.fail_efault, (p7) st2 [r33] = r20)		// *node = cpu_to_node_map[cpu]
	mov r8=0
	;;
#else
EX(.fail_efault, (p6) probe.w.fault r32, 3)	// M This takes 5 cycles
EX(.fail_efault, (p7) probe.w.fault r33, 3)	// M This takes 5 cycles
	and r2 = TIF_ALLWORK_MASK,r2
	;;
	cmp.ne p8,p0=0,r2			// any pending work flagged?
(p8)	br.spnt.many fsys_fallback_syscall	// yes: take the fallback path
	;;
EX(.fail_efault, (p6) st4 [r32] = r3)		// *cpu = thread_info->cpu
	// NOTE(review): 2-byte store, see the width remark in the NUMA branch.
EX(.fail_efault, (p7) st2 [r33] = r0)		// *node = 0 (no NUMA)
	mov r8=0
	;;
#endif
	FSYS_RETURN
END(fsys_getcpu)
|
||||||
|
|
||||||
ENTRY(fsys_fallback_syscall)
|
ENTRY(fsys_fallback_syscall)
|
||||||
.prologue
|
.prologue
|
||||||
.altrp b6
|
.altrp b6
|
||||||
@ -878,6 +933,56 @@ fsyscall_table:
|
|||||||
data8 0 // timer_delete
|
data8 0 // timer_delete
|
||||||
data8 0 // clock_settime
|
data8 0 // clock_settime
|
||||||
data8 fsys_clock_gettime // clock_gettime
|
data8 fsys_clock_gettime // clock_gettime
|
||||||
|
data8 0 // clock_getres // 1255
|
||||||
|
data8 0 // clock_nanosleep
|
||||||
|
data8 0 // fstatfs64
|
||||||
|
data8 0 // statfs64
|
||||||
|
data8 0 // mbind
|
||||||
|
data8 0 // get_mempolicy // 1260
|
||||||
|
data8 0 // set_mempolicy
|
||||||
|
data8 0 // mq_open
|
||||||
|
data8 0 // mq_unlink
|
||||||
|
data8 0 // mq_timedsend
|
||||||
|
data8 0 // mq_timedreceive // 1265
|
||||||
|
data8 0 // mq_notify
|
||||||
|
data8 0 // mq_getsetattr
|
||||||
|
data8 0 // kexec_load
|
||||||
|
data8 0 // vserver
|
||||||
|
data8 0 // waitid // 1270
|
||||||
|
data8 0 // add_key
|
||||||
|
data8 0 // request_key
|
||||||
|
data8 0 // keyctl
|
||||||
|
data8 0 // ioprio_set
|
||||||
|
data8 0 // ioprio_get // 1275
|
||||||
|
data8 0 // move_pages
|
||||||
|
data8 0 // inotify_init
|
||||||
|
data8 0 // inotify_add_watch
|
||||||
|
data8 0 // inotify_rm_watch
|
||||||
|
data8 0 // migrate_pages // 1280
|
||||||
|
data8 0 // openat
|
||||||
|
data8 0 // mkdirat
|
||||||
|
data8 0 // mknodat
|
||||||
|
data8 0 // fchownat
|
||||||
|
data8 0 // futimesat // 1285
|
||||||
|
data8 0 // newfstatat
|
||||||
|
data8 0 // unlinkat
|
||||||
|
data8 0 // renameat
|
||||||
|
data8 0 // linkat
|
||||||
|
data8 0 // symlinkat // 1290
|
||||||
|
data8 0 // readlinkat
|
||||||
|
data8 0 // fchmodat
|
||||||
|
data8 0 // faccessat
|
||||||
|
data8 0
|
||||||
|
data8 0 // 1295
|
||||||
|
data8 0 // unshare
|
||||||
|
data8 0 // splice
|
||||||
|
data8 0 // set_robust_list
|
||||||
|
data8 0 // get_robust_list
|
||||||
|
data8 0 // sync_file_range // 1300
|
||||||
|
data8 0 // tee
|
||||||
|
data8 0 // vmsplice
|
||||||
|
data8 0
|
||||||
|
data8 fsys_getcpu // getcpu // 1304
|
||||||
|
|
||||||
// fill in zeros for the remaining entries
|
// fill in zeros for the remaining entries
|
||||||
.zero:
|
.zero:
|
||||||
|
Loading…
Reference in New Issue
Block a user