commit 1f0d69a9fc
Impact: new unlikely/likely profiler

Andrew Morton recently suggested having an in-kernel way to profile
likely and unlikely macros.  This patch achieves that goal.

When configured, every(*) likely and unlikely macro gets a counter
attached to it.  When the condition is hit, the hits and misses of
that condition are recorded.  These numbers can later be retrieved by:

  /debugfs/tracing/profile_likely    - All likely markers
  /debugfs/tracing/profile_unlikely  - All unlikely markers.

# cat /debug/tracing/profile_unlikely | head
 correct incorrect   %       Function               File              Line
 ------- ---------   -       --------               ----              ----
    2167         0   0 do_arch_prctl                process_64.c       832
       0         0   0 do_arch_prctl                process_64.c       804
    2670         0   0 IS_ERR                       err.h               34
   71230      5693   7 __switch_to                  process_64.c       673
   76919         0   0 __switch_to                  process_64.c       639
   43184     33743  43 __switch_to                  process_64.c       624
   12740     64181  83 __switch_to                  process_64.c       594
   12740     64174  83 __switch_to                  process_64.c       590

# cat /debug/tracing/profile_unlikely | \
  awk '{ if ($3 > 25) print $0; }' | head -20
   44963     35259  43 __switch_to                  process_64.c       624
   12762     67454  84 __switch_to                  process_64.c       594
   12762     67447  84 __switch_to                  process_64.c       590
    1478       595  28 syscall_get_error            syscall.h           51
       0      2821 100 syscall_trace_leave          ptrace.c          1567
       0         1 100 native_smp_prepare_cpus      smpboot.c         1237
   86338    265881  75 calc_delta_fair              sched_fair.c       408
  210410    108540  34 calc_delta_mine              sched.c           1267
       0     54550 100 sched_info_queued            sched_stats.h      222
   51899     66435  56 pick_next_task_fair          sched_fair.c      1422
       6        10  62 yield_task_fair              sched_fair.c       982
    7325      2692  26 rt_policy                    sched.c            144
       0      1270 100 pre_schedule_rt              sched_rt.c        1261
    1268     48073  97 pick_next_task_rt            sched_rt.c         884
       0     45181 100 sched_info_dequeued          sched_stats.h      177
       0        15 100 sched_move_task              sched.c           8700
       0        15 100 sched_move_task              sched.c           8690
   53167     33217  38 schedule                     sched.c           4457
       0     80208 100 sched_info_switch            sched_stats.h      270
   30585     49631  61 context_switch               sched.c           2619

# cat /debug/tracing/profile_likely | awk '{ if ($3 > 25) print $0; }'
   39900     36577  47 pick_next_task               sched.c           4397
   20824     15233  42 switch_mm                    mmu_context_64.h    18
       0         7 100 __cancel_work_timer          workqueue.c        560
     617     66484  99 clocksource_adjust           timekeeping.c      456
       0    346340 100 audit_syscall_exit           auditsc.c         1570
      38    347350  99 audit_get_context            auditsc.c          732
       0    345244 100 audit_syscall_entry          auditsc.c         1541
      38      1017  96 audit_free                   auditsc.c         1446
       0      1090 100 audit_alloc                  auditsc.c          862
    2618      1090  29 audit_alloc                  auditsc.c          858
       0         6 100 move_masked_irq              migration.c          9
       1       198  99 probe_sched_wakeup           trace_sched_switch.c    58
       2         2  50 probe_wakeup                 trace_sched_wakeup.c   227
       0         2 100 probe_wakeup_sched_switch    trace_sched_wakeup.c   144
    4514      2090  31 __grab_cache_page            filemap.c         2149
   12882    228786  94 mapping_unevictable          pagemap.h           50
       4        11  73 __flush_cpu_slab             slub.c            1466
  627757    330451  34 slab_free                    slub.c            1731
    2959     61245  95 dentry_lru_del_init          dcache.c           153
     946      1217  56 load_elf_binary              binfmt_elf.c       904
     102        82  44 disk_put_part                genhd.h            206
       1         1  50 dst_gc_task                  dst.c               82
       0        19 100 tcp_mss_split_point          tcp_output.c      1126

As you can see by the above, there's a bit of work to do in rethinking
the use of some unlikelys and likelys.  Note: the unlikely case had 71
hits that were more than 25%.

Note: After submitting my first version of this patch, Andrew Morton
showed me a version written by Daniel Walker, from which I picked up
the following ideas:

 1) Using __builtin_constant_p to avoid profiling fixed values.
 2) Using __FILE__ instead of instruction pointers.
 3) Using the preprocessor to stop all profiling of likely annotations
    from vsyscall_64.c.

Thanks to Andrew Morton, Arjan van de Ven, Theodore Tso and Ingo Molnar
for their feedback on this patch.

(*) Not every unlikely is recorded; those that are used by vsyscalls
    (a few of them) had to have profiling disabled.

Signed-off-by: Steven Rostedt <srostedt@redhat.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Theodore Tso <tytso@mit.edu>
Cc: Arjan van de Ven <arjan@infradead.org>
Cc: Steven Rostedt <srostedt@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
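
To make the mechanism concrete before the header itself, here is a small
user-space sketch of the same idea.  All names in it (branch_stat,
LIKELY_PROFILED, branch_prof) are invented for illustration and are not
part of the patch, and it assumes gcc/clang with GNU ld, whose linker
provides __start_/__stop_ symbols for any section named like a C
identifier.  Each annotated site plants a static record in a named
section, and a report loop walks that section, which is the same trick
the patch plays with its _ftrace_likely section and debugfs files:

	#include <stdio.h>

	struct branch_stat {
		const char *func, *file;
		unsigned line;
		unsigned long correct, incorrect;
	};

	/* Count how often the "likely" hint was right at this call site,
	 * then still hand the result to __builtin_expect(). */
	#define LIKELY_PROFILED(x) ({					\
		static struct branch_stat __s				\
			__attribute__((section("branch_prof"),		\
				       aligned(8), used)) = {		\
			.func = __func__, .file = __FILE__,		\
			.line = __LINE__,				\
		};							\
		int __r = !!(x);					\
		if (__r)						\
			__s.correct++;					\
		else							\
			__s.incorrect++;				\
		__builtin_expect(__r, 1);				\
	})

	/* Emitted by the linker for the "branch_prof" section. */
	extern struct branch_stat __start_branch_prof[], __stop_branch_prof[];

	int main(void)
	{
		int hits = 0;

		for (int i = 0; i < 1000; i++)
			if (LIKELY_PROFILED(i % 10 != 0))	/* true 90% of the time */
				hits++;

		/* One row per annotated site: correct, incorrect, location. */
		for (struct branch_stat *s = __start_branch_prof;
		     s < __stop_branch_prof; s++)
			printf("%8lu %9lu  %s %s:%u\n", s->correct,
			       s->incorrect, s->func, s->file, s->line);
		return hits != 900;
	}

The single site above reports 900 correct and 100 incorrect; the kernel
version differs mainly in that the records live in _ftrace_likely or
_ftrace_unlikely and are read out through debugfs instead of a loop in
main().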
include/linux/compiler.h (259 lines, 7.2 KiB, C)
#ifndef __LINUX_COMPILER_H
#define __LINUX_COMPILER_H

#ifndef __ASSEMBLY__

#ifdef __CHECKER__
# define __user		__attribute__((noderef, address_space(1)))
# define __kernel	/* default address space */
# define __safe		__attribute__((safe))
# define __force	__attribute__((force))
# define __nocast	__attribute__((nocast))
# define __iomem	__attribute__((noderef, address_space(2)))
# define __acquires(x)	__attribute__((context(x,0,1)))
# define __releases(x)	__attribute__((context(x,1,0)))
# define __acquire(x)	__context__(x,1)
# define __release(x)	__context__(x,-1)
# define __cond_lock(x,c)	((c) ? ({ __acquire(x); 1; }) : 0)
extern void __chk_user_ptr(const volatile void __user *);
extern void __chk_io_ptr(const volatile void __iomem *);
#else
# define __user
# define __kernel
# define __safe
# define __force
# define __nocast
# define __iomem
# define __chk_user_ptr(x) (void)0
# define __chk_io_ptr(x) (void)0
# define __builtin_warning(x, y...) (1)
# define __acquires(x)
# define __releases(x)
# define __acquire(x) (void)0
# define __release(x) (void)0
# define __cond_lock(x,c) (c)
#endif
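
/*
 * __CHECKER__ is defined only when this file is parsed by the sparse
 * static checker (make C=1); under a real compiler all of the above
 * annotations expand to nothing.  A __user pointer, for instance,
 * costs nothing at run time but lets sparse warn when it is
 * dereferenced directly instead of via copy_from_user() and friends.
 */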

#ifdef __KERNEL__

#if __GNUC__ >= 4
# include <linux/compiler-gcc4.h>
#elif __GNUC__ == 3 && __GNUC_MINOR__ >= 2
# include <linux/compiler-gcc3.h>
#else
# error Sorry, your compiler is too old/not recognized.
#endif

#define notrace __attribute__((no_instrument_function))

/* Intel compiler defines __GNUC__. So we will overwrite implementations
 * coming from above header files here
 */
#ifdef __INTEL_COMPILER
# include <linux/compiler-intel.h>
#endif

/*
 * Generic compiler-dependent macros required for kernel
 * build go below this comment. Actual compiler/compiler version
 * specific implementations come from the above header files
 */
#ifdef CONFIG_TRACE_UNLIKELY_PROFILE
struct ftrace_likely_data {
	const char *func;
	const char *file;
	unsigned line;
	unsigned long correct;
	unsigned long incorrect;
};
void ftrace_likely_update(struct ftrace_likely_data *f, int val, int expect);

#define likely_notrace(x)	__builtin_expect(!!(x), 1)
#define unlikely_notrace(x)	__builtin_expect(!!(x), 0)

#define likely_check(x) ({					\
		int ______r;					\
		static struct ftrace_likely_data		\
			__attribute__((__aligned__(4)))		\
			__attribute__((section("_ftrace_likely"))) \
			______f = {				\
				.func = __func__,		\
				.file = __FILE__,		\
				.line = __LINE__,		\
			};					\
		______f.line = __LINE__;			\
		______r = likely_notrace(x);			\
		ftrace_likely_update(&______f, ______r, 1);	\
		______r;					\
	})
#define unlikely_check(x) ({					\
		int ______r;					\
		static struct ftrace_likely_data		\
			__attribute__((__aligned__(4)))		\
			__attribute__((section("_ftrace_unlikely"))) \
			______f = {				\
				.func = __func__,		\
				.file = __FILE__,		\
				.line = __LINE__,		\
			};					\
		______f.line = __LINE__;			\
		______r = unlikely_notrace(x);			\
		ftrace_likely_update(&______f, ______r, 0);	\
		______r;					\
	})
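
/*
 * Each likely_check()/unlikely_check() call site gets its own static
 * ftrace_likely_data record, placed in the _ftrace_likely or
 * _ftrace_unlikely section; the tracer can then walk everything the
 * linker gathered into those sections to produce the profile_likely
 * and profile_unlikely files shown in the changelog above.
 */
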
/*
 * Using __builtin_constant_p(x) to ignore cases where the return
 * value is always the same.  This idea is taken from a similar patch
 * written by Daniel Walker.
 */
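/*
 * For example, likely(sizeof(long) == 8) is a compile-time constant:
 * __builtin_constant_p() is true, the plain value is used, and no
 * counter is emitted.  Only data-dependent conditions get profiled.
 */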
# ifndef likely
#  define likely(x)	(__builtin_constant_p(x) ? !!(x) : likely_check(x))
# endif
# ifndef unlikely
#  define unlikely(x)	(__builtin_constant_p(x) ? !!(x) : unlikely_check(x))
# endif
#else
# define likely(x)	__builtin_expect(!!(x), 1)
# define unlikely(x)	__builtin_expect(!!(x), 0)
#endif

/* Optimization barrier */
#ifndef barrier
# define barrier() __memory_barrier()
#endif

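/*
 * RELOC_HIDE() launders a pointer through an unsigned long so the
 * compiler can no longer tell which object the result points into and
 * cannot apply offset or aliasing assumptions to it (per-cpu variable
 * addressing is the classic user).  This generic fallback is a plain
 * cast; compiler-gcc.h overrides it with an asm that truly hides the
 * value from gcc.
 */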
#ifndef RELOC_HIDE
# define RELOC_HIDE(ptr, off)					\
  ({ unsigned long __ptr;					\
     __ptr = (unsigned long) (ptr);				\
    (typeof(ptr)) (__ptr + (off)); })
#endif

#endif /* __KERNEL__ */

#endif /* __ASSEMBLY__ */

#ifdef __KERNEL__
/*
 * Allow us to mark functions as 'deprecated' and have gcc emit a nice
 * warning for each use, in hopes of speeding the functions removal.
 * Usage is:
 *		int __deprecated foo(void)
 */
#ifndef __deprecated
# define __deprecated		/* unimplemented */
#endif

#ifdef MODULE
#define __deprecated_for_modules __deprecated
#else
#define __deprecated_for_modules
#endif

#ifndef __must_check
#define __must_check
#endif

#ifndef CONFIG_ENABLE_MUST_CHECK
#undef __must_check
#define __must_check
#endif
#ifndef CONFIG_ENABLE_WARN_DEPRECATED
#undef __deprecated
#undef __deprecated_for_modules
#define __deprecated
#define __deprecated_for_modules
#endif

/*
 * Allow us to avoid 'defined but not used' warnings on functions and data,
 * as well as force them to be emitted to the assembly file.
 *
 * As of gcc 3.4, static functions that are not marked with attribute((used))
 * may be elided from the assembly file.  As of gcc 3.4, static data not so
 * marked will not be elided, but this may change in a future gcc version.
 *
 * NOTE: Because distributions shipped with a backported unit-at-a-time
 * compiler in gcc 3.3, we must define __used to be __attribute__((used))
 * for gcc >=3.3 instead of 3.4.
 *
 * In prior versions of gcc, such functions and data would be emitted, but
 * would be warned about except with attribute((unused)).
 *
 * Mark functions that are referenced only in inline assembly as __used so
 * the code is emitted even though it appears to be unreferenced.
 */
#ifndef __used
# define __used			/* unimplemented */
#endif

#ifndef __maybe_unused
# define __maybe_unused		/* unimplemented */
#endif

#ifndef noinline
#define noinline
#endif

/*
 * Rather than using noinline to prevent stack consumption, use
 * noinline_for_stack instead.  For documentation reasons.
 */
#define noinline_for_stack noinline

#ifndef __always_inline
#define __always_inline	inline
#endif

#endif /* __KERNEL__ */

/*
 * From the GCC manual:
 *
 * Many functions do not examine any values except their arguments,
 * and have no effects except the return value.  Basically this is
 * just slightly more strict class than the `pure' attribute above,
 * since function is not allowed to read global memory.
 *
 * Note that a function that has pointer arguments and examines the
 * data pointed to must _not_ be declared `const'.  Likewise, a
 * function that calls a non-`const' function usually must not be
 * `const'.  It does not make sense for a `const' function to return
 * `void'.
 */
#ifndef __attribute_const__
# define __attribute_const__	/* unimplemented */
#endif

/*
 * Tell gcc if a function is cold. The compiler will assume any path
 * directly leading to the call is unlikely.
 */

#ifndef __cold
#define __cold
#endif

/* Simple shorthand for a section definition */
#ifndef __section
# define __section(S) __attribute__ ((__section__(#S)))
#endif

/*
 * Prevent the compiler from merging or refetching accesses.  The compiler
 * is also forbidden from reordering successive instances of ACCESS_ONCE(),
 * but only when the compiler is aware of some particular ordering.  One way
 * to make the compiler aware of ordering is to put the two invocations of
 * ACCESS_ONCE() in different C statements.
 *
 * This macro does absolutely -nothing- to prevent the CPU from reordering,
 * merging, or refetching absolutely anything at any time.  Its main intended
 * use is to mediate communication between process-level code and irq/NMI
 * handlers, all running on the same CPU.
 */
#define ACCESS_ONCE(x) (*(volatile typeof(x) *)&(x))
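
/*
 * Typical use: re-reading a flag that an interrupt handler may change
 * behind the compiler's back, e.g.
 *
 *	while (!ACCESS_ONCE(done))
 *		cpu_relax();
 *
 * Without the volatile cast the compiler could legally hoist the load
 * of 'done' out of the loop and spin forever on a stale value.
 */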

#endif /* __LINUX_COMPILER_H */