forked from luck/tmp_suning_uos_patched
0bf02a0d80
Sometimes I can see that 'perf record' piped with 'perf inject' take a long time processing build-ids. So introduce a inject-build-id benchmark to the internals benchmark suite to measure its overhead regularly. It runs the 'perf inject' command internally and feeds the given number of synthesized events (MMAP2 + SAMPLE basically). Usage: perf bench internals inject-build-id <options> -i, --iterations <n> Number of iterations used to compute average (default: 100) -m, --nr-mmaps <n> Number of mmap events for each iteration (default: 100) -n, --nr-samples <n> Number of sample events per mmap event (default: 100) -v, --verbose be more verbose (show iteration count, DSO name, etc) By default, it measures average processing time of 100 MMAP2 events and 10000 SAMPLE events. Below is a result on my laptop. $ perf bench internals inject-build-id # Running 'internals/inject-build-id' benchmark: Average build-id injection took: 25.789 msec (+- 0.202 msec) Average time per event: 2.528 usec (+- 0.020 usec) Average memory usage: 8411 KB (+- 7 KB) Committer testing: $ perf bench Usage: perf bench [<common options>] <collection> <benchmark> [<options>] # List of all available benchmark collections: sched: Scheduler and IPC benchmarks syscall: System call benchmarks mem: Memory access benchmarks numa: NUMA scheduling and MM benchmarks futex: Futex stressing benchmarks epoll: Epoll stressing benchmarks internals: Perf-internals benchmarks all: All benchmarks $ perf bench internals # List of available benchmarks for collection 'internals': synthesize: Benchmark perf event synthesis kallsyms-parse: Benchmark kallsyms parsing inject-build-id: Benchmark build-id injection $ perf bench internals inject-build-id # Running 'internals/inject-build-id' benchmark: Average build-id injection took: 14.202 msec (+- 0.059 msec) Average time per event: 1.392 usec (+- 0.006 usec) Average memory usage: 12650 KB (+- 10 KB) Average build-id-all injection took: 12.831 msec (+- 0.071 msec) Average time per event: 1.258 usec (+- 0.007 usec) Average memory usage: 11895 KB (+- 10 KB) $ $ perf stat -r5 perf bench internals inject-build-id # Running 'internals/inject-build-id' benchmark: Average build-id injection took: 14.380 msec (+- 0.056 msec) Average time per event: 1.410 usec (+- 0.006 usec) Average memory usage: 12608 KB (+- 11 KB) Average build-id-all injection took: 11.889 msec (+- 0.064 msec) Average time per event: 1.166 usec (+- 0.006 usec) Average memory usage: 11838 KB (+- 10 KB) # Running 'internals/inject-build-id' benchmark: Average build-id injection took: 14.246 msec (+- 0.065 msec) Average time per event: 1.397 usec (+- 0.006 usec) Average memory usage: 12744 KB (+- 10 KB) Average build-id-all injection took: 12.019 msec (+- 0.066 msec) Average time per event: 1.178 usec (+- 0.006 usec) Average memory usage: 11963 KB (+- 10 KB) # Running 'internals/inject-build-id' benchmark: Average build-id injection took: 14.321 msec (+- 0.067 msec) Average time per event: 1.404 usec (+- 0.007 usec) Average memory usage: 12690 KB (+- 10 KB) Average build-id-all injection took: 11.909 msec (+- 0.041 msec) Average time per event: 1.168 usec (+- 0.004 usec) Average memory usage: 11938 KB (+- 10 KB) # Running 'internals/inject-build-id' benchmark: Average build-id injection took: 14.287 msec (+- 0.059 msec) Average time per event: 1.401 usec (+- 0.006 usec) Average memory usage: 12864 KB (+- 10 KB) Average build-id-all injection took: 11.862 msec (+- 0.058 msec) Average time per event: 1.163 usec (+- 0.006 usec) Average memory usage: 12103 KB (+- 10 KB) # Running 'internals/inject-build-id' benchmark: Average build-id injection took: 14.402 msec (+- 0.053 msec) Average time per event: 1.412 usec (+- 0.005 usec) Average memory usage: 12876 KB (+- 10 KB) Average build-id-all injection took: 11.826 msec (+- 0.061 msec) Average time per event: 1.159 usec (+- 0.006 usec) Average memory usage: 12111 KB (+- 10 KB) Performance counter stats for 'perf bench internals inject-build-id' (5 runs): 4,267.48 msec task-clock:u # 1.502 CPUs utilized ( +- 0.14% ) 0 context-switches:u # 0.000 K/sec 0 cpu-migrations:u # 0.000 K/sec 102,092 page-faults:u # 0.024 M/sec ( +- 0.08% ) 3,894,589,578 cycles:u # 0.913 GHz ( +- 0.19% ) (83.49%) 140,078,421 stalled-cycles-frontend:u # 3.60% frontend cycles idle ( +- 0.77% ) (83.34%) 948,581,189 stalled-cycles-backend:u # 24.36% backend cycles idle ( +- 0.46% ) (83.25%) 5,835,587,719 instructions:u # 1.50 insn per cycle # 0.16 stalled cycles per insn ( +- 0.21% ) (83.24%) 1,267,423,636 branches:u # 296.996 M/sec ( +- 0.22% ) (83.12%) 17,484,290 branch-misses:u # 1.38% of all branches ( +- 0.12% ) (83.55%) 2.84176 +- 0.00222 seconds time elapsed ( +- 0.08% ) $ Acked-by: Jiri Olsa <jolsa@redhat.com> Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com> Signed-off-by: Namhyung Kim <namhyung@kernel.org> Link: https://lore.kernel.org/r/20201012070214.2074921-2-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
74 lines
2.1 KiB
C
74 lines
2.1 KiB
C
/* SPDX-License-Identifier: GPL-2.0 */
|
|
#ifndef BENCH_H
|
|
#define BENCH_H
|
|
|
|
#include <sys/time.h>
|
|
|
|
extern struct timeval bench__start, bench__end, bench__runtime;
|
|
|
|
/*
|
|
* The madvise transparent hugepage constants were added in glibc
|
|
* 2.13. For compatibility with older versions of glibc, define these
|
|
* tokens if they are not already defined.
|
|
*
|
|
* PA-RISC uses different madvise values from other architectures and
|
|
* needs to be special-cased.
|
|
*/
|
|
#ifdef __hppa__
|
|
# ifndef MADV_HUGEPAGE
|
|
# define MADV_HUGEPAGE 67
|
|
# endif
|
|
# ifndef MADV_NOHUGEPAGE
|
|
# define MADV_NOHUGEPAGE 68
|
|
# endif
|
|
#else
|
|
# ifndef MADV_HUGEPAGE
|
|
# define MADV_HUGEPAGE 14
|
|
# endif
|
|
# ifndef MADV_NOHUGEPAGE
|
|
# define MADV_NOHUGEPAGE 15
|
|
# endif
|
|
#endif
|
|
|
|
int bench_numa(int argc, const char **argv);
|
|
int bench_sched_messaging(int argc, const char **argv);
|
|
int bench_sched_pipe(int argc, const char **argv);
|
|
int bench_syscall_basic(int argc, const char **argv);
|
|
int bench_mem_memcpy(int argc, const char **argv);
|
|
int bench_mem_memset(int argc, const char **argv);
|
|
int bench_mem_find_bit(int argc, const char **argv);
|
|
int bench_futex_hash(int argc, const char **argv);
|
|
int bench_futex_wake(int argc, const char **argv);
|
|
int bench_futex_wake_parallel(int argc, const char **argv);
|
|
int bench_futex_requeue(int argc, const char **argv);
|
|
/* pi futexes */
|
|
int bench_futex_lock_pi(int argc, const char **argv);
|
|
int bench_epoll_wait(int argc, const char **argv);
|
|
int bench_epoll_ctl(int argc, const char **argv);
|
|
int bench_synthesize(int argc, const char **argv);
|
|
int bench_kallsyms_parse(int argc, const char **argv);
|
|
int bench_inject_build_id(int argc, const char **argv);
|
|
|
|
#define BENCH_FORMAT_DEFAULT_STR "default"
|
|
#define BENCH_FORMAT_DEFAULT 0
|
|
#define BENCH_FORMAT_SIMPLE_STR "simple"
|
|
#define BENCH_FORMAT_SIMPLE 1
|
|
|
|
#define BENCH_FORMAT_UNKNOWN -1
|
|
|
|
extern int bench_format;
|
|
extern unsigned int bench_repeat;
|
|
|
|
#ifndef HAVE_PTHREAD_ATTR_SETAFFINITY_NP
|
|
#include <pthread.h>
|
|
#include <linux/compiler.h>
|
|
static inline int pthread_attr_setaffinity_np(pthread_attr_t *attr __maybe_unused,
|
|
size_t cpusetsize __maybe_unused,
|
|
cpu_set_t *cpuset __maybe_unused)
|
|
{
|
|
return 0;
|
|
}
|
|
#endif
|
|
|
|
#endif
|