perf bench: Add event synthesis benchmark

Event synthesis may occur at the start or end (tail) of a perf command.
In system-wide mode it can scan every process in /proc, which may add
seconds of latency before event recording. Add a new benchmark that
times how long event synthesis takes with and without data synthesis.

An example execution looks like:

 $ perf bench internals synthesize
 # Running 'internals/synthesize' benchmark:
 Average synthesis took: 168.253800 usec
 Average data synthesis took: 208.104700 usec

Signed-off-by: Ian Rogers <irogers@google.com>
Acked-by: Jiri Olsa <jolsa@redhat.com>
Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Andrey Zhizhikin <andrey.z@gmail.com>
Cc: Kan Liang <kan.liang@linux.intel.com>
Cc: Kefeng Wang <wangkefeng.wang@huawei.com>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Petr Mladek <pmladek@suse.com>
Cc: Stephane Eranian <eranian@google.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Link: http://lore.kernel.org/lkml/20200402154357.107873-2-irogers@google.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
This commit is contained in:
Ian Rogers 2020-04-02 08:43:53 -07:00 committed by Arnaldo Carvalho de Melo
parent 1a2725f3ee
commit 2a4b51666a
5 changed files with 117 additions and 2 deletions

View File

@ -61,6 +61,9 @@ SUBSYSTEM
'epoll'::
Eventpoll (epoll) stressing benchmarks.
'internals'::
Benchmark internal perf functionality.
'all'::
All benchmark subsystems.
@ -214,6 +217,11 @@ Suite for evaluating concurrent epoll_wait calls.
*ctl*::
Suite for evaluating multiple epoll_ctl calls.
SUITES FOR 'internals'
~~~~~~~~~~~~~~~~~~~~~~
*synthesize*::
Suite for evaluating perf's event synthesis performance.
SEE ALSO
--------
linkperf:perf[1]

View File

@ -6,9 +6,9 @@ perf-y += futex-wake.o
perf-y += futex-wake-parallel.o
perf-y += futex-requeue.o
perf-y += futex-lock-pi.o
perf-y += epoll-wait.o
perf-y += epoll-ctl.o
perf-y += synthesize.o
perf-$(CONFIG_X86_64) += mem-memcpy-x86-64-lib.o
perf-$(CONFIG_X86_64) += mem-memcpy-x86-64-asm.o

View File

@ -41,9 +41,9 @@ int bench_futex_wake_parallel(int argc, const char **argv);
int bench_futex_requeue(int argc, const char **argv);
/* pi futexes */
int bench_futex_lock_pi(int argc, const char **argv);
int bench_epoll_wait(int argc, const char **argv);
int bench_epoll_ctl(int argc, const char **argv);
int bench_synthesize(int argc, const char **argv);
#define BENCH_FORMAT_DEFAULT_STR "default"
#define BENCH_FORMAT_DEFAULT 0

View File

@ -0,0 +1,101 @@
// SPDX-License-Identifier: GPL-2.0
/*
* Benchmark synthesis of perf events, such as happens at the start of a
* 'perf record'. Synthesis is done on the current process, and the 'dummy'
* event handlers, which support dump_trace but otherwise do nothing, are invoked.
*
* Copyright 2019 Google LLC.
*/
#include <errno.h>
#include <stdio.h>
#include "bench.h"
#include "../util/debug.h"
#include "../util/session.h"
#include "../util/synthetic-events.h"
#include "../util/target.h"
#include "../util/thread_map.h"
#include "../util/tool.h"
#include <linux/err.h>
#include <linux/time64.h>
#include <subcmd/parse-options.h>
/*
 * Number of synthesis rounds timed per measurement; defaults to 10000 and
 * can be overridden with -i/--iterations.
 */
static unsigned int iterations = 10000;
/* Command-line options handed to parse_options() by bench_synthesize(). */
static const struct option options[] = {
OPT_UINTEGER('i', "iterations", &iterations,
"Number of iterations used to compute average"),
OPT_END()
};
/* Usage text passed to parse_options(); the list ends with a NULL entry. */
static const char *const usage[] = {
"perf bench internals synthesize <options>",
NULL
};
static int do_synthesize(struct perf_session *session,
struct perf_thread_map *threads,
struct target *target, bool data_mmap)
{
const unsigned int nr_threads_synthesize = 1;
struct timeval start, end, diff;
u64 runtime_us;
unsigned int i;
double average;
int err;
gettimeofday(&start, NULL);
for (i = 0; i < iterations; i++) {
err = machine__synthesize_threads(&session->machines.host,
target, threads, data_mmap,
nr_threads_synthesize);
if (err)
return err;
}
gettimeofday(&end, NULL);
timersub(&end, &start, &diff);
runtime_us = diff.tv_sec * USEC_PER_SEC + diff.tv_usec;
average = (double)runtime_us/(double)iterations;
printf("Average %ssynthesis took: %f usec\n",
data_mmap ? "data " : "", average);
return 0;
}
int bench_synthesize(int argc, const char **argv)
{
struct perf_tool tool;
struct perf_session *session;
struct target target = {
.pid = "self",
};
struct perf_thread_map *threads;
int err;
argc = parse_options(argc, argv, options, usage, 0);
session = perf_session__new(NULL, false, NULL);
if (IS_ERR(session)) {
pr_err("Session creation failed.\n");
return PTR_ERR(session);
}
threads = thread_map__new_by_pid(getpid());
if (!threads) {
pr_err("Thread map creation failed.\n");
err = -ENOMEM;
goto err_out;
}
perf_tool__fill_defaults(&tool);
err = do_synthesize(session, threads, &target, false);
if (err)
goto err_out;
err = do_synthesize(session, threads, &target, true);
err_out:
if (threads)
perf_thread_map__put(threads);
perf_session__delete(session);
return err;
}

View File

@ -76,6 +76,11 @@ static struct bench epoll_benchmarks[] = {
};
#endif // HAVE_EVENTFD
/*
 * Benchmarks belonging to the 'internals' collection. Each entry gives a
 * name, a short description and the function to run; the trailing all-NULL
 * entry presumably marks the end of the table.
 */
static struct bench internals_benchmarks[] = {
{ "synthesize", "Benchmark perf event synthesis", bench_synthesize },
{ NULL, NULL, NULL }
};
struct collection {
const char *name;
const char *summary;
@ -92,6 +97,7 @@ static struct collection collections[] = {
#ifdef HAVE_EVENTFD
{"epoll", "Epoll stressing benchmarks", epoll_benchmarks },
#endif
{ "internals", "Perf-internals benchmarks", internals_benchmarks },
{ "all", "All benchmarks", NULL },
{ NULL, NULL, NULL }
};