forked from luck/tmp_suning_uos_patched
perf sched: Add sched latency profiling
Add the -l --latency option that reports statistics about the scheduler latencies. For now, the latencies are measured in the following sequence scope: - task A is sleeping (D or S state) - task B wakes up A ^ | | latency timeframe | | v - task A is scheduled in Start by recording every scheduler events: perf record -e sched:* and then fetch the results: perf sched -l Tasks count total avg max migration/0 2 39849 19924 28826 ksoftirqd/0 7 756383 108054 373014 migration/1 5 45391 9078 10452 ksoftirqd/1 2 399055 199527 359130 events/0 8 4780110 597513 4500250 events/1 9 6353057 705895 2986012 kblockd/0 42 37805097 900121 5077684 The snapshot are in nanoseconds. - Count: number of snapshots taken for the given task - Total: total latencies in nanosec - Avg : average of latency between wake up and sched in - Max : max snapshot latency Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com> Cc: Peter Zijlstra <a.p.zijlstra@chello.nl> Cc: Mike Galbraith <efault@gmx.de> Cc: Paul Mackerras <paulus@samba.org> Cc: Arnaldo Carvalho de Melo <acme@redhat.com> Signed-off-by: Ingo Molnar <mingo@elte.hu>
This commit is contained in:
parent
419ab0d6a9
commit
cdce9d738b
|
@ -31,6 +31,7 @@ static struct perf_header *header;
|
|||
static u64 sample_type;
|
||||
|
||||
static int replay_mode;
|
||||
static int lat_mode;
|
||||
|
||||
|
||||
/*
|
||||
|
@ -847,6 +848,269 @@ static struct trace_sched_handler replay_ops = {
|
|||
.fork_event = replay_fork_event,
|
||||
};
|
||||
|
||||
#define TASK_STATE_TO_CHAR_STR "RSDTtZX"
|
||||
|
||||
enum thread_state {
|
||||
THREAD_SLEEPING,
|
||||
THREAD_WAKED_UP,
|
||||
THREAD_SCHED_IN,
|
||||
THREAD_IGNORE
|
||||
};
|
||||
|
||||
struct lat_snapshot {
|
||||
struct list_head list;
|
||||
enum thread_state state;
|
||||
u64 wake_up_time;
|
||||
u64 sched_in_time;
|
||||
};
|
||||
|
||||
struct thread_latency {
|
||||
struct list_head snapshot_list;
|
||||
struct thread *thread;
|
||||
struct rb_node node;
|
||||
};
|
||||
|
||||
static struct rb_root lat_snapshot_root;
|
||||
|
||||
static struct thread_latency *
|
||||
thread_latency_search(struct rb_root *root, struct thread *thread)
|
||||
{
|
||||
struct rb_node *node = root->rb_node;
|
||||
|
||||
while (node) {
|
||||
struct thread_latency *lat;
|
||||
|
||||
lat = container_of(node, struct thread_latency, node);
|
||||
if (thread->pid < lat->thread->pid)
|
||||
node = node->rb_left;
|
||||
else if (thread->pid > lat->thread->pid)
|
||||
node = node->rb_right;
|
||||
else {
|
||||
return lat;
|
||||
}
|
||||
}
|
||||
return NULL;
|
||||
}
|
||||
|
||||
static void
|
||||
__thread_latency_insert(struct rb_root *root, struct thread_latency *data)
|
||||
{
|
||||
struct rb_node **new = &(root->rb_node), *parent = NULL;
|
||||
|
||||
while (*new) {
|
||||
struct thread_latency *this;
|
||||
|
||||
this = container_of(*new, struct thread_latency, node);
|
||||
parent = *new;
|
||||
if (data->thread->pid < this->thread->pid)
|
||||
new = &((*new)->rb_left);
|
||||
else if (data->thread->pid > this->thread->pid)
|
||||
new = &((*new)->rb_right);
|
||||
else
|
||||
die("Double thread insertion\n");
|
||||
}
|
||||
|
||||
rb_link_node(&data->node, parent, new);
|
||||
rb_insert_color(&data->node, root);
|
||||
}
|
||||
|
||||
static void thread_latency_insert(struct thread *thread)
|
||||
{
|
||||
struct thread_latency *lat;
|
||||
lat = calloc(sizeof(*lat), 1);
|
||||
if (!lat)
|
||||
die("No memory");
|
||||
|
||||
lat->thread = thread;
|
||||
INIT_LIST_HEAD(&lat->snapshot_list);
|
||||
__thread_latency_insert(&lat_snapshot_root, lat);
|
||||
}
|
||||
|
||||
static void
|
||||
latency_fork_event(struct trace_fork_event *fork_event __used,
|
||||
struct event *event __used,
|
||||
int cpu __used,
|
||||
u64 timestamp __used,
|
||||
struct thread *thread __used)
|
||||
{
|
||||
/* should insert the newcomer */
|
||||
}
|
||||
|
||||
static char sched_out_state(struct trace_switch_event *switch_event)
|
||||
{
|
||||
const char *str = TASK_STATE_TO_CHAR_STR;
|
||||
|
||||
return str[switch_event->prev_state];
|
||||
}
|
||||
|
||||
static void
|
||||
lat_sched_out(struct thread_latency *lat,
|
||||
struct trace_switch_event *switch_event)
|
||||
{
|
||||
struct lat_snapshot *snapshot;
|
||||
|
||||
if (sched_out_state(switch_event) == 'R')
|
||||
return;
|
||||
|
||||
snapshot = calloc(sizeof(*snapshot), 1);
|
||||
if (!snapshot)
|
||||
die("Non memory");
|
||||
|
||||
list_add_tail(&snapshot->list, &lat->snapshot_list);
|
||||
}
|
||||
|
||||
static void
|
||||
lat_sched_in(struct thread_latency *lat, u64 timestamp)
|
||||
{
|
||||
struct lat_snapshot *snapshot;
|
||||
|
||||
if (list_empty(&lat->snapshot_list))
|
||||
return;
|
||||
|
||||
snapshot = list_entry(lat->snapshot_list.prev, struct lat_snapshot,
|
||||
list);
|
||||
|
||||
if (snapshot->state != THREAD_WAKED_UP)
|
||||
return;
|
||||
|
||||
if (timestamp < snapshot->wake_up_time) {
|
||||
snapshot->state = THREAD_IGNORE;
|
||||
return;
|
||||
}
|
||||
|
||||
snapshot->state = THREAD_SCHED_IN;
|
||||
snapshot->sched_in_time = timestamp;
|
||||
}
|
||||
|
||||
|
||||
static void
|
||||
latency_switch_event(struct trace_switch_event *switch_event,
|
||||
struct event *event __used,
|
||||
int cpu __used,
|
||||
u64 timestamp,
|
||||
struct thread *thread __used)
|
||||
{
|
||||
struct thread_latency *out_lat, *in_lat;
|
||||
struct thread *sched_out, *sched_in;
|
||||
|
||||
sched_out = threads__findnew(switch_event->prev_pid, &threads, &last_match);
|
||||
sched_in = threads__findnew(switch_event->next_pid, &threads, &last_match);
|
||||
|
||||
in_lat = thread_latency_search(&lat_snapshot_root, sched_in);
|
||||
if (!in_lat) {
|
||||
thread_latency_insert(sched_in);
|
||||
in_lat = thread_latency_search(&lat_snapshot_root, sched_in);
|
||||
if (!in_lat)
|
||||
die("Internal latency tree error");
|
||||
}
|
||||
|
||||
out_lat = thread_latency_search(&lat_snapshot_root, sched_out);
|
||||
if (!out_lat) {
|
||||
thread_latency_insert(sched_out);
|
||||
out_lat = thread_latency_search(&lat_snapshot_root, sched_out);
|
||||
if (!out_lat)
|
||||
die("Internal latency tree error");
|
||||
}
|
||||
|
||||
lat_sched_in(in_lat, timestamp);
|
||||
lat_sched_out(out_lat, switch_event);
|
||||
}
|
||||
|
||||
static void
|
||||
latency_wakeup_event(struct trace_wakeup_event *wakeup_event,
|
||||
struct event *event __used,
|
||||
int cpu __used,
|
||||
u64 timestamp,
|
||||
struct thread *thread __used)
|
||||
{
|
||||
struct thread_latency *lat;
|
||||
struct lat_snapshot *snapshot;
|
||||
struct thread *wakee;
|
||||
|
||||
/* Note for later, it may be interesting to observe the failing cases */
|
||||
if (!wakeup_event->success)
|
||||
return;
|
||||
|
||||
wakee = threads__findnew(wakeup_event->pid, &threads, &last_match);
|
||||
lat = thread_latency_search(&lat_snapshot_root, wakee);
|
||||
if (!lat) {
|
||||
thread_latency_insert(wakee);
|
||||
return;
|
||||
}
|
||||
|
||||
if (list_empty(&lat->snapshot_list))
|
||||
return;
|
||||
|
||||
snapshot = list_entry(lat->snapshot_list.prev, struct lat_snapshot,
|
||||
list);
|
||||
|
||||
if (snapshot->state != THREAD_SLEEPING)
|
||||
return;
|
||||
|
||||
snapshot->state = THREAD_WAKED_UP;
|
||||
snapshot->wake_up_time = timestamp;
|
||||
}
|
||||
|
||||
static struct trace_sched_handler lat_ops = {
|
||||
.wakeup_event = latency_wakeup_event,
|
||||
.switch_event = latency_switch_event,
|
||||
.fork_event = latency_fork_event,
|
||||
};
|
||||
|
||||
static void output_lat_thread(struct thread_latency *lat)
|
||||
{
|
||||
struct lat_snapshot *shot;
|
||||
int count = 0;
|
||||
int i;
|
||||
int ret;
|
||||
u64 max = 0, avg;
|
||||
u64 total = 0, delta;
|
||||
|
||||
list_for_each_entry(shot, &lat->snapshot_list, list) {
|
||||
if (shot->state != THREAD_SCHED_IN)
|
||||
continue;
|
||||
|
||||
count++;
|
||||
|
||||
delta = shot->sched_in_time - shot->wake_up_time;
|
||||
if (delta > max)
|
||||
max = delta;
|
||||
total += delta;
|
||||
}
|
||||
|
||||
if (!count)
|
||||
return;
|
||||
|
||||
ret = printf("%s", lat->thread->comm);
|
||||
|
||||
for (i = 0; i < 25 - ret; i++)
|
||||
printf(" ");
|
||||
|
||||
avg = total / count;
|
||||
|
||||
printf("%5d %10llu %10llu %10llu\n", count, total, avg, max);
|
||||
}
|
||||
|
||||
static void output_lat_results(void)
|
||||
{
|
||||
struct rb_node *next;
|
||||
|
||||
printf(" Tasks");
|
||||
printf(" count");
|
||||
printf(" total");
|
||||
printf(" avg");
|
||||
printf(" max\n\n");
|
||||
|
||||
next = rb_first(&lat_snapshot_root);
|
||||
|
||||
while (next) {
|
||||
struct thread_latency *lat;
|
||||
|
||||
lat = rb_entry(next, struct thread_latency, node);
|
||||
output_lat_thread(lat);
|
||||
next = rb_next(next);
|
||||
}
|
||||
}
|
||||
|
||||
static struct trace_sched_handler *trace_handler;
|
||||
|
||||
|
@ -1154,6 +1418,8 @@ static const struct option options[] = {
|
|||
"dump raw trace in ASCII"),
|
||||
OPT_BOOLEAN('r', "replay", &replay_mode,
|
||||
"replay sched behaviour from traces"),
|
||||
OPT_BOOLEAN('l', "latency", &lat_mode,
|
||||
"measure various latencies"),
|
||||
OPT_BOOLEAN('v', "verbose", &verbose,
|
||||
"be more verbose (show symbol address, etc)"),
|
||||
OPT_END()
|
||||
|
@ -1180,22 +1446,30 @@ int cmd_sched(int argc, const char **argv, const char *prefix __used)
|
|||
|
||||
if (replay_mode)
|
||||
trace_handler = &replay_ops;
|
||||
else /* We may need a default subcommand */
|
||||
else if (lat_mode)
|
||||
trace_handler = &lat_ops;
|
||||
else /* We may need a default subcommand (perf trace?) */
|
||||
die("Please select a sub command (-r)\n");
|
||||
|
||||
calibrate_run_measurement_overhead();
|
||||
calibrate_sleep_measurement_overhead();
|
||||
if (replay_mode) {
|
||||
calibrate_run_measurement_overhead();
|
||||
calibrate_sleep_measurement_overhead();
|
||||
|
||||
test_calibrations();
|
||||
test_calibrations();
|
||||
|
||||
parse_trace();
|
||||
print_task_traces();
|
||||
add_cross_task_wakeups();
|
||||
parse_trace();
|
||||
print_task_traces();
|
||||
add_cross_task_wakeups();
|
||||
|
||||
create_tasks();
|
||||
printf("------------------------------------------------------------\n");
|
||||
for (i = 0; i < nr_iterations; i++)
|
||||
run_one_test();
|
||||
create_tasks();
|
||||
printf("------------------------------------------------------------\n");
|
||||
for (i = 0; i < nr_iterations; i++)
|
||||
run_one_test();
|
||||
} else if (lat_mode) {
|
||||
setup_pager();
|
||||
__cmd_sched();
|
||||
output_lat_results();
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue
Block a user