forked from luck/tmp_suning_uos_patched
492d4d876c
Linux 5.9 introduced perf test case "Parse and process metrics" and on s390 this test case always dumps core: [root@t35lp67 perf]# ./perf test -vvvv -F 67 67: Parse and process metrics : --- start --- metric expr inst_retired.any / cpu_clk_unhalted.thread for IPC parsing metric: inst_retired.any / cpu_clk_unhalted.thread Segmentation fault (core dumped) [root@t35lp67 perf]# I debugged this core dump and gdb shows this call chain: (gdb) where #0 0x000003ffabc3192a in __strnlen_c_1 () from /lib64/libc.so.6 #1 0x000003ffabc293de in strcasestr () from /lib64/libc.so.6 #2 0x0000000001102ba2 in match_metric(list=0x1e6ea20 "inst_retired.any", n=<optimized out>) at util/metricgroup.c:368 #3 find_metric (map=<optimized out>, map=<optimized out>, metric=0x1e6ea20 "inst_retired.any") at util/metricgroup.c:765 #4 __resolve_metric (ids=0x0, map=<optimized out>, metric_list=0x0, metric_no_group=<optimized out>, m=<optimized out>) at util/metricgroup.c:844 #5 resolve_metric (ids=0x0, map=0x0, metric_list=0x0, metric_no_group=<optimized out>) at util/metricgroup.c:881 #6 metricgroup__add_metric (metric=<optimized out>, metric_no_group=metric_no_group@entry=false, events=<optimized out>, events@entry=0x3ffd84fb878, metric_list=0x0, metric_list@entry=0x3ffd84fb868, map=0x0) at util/metricgroup.c:943 #7 0x00000000011034ae in metricgroup__add_metric_list (map=0x13f9828 <map>, metric_list=0x3ffd84fb868, events=0x3ffd84fb878, metric_no_group=<optimized out>, list=<optimized out>) at util/metricgroup.c:988 #8 parse_groups (perf_evlist=perf_evlist@entry=0x1e70260, str=str@entry=0x12f34b2 "IPC", metric_no_group=<optimized out>, metric_no_merge=<optimized out>, fake_pmu=fake_pmu@entry=0x1462f18 <perf_pmu.fake>, metric_events=0x3ffd84fba58, map=0x1) at util/metricgroup.c:1040 #9 0x0000000001103eb2 in metricgroup__parse_groups_test( evlist=evlist@entry=0x1e70260, map=map@entry=0x13f9828 <map>, str=str@entry=0x12f34b2 "IPC", metric_no_group=metric_no_group@entry=false, 
metric_no_merge=metric_no_merge@entry=false, metric_events=0x3ffd84fba58) at util/metricgroup.c:1082 #10 0x00000000010c84d8 in __compute_metric (ratio2=0x0, name2=0x0, ratio1=<synthetic pointer>, name1=0x12f34b2 "IPC", vals=0x3ffd84fbad8, name=0x12f34b2 "IPC") at tests/parse-metric.c:159 #11 compute_metric (ratio=<synthetic pointer>, vals=0x3ffd84fbad8, name=0x12f34b2 "IPC") at tests/parse-metric.c:189 #12 test_ipc () at tests/parse-metric.c:208 ..... ..... omitted many more lines This test case was added with commit 218ca91df4
("perf tests: Add parse metric test for frontend metric"). When I compile with make DEBUG=y it works fine and I do not get a core dump. It turned out that the above listed function call chain worked on a struct pmu_event array which requires a trailing element with zeroes which was missing. The macro map_for_each_event() loops over that array and tests for members metric_expr/metric_name/metric_group being non-NULL. Adding this element fixes the issue. Output after: [root@t35lp46 perf]# ./perf test 67 67: Parse and process metrics : Ok [root@t35lp46 perf]# Committer notes: As Ian remarks, this is not s390 specific: <quote Ian> This also shows up with address sanitizer on all architectures (perhaps change the patch title) and perhaps add a "Fixes: <commit>" tag. ================================================================= ==4718==ERROR: AddressSanitizer: global-buffer-overflow on address 0x55c93b4d59e8 at pc 0x55c93a1541e2 bp 0x7ffd24327c60 sp 0x7ffd24327c58 READ of size 8 at 0x55c93b4d59e8 thread T0 #0 0x55c93a1541e1 in find_metric tools/perf/util/metricgroup.c:764:2 #1 0x55c93a153e6c in __resolve_metric tools/perf/util/metricgroup.c:844:9 #2 0x55c93a152f18 in resolve_metric tools/perf/util/metricgroup.c:881:9 #3 0x55c93a1528db in metricgroup__add_metric tools/perf/util/metricgroup.c:943:9 #4 0x55c93a151996 in metricgroup__add_metric_list tools/perf/util/metricgroup.c:988:9 #5 0x55c93a1511b9 in parse_groups tools/perf/util/metricgroup.c:1040:8 #6 0x55c93a1513e1 in metricgroup__parse_groups_test tools/perf/util/metricgroup.c:1082:9 #7 0x55c93a0108ae in __compute_metric tools/perf/tests/parse-metric.c:159:8 #8 0x55c93a010744 in compute_metric tools/perf/tests/parse-metric.c:189:9 #9 0x55c93a00f5ee in test_ipc tools/perf/tests/parse-metric.c:208:2 #10 0x55c93a00f1e8 in test__parse_metric tools/perf/tests/parse-metric.c:345:2 #11 0x55c939fd7202 in run_test tools/perf/tests/builtin-test.c:410:9 #12 0x55c939fd6736 in test_and_print tools/perf/tests/builtin-test.c:440:9 
#13 0x55c939fd58c3 in __cmd_test tools/perf/tests/builtin-test.c:661:4 #14 0x55c939fd4e02 in cmd_test tools/perf/tests/builtin-test.c:807:9 #15 0x55c939e4763d in run_builtin tools/perf/perf.c:313:11 #16 0x55c939e46475 in handle_internal_command tools/perf/perf.c:365:8 #17 0x55c939e4737e in run_argv tools/perf/perf.c:409:2 #18 0x55c939e45f7e in main tools/perf/perf.c:539:3 0x55c93b4d59e8 is located 0 bytes to the right of global variable 'pme_test' defined in 'tools/perf/tests/parse-metric.c:17:25' (0x55c93b4d54a0) of size 1352 SUMMARY: AddressSanitizer: global-buffer-overflow tools/perf/util/metricgroup.c:764:2 in find_metric Shadow bytes around the buggy address: 0x0ab9a7692ae0: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 0x0ab9a7692af0: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 0x0ab9a7692b00: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 0x0ab9a7692b10: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 0x0ab9a7692b20: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 =>0x0ab9a7692b30: 00 00 00 00 00 00 00 00 00 00 00 00 00[f9]f9 f9 0x0ab9a7692b40: f9 f9 f9 f9 f9 f9 f9 f9 f9 f9 f9 f9 f9 f9 f9 f9 0x0ab9a7692b50: f9 f9 f9 f9 f9 f9 f9 f9 f9 f9 f9 f9 f9 f9 f9 f9 0x0ab9a7692b60: f9 f9 f9 f9 f9 f9 f9 f9 00 00 00 00 00 00 00 00 0x0ab9a7692b70: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 0x0ab9a7692b80: f9 f9 f9 f9 f9 f9 f9 f9 f9 f9 f9 f9 f9 f9 f9 f9 Shadow byte legend (one shadow byte represents 8 application bytes): Addressable: 00 Partially addressable: 01 02 03 04 05 06 07 Heap left redzone: fa Freed heap region: fd Stack left redzone: f1 Stack mid redzone: f2 Stack right redzone: f3 Stack after return: f5 Stack use after scope: f8 Global redzone: f9 Global init order: f6 Poisoned by user: f7 Container overflow: fc Array cookie: ac Intra object redzone: bb ASan internal: fe Left alloca redzone: ca Right alloca redzone: cb Shadow gap: cc </quote> I'm also adding the missing "Fixes" tag and setting just .name to NULL, as doing it that way is more compact 
(the compiler will zero out everything else) and the table iterators look for .name being NULL as the sentinel marking the end of the table. Fixes: 0a507af9c6
("perf tests: Add parse metric test for ipc metric") Signed-off-by: Thomas Richter <tmricht@linux.ibm.com> Reviewed-by: Sumanth Korikkar <sumanthk@linux.ibm.com> Acked-by: Ian Rogers <irogers@google.com> Cc: Heiko Carstens <heiko.carstens@de.ibm.com> Cc: Jiri Olsa <jolsa@kernel.org> Cc: Namhyung Kim <namhyung@kernel.org> Cc: Sven Schnelle <svens@linux.ibm.com> Cc: Vasily Gorbik <gor@linux.ibm.com> Link: http://lore.kernel.org/lkml/20200825071211.16959-1-tmricht@linux.ibm.com Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
356 lines
9.1 KiB
C
356 lines
9.1 KiB
C
// SPDX-License-Identifier: GPL-2.0
|
|
#include <linux/compiler.h>
|
|
#include <string.h>
|
|
#include <perf/cpumap.h>
|
|
#include <perf/evlist.h>
|
|
#include "metricgroup.h"
|
|
#include "tests.h"
|
|
#include "pmu-events/pmu-events.h"
|
|
#include "evlist.h"
|
|
#include "rblist.h"
|
|
#include "debug.h"
|
|
#include "expr.h"
|
|
#include "stat.h"
|
|
#include <perf/cpumap.h>
|
|
#include <perf/evlist.h>
|
|
|
|
/*
 * Metric definitions exercised by the tests in this file.
 *
 * Each entry only fills in the metric_* members.  The last entry, with
 * .name set to NULL and everything else zeroed by the compiler, terminates
 * the table for the code that iterates over it.
 */
static struct pmu_event pme_test[] = {
	{
		.metric_name	= "IPC",
		.metric_group	= "group1",
		.metric_expr	= "inst_retired.any / cpu_clk_unhalted.thread",
	},
	{
		.metric_name	= "Frontend_Bound_SMT",
		.metric_expr	= "idq_uops_not_delivered.core / (4 * (( ( cpu_clk_unhalted.thread / 2 ) * "
				  "( 1 + cpu_clk_unhalted.one_thread_active / cpu_clk_unhalted.ref_xclk ) )))",
	},
	{
		.metric_name	= "dcache_miss_cpi",
		.metric_expr	= "l1d\\-loads\\-misses / inst_retired.any",
	},
	{
		.metric_name	= "icache_miss_cycles",
		.metric_expr	= "l1i\\-loads\\-misses / inst_retired.any",
	},
	{
		/* Built from the two metrics defined just above. */
		.metric_name	= "cache_miss_cycles",
		.metric_group	= "group1",
		.metric_expr	= "(dcache_miss_cpi + icache_miss_cycles)",
	},
	{
		.metric_name	= "DCache_L2_All_Hits",
		.metric_expr	= "l2_rqsts.demand_data_rd_hit + l2_rqsts.pf_hit + l2_rqsts.rfo_hit",
	},
	{
		.metric_name	= "DCache_L2_All_Miss",
		.metric_expr	= "max(l2_rqsts.all_demand_data_rd - l2_rqsts.demand_data_rd_hit, 0) + "
				  "l2_rqsts.pf_miss + l2_rqsts.rfo_miss",
	},
	{
		.metric_name	= "DCache_L2_All",
		.metric_expr	= "dcache_l2_all_hits + dcache_l2_all_miss",
	},
	{
		.metric_name	= "DCache_L2_Hits",
		.metric_expr	= "d_ratio(dcache_l2_all_hits, dcache_l2_all)",
	},
	{
		.metric_name	= "DCache_L2_Misses",
		.metric_expr	= "d_ratio(dcache_l2_all_miss, dcache_l2_all)",
	},
	{
		/* M1 and M2 refer to each other; used by test_recursion_fail(). */
		.metric_name	= "M1",
		.metric_expr	= "ipc + m2",
	},
	{
		.metric_name	= "M2",
		.metric_expr	= "ipc + m1",
	},
	{
		/* M3 refers to itself; used by test_recursion_fail(). */
		.metric_name	= "M3",
		.metric_expr	= "1/m3",
	},
	{
		/* End-of-table sentinel. */
		.name		= NULL,
	}
};
|
|
|
|
static struct pmu_events_map map = {
|
|
.cpuid = "test",
|
|
.version = "1",
|
|
.type = "core",
|
|
.table = pme_test,
|
|
};
|
|
|
|
struct value {
|
|
const char *event;
|
|
u64 val;
|
|
};
|
|
|
|
static u64 find_value(const char *name, struct value *values)
|
|
{
|
|
struct value *v = values;
|
|
|
|
while (v->event) {
|
|
if (!strcmp(name, v->event))
|
|
return v->val;
|
|
v++;
|
|
};
|
|
return 0;
|
|
}
|
|
|
|
static void load_runtime_stat(struct runtime_stat *st, struct evlist *evlist,
|
|
struct value *vals)
|
|
{
|
|
struct evsel *evsel;
|
|
u64 count;
|
|
|
|
evlist__for_each_entry(evlist, evsel) {
|
|
count = find_value(evsel->name, vals);
|
|
perf_stat__update_shadow_stats(evsel, count, 0, st);
|
|
}
|
|
}
|
|
|
|
static double compute_single(struct rblist *metric_events, struct evlist *evlist,
|
|
struct runtime_stat *st, const char *name)
|
|
{
|
|
struct metric_expr *mexp;
|
|
struct metric_event *me;
|
|
struct evsel *evsel;
|
|
|
|
evlist__for_each_entry(evlist, evsel) {
|
|
me = metricgroup__lookup(metric_events, evsel, false);
|
|
if (me != NULL) {
|
|
list_for_each_entry (mexp, &me->head, nd) {
|
|
if (strcmp(mexp->metric_name, name))
|
|
continue;
|
|
return test_generic_metric(mexp, 0, st);
|
|
}
|
|
}
|
|
}
|
|
return 0.;
|
|
}
|
|
|
|
static int __compute_metric(const char *name, struct value *vals,
|
|
const char *name1, double *ratio1,
|
|
const char *name2, double *ratio2)
|
|
{
|
|
struct rblist metric_events = {
|
|
.nr_entries = 0,
|
|
};
|
|
struct perf_cpu_map *cpus;
|
|
struct runtime_stat st;
|
|
struct evlist *evlist;
|
|
int err;
|
|
|
|
/*
|
|
* We need to prepare evlist for stat mode running on CPU 0
|
|
* because that's where all the stats are going to be created.
|
|
*/
|
|
evlist = evlist__new();
|
|
if (!evlist)
|
|
return -ENOMEM;
|
|
|
|
cpus = perf_cpu_map__new("0");
|
|
if (!cpus)
|
|
return -ENOMEM;
|
|
|
|
perf_evlist__set_maps(&evlist->core, cpus, NULL);
|
|
|
|
/* Parse the metric into metric_events list. */
|
|
err = metricgroup__parse_groups_test(evlist, &map, name,
|
|
false, false,
|
|
&metric_events);
|
|
if (err)
|
|
return err;
|
|
|
|
if (perf_evlist__alloc_stats(evlist, false))
|
|
return -1;
|
|
|
|
/* Load the runtime stats with given numbers for events. */
|
|
runtime_stat__init(&st);
|
|
load_runtime_stat(&st, evlist, vals);
|
|
|
|
/* And execute the metric */
|
|
if (name1 && ratio1)
|
|
*ratio1 = compute_single(&metric_events, evlist, &st, name1);
|
|
if (name2 && ratio2)
|
|
*ratio2 = compute_single(&metric_events, evlist, &st, name2);
|
|
|
|
/* ... clenup. */
|
|
metricgroup__rblist_exit(&metric_events);
|
|
runtime_stat__exit(&st);
|
|
perf_evlist__free_stats(evlist);
|
|
perf_cpu_map__put(cpus);
|
|
evlist__delete(evlist);
|
|
return 0;
|
|
}
|
|
|
|
/*
 * Compute the single metric @name from the event counts in @vals and store
 * the result in *@ratio.  Returns whatever __compute_metric() returns.
 */
static int compute_metric(const char *name, struct value *vals, double *ratio)
{
	/* The metric to parse and the metric to evaluate are the same one. */
	return __compute_metric(name, vals,
				name, ratio,
				NULL, NULL);
}
|
|
|
|
/*
 * Parse metric group @name and evaluate two of its metrics, @name1 and
 * @name2, from the event counts in @vals.  The results are stored in
 * *@ratio1 and *@ratio2.  Returns whatever __compute_metric() returns.
 */
static int compute_metric_group(const char *name, struct value *vals,
				const char *name1, double *ratio1,
				const char *name2, double *ratio2)
{
	return __compute_metric(name, vals,
				name1, ratio1,
				name2, ratio2);
}
|
|
|
|
static int test_ipc(void)
|
|
{
|
|
double ratio;
|
|
struct value vals[] = {
|
|
{ .event = "inst_retired.any", .val = 300 },
|
|
{ .event = "cpu_clk_unhalted.thread", .val = 200 },
|
|
{ .event = NULL, },
|
|
};
|
|
|
|
TEST_ASSERT_VAL("failed to compute metric",
|
|
compute_metric("IPC", vals, &ratio) == 0);
|
|
|
|
TEST_ASSERT_VAL("IPC failed, wrong ratio",
|
|
ratio == 1.5);
|
|
return 0;
|
|
}
|
|
|
|
static int test_frontend(void)
|
|
{
|
|
double ratio;
|
|
struct value vals[] = {
|
|
{ .event = "idq_uops_not_delivered.core", .val = 300 },
|
|
{ .event = "cpu_clk_unhalted.thread", .val = 200 },
|
|
{ .event = "cpu_clk_unhalted.one_thread_active", .val = 400 },
|
|
{ .event = "cpu_clk_unhalted.ref_xclk", .val = 600 },
|
|
{ .event = NULL, },
|
|
};
|
|
|
|
TEST_ASSERT_VAL("failed to compute metric",
|
|
compute_metric("Frontend_Bound_SMT", vals, &ratio) == 0);
|
|
|
|
TEST_ASSERT_VAL("Frontend_Bound_SMT failed, wrong ratio",
|
|
ratio == 0.45);
|
|
return 0;
|
|
}
|
|
|
|
static int test_cache_miss_cycles(void)
|
|
{
|
|
double ratio;
|
|
struct value vals[] = {
|
|
{ .event = "l1d-loads-misses", .val = 300 },
|
|
{ .event = "l1i-loads-misses", .val = 200 },
|
|
{ .event = "inst_retired.any", .val = 400 },
|
|
{ .event = NULL, },
|
|
};
|
|
|
|
TEST_ASSERT_VAL("failed to compute metric",
|
|
compute_metric("cache_miss_cycles", vals, &ratio) == 0);
|
|
|
|
TEST_ASSERT_VAL("cache_miss_cycles failed, wrong ratio",
|
|
ratio == 1.25);
|
|
return 0;
|
|
}
|
|
|
|
|
|
/*
|
|
* DCache_L2_All_Hits = l2_rqsts.demand_data_rd_hit + l2_rqsts.pf_hit + l2_rqsts.rfo_hi
|
|
* DCache_L2_All_Miss = max(l2_rqsts.all_demand_data_rd - l2_rqsts.demand_data_rd_hit, 0) +
|
|
* l2_rqsts.pf_miss + l2_rqsts.rfo_miss
|
|
* DCache_L2_All = dcache_l2_all_hits + dcache_l2_all_miss
|
|
* DCache_L2_Hits = d_ratio(dcache_l2_all_hits, dcache_l2_all)
|
|
* DCache_L2_Misses = d_ratio(dcache_l2_all_miss, dcache_l2_all)
|
|
*
|
|
* l2_rqsts.demand_data_rd_hit = 100
|
|
* l2_rqsts.pf_hit = 200
|
|
* l2_rqsts.rfo_hi = 300
|
|
* l2_rqsts.all_demand_data_rd = 400
|
|
* l2_rqsts.pf_miss = 500
|
|
* l2_rqsts.rfo_miss = 600
|
|
*
|
|
* DCache_L2_All_Hits = 600
|
|
* DCache_L2_All_Miss = MAX(400 - 100, 0) + 500 + 600 = 1400
|
|
* DCache_L2_All = 600 + 1400 = 2000
|
|
* DCache_L2_Hits = 600 / 2000 = 0.3
|
|
* DCache_L2_Misses = 1400 / 2000 = 0.7
|
|
*/
|
|
static int test_dcache_l2(void)
|
|
{
|
|
double ratio;
|
|
struct value vals[] = {
|
|
{ .event = "l2_rqsts.demand_data_rd_hit", .val = 100 },
|
|
{ .event = "l2_rqsts.pf_hit", .val = 200 },
|
|
{ .event = "l2_rqsts.rfo_hit", .val = 300 },
|
|
{ .event = "l2_rqsts.all_demand_data_rd", .val = 400 },
|
|
{ .event = "l2_rqsts.pf_miss", .val = 500 },
|
|
{ .event = "l2_rqsts.rfo_miss", .val = 600 },
|
|
{ .event = NULL, },
|
|
};
|
|
|
|
TEST_ASSERT_VAL("failed to compute metric",
|
|
compute_metric("DCache_L2_Hits", vals, &ratio) == 0);
|
|
|
|
TEST_ASSERT_VAL("DCache_L2_Hits failed, wrong ratio",
|
|
ratio == 0.3);
|
|
|
|
TEST_ASSERT_VAL("failed to compute metric",
|
|
compute_metric("DCache_L2_Misses", vals, &ratio) == 0);
|
|
|
|
TEST_ASSERT_VAL("DCache_L2_Misses failed, wrong ratio",
|
|
ratio == 0.7);
|
|
return 0;
|
|
}
|
|
|
|
static int test_recursion_fail(void)
|
|
{
|
|
double ratio;
|
|
struct value vals[] = {
|
|
{ .event = "inst_retired.any", .val = 300 },
|
|
{ .event = "cpu_clk_unhalted.thread", .val = 200 },
|
|
{ .event = NULL, },
|
|
};
|
|
|
|
TEST_ASSERT_VAL("failed to find recursion",
|
|
compute_metric("M1", vals, &ratio) == -1);
|
|
|
|
TEST_ASSERT_VAL("failed to find recursion",
|
|
compute_metric("M3", vals, &ratio) == -1);
|
|
return 0;
|
|
}
|
|
|
|
static int test_metric_group(void)
|
|
{
|
|
double ratio1, ratio2;
|
|
struct value vals[] = {
|
|
{ .event = "cpu_clk_unhalted.thread", .val = 200 },
|
|
{ .event = "l1d-loads-misses", .val = 300 },
|
|
{ .event = "l1i-loads-misses", .val = 200 },
|
|
{ .event = "inst_retired.any", .val = 400 },
|
|
{ .event = NULL, },
|
|
};
|
|
|
|
TEST_ASSERT_VAL("failed to find recursion",
|
|
compute_metric_group("group1", vals,
|
|
"IPC", &ratio1,
|
|
"cache_miss_cycles", &ratio2) == 0);
|
|
|
|
TEST_ASSERT_VAL("group IPC failed, wrong ratio",
|
|
ratio1 == 2.0);
|
|
|
|
TEST_ASSERT_VAL("group cache_miss_cycles failed, wrong ratio",
|
|
ratio2 == 1.25);
|
|
return 0;
|
|
}
|
|
|
|
/*
 * Test entry point: runs every metric sub-test in order and stops at the
 * first one that fails.  Returns 0 when all of them pass.
 */
int test__parse_metric(struct test *test __maybe_unused, int subtest __maybe_unused)
{
	int ret;

	ret = test_ipc();
	TEST_ASSERT_VAL("IPC failed", ret == 0);

	ret = test_frontend();
	TEST_ASSERT_VAL("frontend failed", ret == 0);

	ret = test_cache_miss_cycles();
	TEST_ASSERT_VAL("cache_miss_cycles failed", ret == 0);

	ret = test_dcache_l2();
	TEST_ASSERT_VAL("DCache_L2 failed", ret == 0);

	ret = test_recursion_fail();
	TEST_ASSERT_VAL("recursion fail failed", ret == 0);

	ret = test_metric_group();
	TEST_ASSERT_VAL("test metric group", ret == 0);

	return 0;
}
|