kernel_optimize_test/tools/perf/examples/bpf/augmented_syscalls.c

174 lines
5.2 KiB
C
Raw Normal View History

perf trace: Handle "bpf-output" events associated with "__augmented_syscalls__" BPF map Add an example BPF script that writes syscalls:sys_enter_openat raw tracepoint payloads augmented with the first 64 bytes of the "filename" syscall pointer arg. Then catch it and print it just like with things written to the "__bpf_stdout__" map associated with a PERF_COUNT_SW_BPF_OUTPUT software event, by just letting the default tracepoint handler in 'perf trace', trace__event_handler(), to use bpf_output__fprintf(trace, sample), just like it does with all other PERF_COUNT_SW_BPF_OUTPUT events, i.e. just do a dump on the payload, so that we can check if what is being printed has at least the first 64 bytes of the "filename" arg: The augmented_syscalls.c eBPF script: # cat tools/perf/examples/bpf/augmented_syscalls.c // SPDX-License-Identifier: GPL-2.0 #include <stdio.h> struct bpf_map SEC("maps") __augmented_syscalls__ = { .type = BPF_MAP_TYPE_PERF_EVENT_ARRAY, .key_size = sizeof(int), .value_size = sizeof(u32), .max_entries = __NR_CPUS__, }; struct syscall_enter_openat_args { unsigned long long common_tp_fields; long syscall_nr; long dfd; char *filename_ptr; long flags; long mode; }; struct augmented_enter_openat_args { struct syscall_enter_openat_args args; char filename[64]; }; int syscall_enter(openat)(struct syscall_enter_openat_args *args) { struct augmented_enter_openat_args augmented_args; probe_read(&augmented_args.args, sizeof(augmented_args.args), args); probe_read_str(&augmented_args.filename, sizeof(augmented_args.filename), args->filename_ptr); perf_event_output(args, &__augmented_syscalls__, BPF_F_CURRENT_CPU, &augmented_args, sizeof(augmented_args)); return 1; } license(GPL); # So it will just prepare a raw_syscalls:sys_enter payload for the "openat" syscall. This will eventually be done for all syscalls with pointer args, globally or just when the user asks, using some spec, which args of which syscalls it wants "expanded" this way, we'll probably start with just all the syscalls that have char * pointers with familiar names, the ones we already handle with the probe:vfs_getname kprobe if it is in place hooking the kernel getname_flags() function used to copy from user the paths. Running it we get: # perf trace -e perf/tools/perf/examples/bpf/augmented_syscalls.c,openat cat /etc/passwd > /dev/null 0.000 ( ): __augmented_syscalls__:X?.C......................`\..................../etc/ld.so.cache..#......,....ao.k...............k......1."......... 0.006 ( ): syscalls:sys_enter_openat:dfd: CWD, filename: 0x5c600da8, flags: CLOEXEC 0.008 ( 0.005 ms): cat/31292 openat(dfd: CWD, filename: 0x5c600da8, flags: CLOEXEC ) = 3 0.036 ( ): __augmented_syscalls__:X?.C.......................\..................../lib64/libc.so.6......... .\....#........?.......=.C..../."......... 0.037 ( ): syscalls:sys_enter_openat:dfd: CWD, filename: 0x5c808ce0, flags: CLOEXEC 0.039 ( 0.007 ms): cat/31292 openat(dfd: CWD, filename: 0x5c808ce0, flags: CLOEXEC ) = 3 0.323 ( ): __augmented_syscalls__:X?.C.....................P....................../etc/passwd......>.C....@................>.C.....,....ao.>.C........ 0.325 ( ): syscalls:sys_enter_openat:dfd: CWD, filename: 0xe8be50d6 0.327 ( 0.004 ms): cat/31292 openat(dfd: CWD, filename: 0xe8be50d6 ) = 3 # We need to go on optimizing this to avoid seding trash or zeroes in the pointer content payload, using the return from bpf_probe_read_str(), but to keep things simple at this stage and make incremental progress, lets leave it at that for now. Cc: Adrian Hunter <adrian.hunter@intel.com> Cc: David Ahern <dsahern@gmail.com> Cc: Jiri Olsa <jolsa@kernel.org> Cc: Namhyung Kim <namhyung@kernel.org> Cc: Wang Nan <wangnan0@huawei.com> Link: https://lkml.kernel.org/n/tip-g360n1zbj6bkbk6q0qo11c28@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2018-08-08 02:40:13 +08:00
// SPDX-License-Identifier: GPL-2.0
/*
* Augment syscalls with the contents of the pointer arguments.
perf trace: Handle "bpf-output" events associated with "__augmented_syscalls__" BPF map Add an example BPF script that writes syscalls:sys_enter_openat raw tracepoint payloads augmented with the first 64 bytes of the "filename" syscall pointer arg. Then catch it and print it just like with things written to the "__bpf_stdout__" map associated with a PERF_COUNT_SW_BPF_OUTPUT software event, by just letting the default tracepoint handler in 'perf trace', trace__event_handler(), to use bpf_output__fprintf(trace, sample), just like it does with all other PERF_COUNT_SW_BPF_OUTPUT events, i.e. just do a dump on the payload, so that we can check if what is being printed has at least the first 64 bytes of the "filename" arg: The augmented_syscalls.c eBPF script: # cat tools/perf/examples/bpf/augmented_syscalls.c // SPDX-License-Identifier: GPL-2.0 #include <stdio.h> struct bpf_map SEC("maps") __augmented_syscalls__ = { .type = BPF_MAP_TYPE_PERF_EVENT_ARRAY, .key_size = sizeof(int), .value_size = sizeof(u32), .max_entries = __NR_CPUS__, }; struct syscall_enter_openat_args { unsigned long long common_tp_fields; long syscall_nr; long dfd; char *filename_ptr; long flags; long mode; }; struct augmented_enter_openat_args { struct syscall_enter_openat_args args; char filename[64]; }; int syscall_enter(openat)(struct syscall_enter_openat_args *args) { struct augmented_enter_openat_args augmented_args; probe_read(&augmented_args.args, sizeof(augmented_args.args), args); probe_read_str(&augmented_args.filename, sizeof(augmented_args.filename), args->filename_ptr); perf_event_output(args, &__augmented_syscalls__, BPF_F_CURRENT_CPU, &augmented_args, sizeof(augmented_args)); return 1; } license(GPL); # So it will just prepare a raw_syscalls:sys_enter payload for the "openat" syscall. This will eventually be done for all syscalls with pointer args, globally or just when the user asks, using some spec, which args of which syscalls it wants "expanded" this way, we'll probably start with just all the syscalls that have char * pointers with familiar names, the ones we already handle with the probe:vfs_getname kprobe if it is in place hooking the kernel getname_flags() function used to copy from user the paths. Running it we get: # perf trace -e perf/tools/perf/examples/bpf/augmented_syscalls.c,openat cat /etc/passwd > /dev/null 0.000 ( ): __augmented_syscalls__:X?.C......................`\..................../etc/ld.so.cache..#......,....ao.k...............k......1."......... 0.006 ( ): syscalls:sys_enter_openat:dfd: CWD, filename: 0x5c600da8, flags: CLOEXEC 0.008 ( 0.005 ms): cat/31292 openat(dfd: CWD, filename: 0x5c600da8, flags: CLOEXEC ) = 3 0.036 ( ): __augmented_syscalls__:X?.C.......................\..................../lib64/libc.so.6......... .\....#........?.......=.C..../."......... 0.037 ( ): syscalls:sys_enter_openat:dfd: CWD, filename: 0x5c808ce0, flags: CLOEXEC 0.039 ( 0.007 ms): cat/31292 openat(dfd: CWD, filename: 0x5c808ce0, flags: CLOEXEC ) = 3 0.323 ( ): __augmented_syscalls__:X?.C.....................P....................../etc/passwd......>.C....@................>.C.....,....ao.>.C........ 0.325 ( ): syscalls:sys_enter_openat:dfd: CWD, filename: 0xe8be50d6 0.327 ( 0.004 ms): cat/31292 openat(dfd: CWD, filename: 0xe8be50d6 ) = 3 # We need to go on optimizing this to avoid seding trash or zeroes in the pointer content payload, using the return from bpf_probe_read_str(), but to keep things simple at this stage and make incremental progress, lets leave it at that for now. Cc: Adrian Hunter <adrian.hunter@intel.com> Cc: David Ahern <dsahern@gmail.com> Cc: Jiri Olsa <jolsa@kernel.org> Cc: Namhyung Kim <namhyung@kernel.org> Cc: Wang Nan <wangnan0@huawei.com> Link: https://lkml.kernel.org/n/tip-g360n1zbj6bkbk6q0qo11c28@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2018-08-08 02:40:13 +08:00
*
* Test it with:
*
* perf trace -e tools/perf/examples/bpf/augmented_syscalls.c cat /etc/passwd > /dev/null
*
* It'll catch some openat syscalls related to the dynamic linked and
* the last one should be the one for '/etc/passwd'.
*
* This matches what is marshalled into the raw_syscall:sys_enter payload
* expected by the 'perf trace' beautifiers, and can be used by them, that will
* check if perf_sample->raw_data is more than what is expected for each
* syscalls:sys_{enter,exit}_SYSCALL tracepoint, uing the extra data as the
* contents of pointer arguments.
perf trace: Handle "bpf-output" events associated with "__augmented_syscalls__" BPF map Add an example BPF script that writes syscalls:sys_enter_openat raw tracepoint payloads augmented with the first 64 bytes of the "filename" syscall pointer arg. Then catch it and print it just like with things written to the "__bpf_stdout__" map associated with a PERF_COUNT_SW_BPF_OUTPUT software event, by just letting the default tracepoint handler in 'perf trace', trace__event_handler(), to use bpf_output__fprintf(trace, sample), just like it does with all other PERF_COUNT_SW_BPF_OUTPUT events, i.e. just do a dump on the payload, so that we can check if what is being printed has at least the first 64 bytes of the "filename" arg: The augmented_syscalls.c eBPF script: # cat tools/perf/examples/bpf/augmented_syscalls.c // SPDX-License-Identifier: GPL-2.0 #include <stdio.h> struct bpf_map SEC("maps") __augmented_syscalls__ = { .type = BPF_MAP_TYPE_PERF_EVENT_ARRAY, .key_size = sizeof(int), .value_size = sizeof(u32), .max_entries = __NR_CPUS__, }; struct syscall_enter_openat_args { unsigned long long common_tp_fields; long syscall_nr; long dfd; char *filename_ptr; long flags; long mode; }; struct augmented_enter_openat_args { struct syscall_enter_openat_args args; char filename[64]; }; int syscall_enter(openat)(struct syscall_enter_openat_args *args) { struct augmented_enter_openat_args augmented_args; probe_read(&augmented_args.args, sizeof(augmented_args.args), args); probe_read_str(&augmented_args.filename, sizeof(augmented_args.filename), args->filename_ptr); perf_event_output(args, &__augmented_syscalls__, BPF_F_CURRENT_CPU, &augmented_args, sizeof(augmented_args)); return 1; } license(GPL); # So it will just prepare a raw_syscalls:sys_enter payload for the "openat" syscall. This will eventually be done for all syscalls with pointer args, globally or just when the user asks, using some spec, which args of which syscalls it wants "expanded" this way, we'll probably start with just all the syscalls that have char * pointers with familiar names, the ones we already handle with the probe:vfs_getname kprobe if it is in place hooking the kernel getname_flags() function used to copy from user the paths. Running it we get: # perf trace -e perf/tools/perf/examples/bpf/augmented_syscalls.c,openat cat /etc/passwd > /dev/null 0.000 ( ): __augmented_syscalls__:X?.C......................`\..................../etc/ld.so.cache..#......,....ao.k...............k......1."......... 0.006 ( ): syscalls:sys_enter_openat:dfd: CWD, filename: 0x5c600da8, flags: CLOEXEC 0.008 ( 0.005 ms): cat/31292 openat(dfd: CWD, filename: 0x5c600da8, flags: CLOEXEC ) = 3 0.036 ( ): __augmented_syscalls__:X?.C.......................\..................../lib64/libc.so.6......... .\....#........?.......=.C..../."......... 0.037 ( ): syscalls:sys_enter_openat:dfd: CWD, filename: 0x5c808ce0, flags: CLOEXEC 0.039 ( 0.007 ms): cat/31292 openat(dfd: CWD, filename: 0x5c808ce0, flags: CLOEXEC ) = 3 0.323 ( ): __augmented_syscalls__:X?.C.....................P....................../etc/passwd......>.C....@................>.C.....,....ao.>.C........ 0.325 ( ): syscalls:sys_enter_openat:dfd: CWD, filename: 0xe8be50d6 0.327 ( 0.004 ms): cat/31292 openat(dfd: CWD, filename: 0xe8be50d6 ) = 3 # We need to go on optimizing this to avoid seding trash or zeroes in the pointer content payload, using the return from bpf_probe_read_str(), but to keep things simple at this stage and make incremental progress, lets leave it at that for now. Cc: Adrian Hunter <adrian.hunter@intel.com> Cc: David Ahern <dsahern@gmail.com> Cc: Jiri Olsa <jolsa@kernel.org> Cc: Namhyung Kim <namhyung@kernel.org> Cc: Wang Nan <wangnan0@huawei.com> Link: https://lkml.kernel.org/n/tip-g360n1zbj6bkbk6q0qo11c28@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2018-08-08 02:40:13 +08:00
*/
#include <stdio.h>
perf trace augmented_syscalls: Augment connect's 'sockaddr' arg As the first example of augmenting something other than a 'filename', augment the 'struct sockaddr' argument for the 'connect' syscall: # perf trace -e tools/perf/examples/bpf/augmented_syscalls.c ssh -6 fedorapeople.org 0.000 ssh/29669 connect(fd: 3, uservaddr: { .family: LOCAL, path: /var/run/nscd/socket }, addrlen: 110) 0.042 ssh/29669 connect(fd: 3, uservaddr: { .family: LOCAL, path: /var/run/nscd/socket }, addrlen: 110) 1.329 ssh/29669 connect(fd: 3, uservaddr: { .family: LOCAL, path: /var/run/nscd/socket }, addrlen: 110) 1.362 ssh/29669 connect(fd: 3, uservaddr: { .family: LOCAL, path: /var/run/nscd/socket }, addrlen: 110) 1.458 ssh/29669 connect(fd: 3, uservaddr: { .family: LOCAL, path: /var/run/nscd/socket }, addrlen: 110) 1.478 ssh/29669 connect(fd: 3, uservaddr: { .family: LOCAL, path: /var/run/nscd/socket }, addrlen: 110) 1.683 ssh/29669 connect(fd: 3<socket:[125942]>, uservaddr: { .family: INET, port: 53, addr: 192.168.43.1 }, addrlen: 16) 4.710 ssh/29669 connect(fd: 3<socket:[125942]>, uservaddr: { .family: INET6, port: 22, addr: 2610:28:3090:3001:5054:ff:fea7:9474 }, addrlen: 28) root@fedorapeople.org: Permission denied (publickey). # This is still just augmenting the syscalls:sys_enter_connect part, later we'll wire this up to augment the enter+exit combo, like in the tradicional 'perf trace' and 'strace' outputs. Cc: Adrian Hunter <adrian.hunter@intel.com> Cc: David Ahern <dsahern@gmail.com> Cc: Jiri Olsa <jolsa@kernel.org> Cc: Namhyung Kim <namhyung@kernel.org> Cc: Wang Nan <wangnan0@huawei.com> Link: https://lkml.kernel.org/n/tip-s7l541cbiqb22ifio6z7dpf6@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2018-08-29 03:24:44 +08:00
#include <linux/socket.h>
perf trace: Handle "bpf-output" events associated with "__augmented_syscalls__" BPF map Add an example BPF script that writes syscalls:sys_enter_openat raw tracepoint payloads augmented with the first 64 bytes of the "filename" syscall pointer arg. Then catch it and print it just like with things written to the "__bpf_stdout__" map associated with a PERF_COUNT_SW_BPF_OUTPUT software event, by just letting the default tracepoint handler in 'perf trace', trace__event_handler(), to use bpf_output__fprintf(trace, sample), just like it does with all other PERF_COUNT_SW_BPF_OUTPUT events, i.e. just do a dump on the payload, so that we can check if what is being printed has at least the first 64 bytes of the "filename" arg: The augmented_syscalls.c eBPF script: # cat tools/perf/examples/bpf/augmented_syscalls.c // SPDX-License-Identifier: GPL-2.0 #include <stdio.h> struct bpf_map SEC("maps") __augmented_syscalls__ = { .type = BPF_MAP_TYPE_PERF_EVENT_ARRAY, .key_size = sizeof(int), .value_size = sizeof(u32), .max_entries = __NR_CPUS__, }; struct syscall_enter_openat_args { unsigned long long common_tp_fields; long syscall_nr; long dfd; char *filename_ptr; long flags; long mode; }; struct augmented_enter_openat_args { struct syscall_enter_openat_args args; char filename[64]; }; int syscall_enter(openat)(struct syscall_enter_openat_args *args) { struct augmented_enter_openat_args augmented_args; probe_read(&augmented_args.args, sizeof(augmented_args.args), args); probe_read_str(&augmented_args.filename, sizeof(augmented_args.filename), args->filename_ptr); perf_event_output(args, &__augmented_syscalls__, BPF_F_CURRENT_CPU, &augmented_args, sizeof(augmented_args)); return 1; } license(GPL); # So it will just prepare a raw_syscalls:sys_enter payload for the "openat" syscall. This will eventually be done for all syscalls with pointer args, globally or just when the user asks, using some spec, which args of which syscalls it wants "expanded" this way, we'll probably start with just all the syscalls that have char * pointers with familiar names, the ones we already handle with the probe:vfs_getname kprobe if it is in place hooking the kernel getname_flags() function used to copy from user the paths. Running it we get: # perf trace -e perf/tools/perf/examples/bpf/augmented_syscalls.c,openat cat /etc/passwd > /dev/null 0.000 ( ): __augmented_syscalls__:X?.C......................`\..................../etc/ld.so.cache..#......,....ao.k...............k......1."......... 0.006 ( ): syscalls:sys_enter_openat:dfd: CWD, filename: 0x5c600da8, flags: CLOEXEC 0.008 ( 0.005 ms): cat/31292 openat(dfd: CWD, filename: 0x5c600da8, flags: CLOEXEC ) = 3 0.036 ( ): __augmented_syscalls__:X?.C.......................\..................../lib64/libc.so.6......... .\....#........?.......=.C..../."......... 0.037 ( ): syscalls:sys_enter_openat:dfd: CWD, filename: 0x5c808ce0, flags: CLOEXEC 0.039 ( 0.007 ms): cat/31292 openat(dfd: CWD, filename: 0x5c808ce0, flags: CLOEXEC ) = 3 0.323 ( ): __augmented_syscalls__:X?.C.....................P....................../etc/passwd......>.C....@................>.C.....,....ao.>.C........ 0.325 ( ): syscalls:sys_enter_openat:dfd: CWD, filename: 0xe8be50d6 0.327 ( 0.004 ms): cat/31292 openat(dfd: CWD, filename: 0xe8be50d6 ) = 3 # We need to go on optimizing this to avoid seding trash or zeroes in the pointer content payload, using the return from bpf_probe_read_str(), but to keep things simple at this stage and make incremental progress, lets leave it at that for now. Cc: Adrian Hunter <adrian.hunter@intel.com> Cc: David Ahern <dsahern@gmail.com> Cc: Jiri Olsa <jolsa@kernel.org> Cc: Namhyung Kim <namhyung@kernel.org> Cc: Wang Nan <wangnan0@huawei.com> Link: https://lkml.kernel.org/n/tip-g360n1zbj6bkbk6q0qo11c28@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2018-08-08 02:40:13 +08:00
struct bpf_map SEC("maps") __augmented_syscalls__ = {
.type = BPF_MAP_TYPE_PERF_EVENT_ARRAY,
.key_size = sizeof(int),
.value_size = sizeof(u32),
.max_entries = __NR_CPUS__,
};
struct syscall_exit_args {
unsigned long long common_tp_fields;
long syscall_nr;
long ret;
};
perf trace: Use the augmented filename, expanding syscall enter pointers This is the final touch in showing how a syscall argument beautifier can access the augmented args put in place by the tools/perf/examples/bpf/augmented_syscalls.c eBPF script, right after the regular raw syscall args, i.e. the up to 6 long integer values in the syscall interface. With this we are able to show the 'openat' syscall arg, now with up to 64 bytes, but in time this will be configurable, just like with the 'strace -s strsize' argument, from 'strace''s man page: -s strsize Specify the maximum string size to print (the default is 32). This actually is the maximum string to _collect_ and store in the ring buffer, not just print. Before: # perf trace -e tools/perf/examples/bpf/augmented_syscalls.c,openat cat /etc/passwd > /dev/null 0.000 ( ): cat/9658 openat(dfd: CWD, filename: 0x6626eda8, flags: CLOEXEC) 0.017 ( 0.007 ms): cat/9658 openat(dfd: CWD, filename: 0x6626eda8, flags: CLOEXEC) = 3 0.049 ( ): cat/9658 openat(dfd: CWD, filename: 0x66476ce0, flags: CLOEXEC) 0.051 ( 0.007 ms): cat/9658 openat(dfd: CWD, filename: 0x66476ce0, flags: CLOEXEC) = 3 0.377 ( ): cat/9658 openat(dfd: CWD, filename: 0x1e8f806b) 0.379 ( 0.005 ms): cat/9658 openat(dfd: CWD, filename: 0x1e8f806b) = 3 # After: # perf trace -e tools/perf/examples/bpf/augmented_syscalls.c,openat cat /etc/passwd > /dev/null 0.000 ( ): cat/11966 openat(dfd: CWD, filename: /etc/ld.so.cache, flags: CLOEXEC) 0.006 ( 0.006 ms): cat/11966 openat(dfd: CWD, filename: 0x4bfdcda8, flags: CLOEXEC) = 3 0.034 ( ): cat/11966 openat(dfd: CWD, filename: /lib64/libc.so.6, flags: CLOEXEC) 0.036 ( 0.008 ms): cat/11966 openat(dfd: CWD, filename: 0x4c1e4ce0, flags: CLOEXEC) = 3 0.375 ( ): cat/11966 openat(dfd: CWD, filename: /etc/passwd) 0.377 ( 0.005 ms): cat/11966 openat(dfd: CWD, filename: 0xe87906b) = 3 # This cset should show all the aspects of establishing a protocol between an eBPF syscall arg augmenter program, tools/perf/examples/bpf/augmented_syscalls.c and a 'perf trace' beautifier, the one associated with all 'char *' point syscall args with names that can heuristically be associated with filenames. Now to wire up 'open' to show a second syscall using this scheme, all we have to do now is to change tools/perf/examples/bpf/augmented_syscalls.c, as 'perf trace' will notice that the perf_sample.raw_size is more than what is expected for a particular syscall payload as defined by its tracefs format file and will then use the augmented payload in the 'filename' syscall arg beautifier. The same protocol will be used for structs such as 'struct sockaddr *', 'struct pollfd', etc, with additions for handling arrays. This will all be done under the hood when 'perf trace' realizes the system has the necessary components, and also can be done by providing a precompiled augmented_syscalls.c eBPF ELF object. Cc: Adrian Hunter <adrian.hunter@intel.com> Cc: David Ahern <dsahern@gmail.com> Cc: Jiri Olsa <jolsa@kernel.org> Cc: Namhyung Kim <namhyung@kernel.org> Cc: Wang Nan <wangnan0@huawei.com> Link: https://lkml.kernel.org/n/tip-gj9kqb61wo7m3shtpzercbcr@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2018-08-21 23:00:39 +08:00
struct augmented_filename {
unsigned int size;
int reserved;
char value[256];
perf trace: Use the augmented filename, expanding syscall enter pointers This is the final touch in showing how a syscall argument beautifier can access the augmented args put in place by the tools/perf/examples/bpf/augmented_syscalls.c eBPF script, right after the regular raw syscall args, i.e. the up to 6 long integer values in the syscall interface. With this we are able to show the 'openat' syscall arg, now with up to 64 bytes, but in time this will be configurable, just like with the 'strace -s strsize' argument, from 'strace''s man page: -s strsize Specify the maximum string size to print (the default is 32). This actually is the maximum string to _collect_ and store in the ring buffer, not just print. Before: # perf trace -e tools/perf/examples/bpf/augmented_syscalls.c,openat cat /etc/passwd > /dev/null 0.000 ( ): cat/9658 openat(dfd: CWD, filename: 0x6626eda8, flags: CLOEXEC) 0.017 ( 0.007 ms): cat/9658 openat(dfd: CWD, filename: 0x6626eda8, flags: CLOEXEC) = 3 0.049 ( ): cat/9658 openat(dfd: CWD, filename: 0x66476ce0, flags: CLOEXEC) 0.051 ( 0.007 ms): cat/9658 openat(dfd: CWD, filename: 0x66476ce0, flags: CLOEXEC) = 3 0.377 ( ): cat/9658 openat(dfd: CWD, filename: 0x1e8f806b) 0.379 ( 0.005 ms): cat/9658 openat(dfd: CWD, filename: 0x1e8f806b) = 3 # After: # perf trace -e tools/perf/examples/bpf/augmented_syscalls.c,openat cat /etc/passwd > /dev/null 0.000 ( ): cat/11966 openat(dfd: CWD, filename: /etc/ld.so.cache, flags: CLOEXEC) 0.006 ( 0.006 ms): cat/11966 openat(dfd: CWD, filename: 0x4bfdcda8, flags: CLOEXEC) = 3 0.034 ( ): cat/11966 openat(dfd: CWD, filename: /lib64/libc.so.6, flags: CLOEXEC) 0.036 ( 0.008 ms): cat/11966 openat(dfd: CWD, filename: 0x4c1e4ce0, flags: CLOEXEC) = 3 0.375 ( ): cat/11966 openat(dfd: CWD, filename: /etc/passwd) 0.377 ( 0.005 ms): cat/11966 openat(dfd: CWD, filename: 0xe87906b) = 3 # This cset should show all the aspects of establishing a protocol between an eBPF syscall arg augmenter program, tools/perf/examples/bpf/augmented_syscalls.c and a 'perf trace' beautifier, the one associated with all 'char *' point syscall args with names that can heuristically be associated with filenames. Now to wire up 'open' to show a second syscall using this scheme, all we have to do now is to change tools/perf/examples/bpf/augmented_syscalls.c, as 'perf trace' will notice that the perf_sample.raw_size is more than what is expected for a particular syscall payload as defined by its tracefs format file and will then use the augmented payload in the 'filename' syscall arg beautifier. The same protocol will be used for structs such as 'struct sockaddr *', 'struct pollfd', etc, with additions for handling arrays. This will all be done under the hood when 'perf trace' realizes the system has the necessary components, and also can be done by providing a precompiled augmented_syscalls.c eBPF ELF object. Cc: Adrian Hunter <adrian.hunter@intel.com> Cc: David Ahern <dsahern@gmail.com> Cc: Jiri Olsa <jolsa@kernel.org> Cc: Namhyung Kim <namhyung@kernel.org> Cc: Wang Nan <wangnan0@huawei.com> Link: https://lkml.kernel.org/n/tip-gj9kqb61wo7m3shtpzercbcr@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2018-08-21 23:00:39 +08:00
};
#define augmented_filename_syscall(syscall) \
struct augmented_enter_##syscall##_args { \
struct syscall_enter_##syscall##_args args; \
struct augmented_filename filename; \
}; \
int syscall_enter(syscall)(struct syscall_enter_##syscall##_args *args) \
{ \
struct augmented_enter_##syscall##_args augmented_args = { .filename.reserved = 0, }; \
unsigned int len = sizeof(augmented_args); \
probe_read(&augmented_args.args, sizeof(augmented_args.args), args); \
augmented_args.filename.size = probe_read_str(&augmented_args.filename.value, \
sizeof(augmented_args.filename.value), \
args->filename_ptr); \
perf augmented_syscalls: Avoid optimization to pass older BPF validators See https://www.spinics.net/lists/netdev/msg480099.html for the whole discussio, but to make the augmented_syscalls.c BPF program to get built and loaded successfully in a greater range of kernels, add an extra check. Related patch: a60dd35d2e39 ("bpf: change bpf_perf_event_output arg5 type to ARG_CONST_SIZE_OR_ZERO") That is in the kernel since v4.15, I couldn't figure why this is hitting me with 4.17.17, but adding the workaround discussed there makes this work with this fedora kernel and with 4.18.recent. Before: # uname -a Linux seventh 4.17.17-100.fc27.x86_64 #1 SMP Mon Aug 20 15:53:11 UTC 2018 x86_64 x86_64 x86_64 GNU/Linux # perf trace -e tools/perf/examples/bpf/augmented_syscalls.c cat /etc/passwd > /dev/null libbpf: load bpf program failed: Permission denied libbpf: -- BEGIN DUMP LOG --- libbpf: 0: (bf) r6 = r1 1: (b7) r1 = 0 2: (7b) *(u64 *)(r10 -8) = r1 3: (7b) *(u64 *)(r10 -16) = r1 4: (7b) *(u64 *)(r10 -24) = r1 5: (7b) *(u64 *)(r10 -32) = r1 6: (7b) *(u64 *)(r10 -40) = r1 7: (7b) *(u64 *)(r10 -48) = r1 8: (7b) *(u64 *)(r10 -56) = r1 9: (7b) *(u64 *)(r10 -64) = r1 10: (7b) *(u64 *)(r10 -72) = r1 11: (7b) *(u64 *)(r10 -80) = r1 12: (7b) *(u64 *)(r10 -88) = r1 13: (7b) *(u64 *)(r10 -96) = r1 14: (7b) *(u64 *)(r10 -104) = r1 15: (7b) *(u64 *)(r10 -112) = r1 16: (7b) *(u64 *)(r10 -120) = r1 17: (7b) *(u64 *)(r10 -128) = r1 18: (7b) *(u64 *)(r10 -136) = r1 19: (7b) *(u64 *)(r10 -144) = r1 20: (7b) *(u64 *)(r10 -152) = r1 21: (7b) *(u64 *)(r10 -160) = r1 22: (7b) *(u64 *)(r10 -168) = r1 23: (7b) *(u64 *)(r10 -176) = r1 24: (7b) *(u64 *)(r10 -184) = r1 25: (7b) *(u64 *)(r10 -192) = r1 26: (7b) *(u64 *)(r10 -200) = r1 27: (7b) *(u64 *)(r10 -208) = r1 28: (7b) *(u64 *)(r10 -216) = r1 29: (7b) *(u64 *)(r10 -224) = r1 30: (7b) *(u64 *)(r10 -232) = r1 31: (7b) *(u64 *)(r10 -240) = r1 32: (7b) *(u64 *)(r10 -248) = r1 33: (7b) *(u64 *)(r10 -256) = r1 34: (7b) *(u64 *)(r10 -264) = r1 35: (7b) *(u64 *)(r10 -272) = r1 36: (7b) *(u64 *)(r10 -280) = r1 37: (7b) *(u64 *)(r10 -288) = r1 38: (7b) *(u64 *)(r10 -296) = r1 39: (7b) *(u64 *)(r10 -304) = r1 40: (7b) *(u64 *)(r10 -312) = r1 41: (bf) r7 = r10 42: (07) r7 += -312 43: (bf) r1 = r7 44: (b7) r2 = 48 45: (bf) r3 = r6 46: (85) call bpf_probe_read#4 47: (79) r3 = *(u64 *)(r6 +24) 48: (bf) r1 = r10 49: (07) r1 += -256 50: (b7) r8 = 256 51: (b7) r2 = 256 52: (85) call bpf_probe_read_str#45 53: (bf) r1 = r0 54: (67) r1 <<= 32 55: (77) r1 >>= 32 56: (bf) r5 = r0 57: (07) r5 += 56 58: (2d) if r8 > r1 goto pc+1 R0=inv(id=0) R1=inv(id=0,umin_value=256,umax_value=4294967295,var_off=(0x0; 0xffffffff)) R5=inv(id=0) R6=ctx(id=0,off=0,imm=0) R7=fp-312,call_-1 R8=inv256 R10=fp0,call_-1 fp-264=0 59: (b7) r5 = 312 60: (63) *(u32 *)(r10 -264) = r0 61: (67) r5 <<= 32 62: (77) r5 >>= 32 63: (bf) r1 = r6 64: (18) r2 = 0xffff8b9120cc8500 66: (18) r3 = 0xffffffff 68: (bf) r4 = r7 69: (85) call bpf_perf_event_output#25 70: (b7) r0 = 0 71: (95) exit from 58 to 60: R0=inv(id=0) R1=inv(id=0,umax_value=255,var_off=(0x0; 0xff)) R5=inv(id=0) R6=ctx(id=0,off=0,imm=0) R7=fp-312,call_-1 R8=inv256 R10=fp0,call_-1 fp-264=0 60: (63) *(u32 *)(r10 -264) = r0 61: (67) r5 <<= 32 62: (77) r5 >>= 32 63: (bf) r1 = r6 64: (18) r2 = 0xffff8b9120cc8500 66: (18) r3 = 0xffffffff 68: (bf) r4 = r7 69: (85) call bpf_perf_event_output#25 R5 unbounded memory access, use 'var &= const' or 'if (var < const)' libbpf: -- END LOG -- libbpf: failed to load program 'syscalls:sys_enter_openat' libbpf: failed to load object 'tools/perf/examples/bpf/augmented_syscalls.c' bpf: load objects failed: err=-4007: (Kernel verifier blocks program loading) event syntax error: 'tools/perf/examples/bpf/augmented_syscalls.c' \___ Kernel verifier blocks program loading After: # perf trace -e tools/perf/examples/bpf/augmented_syscalls.c cat /etc/passwd > /dev/null 0.000 cat/29249 openat(dfd: CWD, filename: /etc/ld.so.cache, flags: CLOEXEC) 0.008 cat/29249 syscalls:sys_exit_openat:0x3 0.021 cat/29249 openat(dfd: CWD, filename: /lib64/libc.so.6, flags: CLOEXEC) 0.025 cat/29249 syscalls:sys_exit_openat:0x3 0.180 cat/29249 open(filename: /usr/lib/locale/locale-archive, flags: CLOEXEC) 0.185 cat/29249 syscalls:sys_exit_open:0x3 0.242 cat/29249 openat(dfd: CWD, filename: /etc/passwd) 0.245 cat/29249 syscalls:sys_exit_openat:0x3 # It also works with a more recent kernel: # uname -a Linux jouet 4.18.0-00014-g4e67b2a5df5d #6 SMP Thu Aug 30 17:34:17 -03 2018 x86_64 x86_64 x86_64 GNU/Linux # perf trace -e tools/perf/examples/bpf/augmented_syscalls.c cat /etc/passwd > /dev/null 0.000 cat/26451 openat(dfd: CWD, filename: /etc/ld.so.cache, flags: CLOEXEC) 0.020 cat/26451 syscalls:sys_exit_openat:0x3 0.039 cat/26451 openat(dfd: CWD, filename: /lib64/libc.so.6, flags: CLOEXEC) 0.044 cat/26451 syscalls:sys_exit_openat:0x3 0.231 cat/26451 open(filename: /usr/lib/locale/locale-archive, flags: CLOEXEC) 0.238 cat/26451 syscalls:sys_exit_open:0x3 0.278 cat/26451 openat(dfd: CWD, filename: /etc/passwd) 0.282 cat/26451 syscalls:sys_exit_openat:0x3 # Cc: Adrian Hunter <adrian.hunter@intel.com> Cc: Daniel Borkmann <daniel@iogearbox.net> Cc: David Ahern <dsahern@gmail.com> Cc: Gianluca Borello <g.borello@gmail.com> Cc: Jiri Olsa <jolsa@kernel.org> Cc: Namhyung Kim <namhyung@kernel.org> Cc: Wang Nan <wangnan0@huawei.com> Cc: Yonghong Song <yhs@fb.com> Link: https://lkml.kernel.org/n/tip-wkpsivs1a9afwldbul46btbv@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2018-09-04 02:18:37 +08:00
if (augmented_args.filename.size < sizeof(augmented_args.filename.value)) { \
len -= sizeof(augmented_args.filename.value) - augmented_args.filename.size; \
perf augmented_syscalls: Avoid optimization to pass older BPF validators See https://www.spinics.net/lists/netdev/msg480099.html for the whole discussio, but to make the augmented_syscalls.c BPF program to get built and loaded successfully in a greater range of kernels, add an extra check. Related patch: a60dd35d2e39 ("bpf: change bpf_perf_event_output arg5 type to ARG_CONST_SIZE_OR_ZERO") That is in the kernel since v4.15, I couldn't figure why this is hitting me with 4.17.17, but adding the workaround discussed there makes this work with this fedora kernel and with 4.18.recent. Before: # uname -a Linux seventh 4.17.17-100.fc27.x86_64 #1 SMP Mon Aug 20 15:53:11 UTC 2018 x86_64 x86_64 x86_64 GNU/Linux # perf trace -e tools/perf/examples/bpf/augmented_syscalls.c cat /etc/passwd > /dev/null libbpf: load bpf program failed: Permission denied libbpf: -- BEGIN DUMP LOG --- libbpf: 0: (bf) r6 = r1 1: (b7) r1 = 0 2: (7b) *(u64 *)(r10 -8) = r1 3: (7b) *(u64 *)(r10 -16) = r1 4: (7b) *(u64 *)(r10 -24) = r1 5: (7b) *(u64 *)(r10 -32) = r1 6: (7b) *(u64 *)(r10 -40) = r1 7: (7b) *(u64 *)(r10 -48) = r1 8: (7b) *(u64 *)(r10 -56) = r1 9: (7b) *(u64 *)(r10 -64) = r1 10: (7b) *(u64 *)(r10 -72) = r1 11: (7b) *(u64 *)(r10 -80) = r1 12: (7b) *(u64 *)(r10 -88) = r1 13: (7b) *(u64 *)(r10 -96) = r1 14: (7b) *(u64 *)(r10 -104) = r1 15: (7b) *(u64 *)(r10 -112) = r1 16: (7b) *(u64 *)(r10 -120) = r1 17: (7b) *(u64 *)(r10 -128) = r1 18: (7b) *(u64 *)(r10 -136) = r1 19: (7b) *(u64 *)(r10 -144) = r1 20: (7b) *(u64 *)(r10 -152) = r1 21: (7b) *(u64 *)(r10 -160) = r1 22: (7b) *(u64 *)(r10 -168) = r1 23: (7b) *(u64 *)(r10 -176) = r1 24: (7b) *(u64 *)(r10 -184) = r1 25: (7b) *(u64 *)(r10 -192) = r1 26: (7b) *(u64 *)(r10 -200) = r1 27: (7b) *(u64 *)(r10 -208) = r1 28: (7b) *(u64 *)(r10 -216) = r1 29: (7b) *(u64 *)(r10 -224) = r1 30: (7b) *(u64 *)(r10 -232) = r1 31: (7b) *(u64 *)(r10 -240) = r1 32: (7b) *(u64 *)(r10 -248) = r1 33: (7b) *(u64 *)(r10 -256) = r1 34: (7b) *(u64 *)(r10 -264) = r1 35: (7b) *(u64 *)(r10 -272) = r1 36: (7b) *(u64 *)(r10 -280) = r1 37: (7b) *(u64 *)(r10 -288) = r1 38: (7b) *(u64 *)(r10 -296) = r1 39: (7b) *(u64 *)(r10 -304) = r1 40: (7b) *(u64 *)(r10 -312) = r1 41: (bf) r7 = r10 42: (07) r7 += -312 43: (bf) r1 = r7 44: (b7) r2 = 48 45: (bf) r3 = r6 46: (85) call bpf_probe_read#4 47: (79) r3 = *(u64 *)(r6 +24) 48: (bf) r1 = r10 49: (07) r1 += -256 50: (b7) r8 = 256 51: (b7) r2 = 256 52: (85) call bpf_probe_read_str#45 53: (bf) r1 = r0 54: (67) r1 <<= 32 55: (77) r1 >>= 32 56: (bf) r5 = r0 57: (07) r5 += 56 58: (2d) if r8 > r1 goto pc+1 R0=inv(id=0) R1=inv(id=0,umin_value=256,umax_value=4294967295,var_off=(0x0; 0xffffffff)) R5=inv(id=0) R6=ctx(id=0,off=0,imm=0) R7=fp-312,call_-1 R8=inv256 R10=fp0,call_-1 fp-264=0 59: (b7) r5 = 312 60: (63) *(u32 *)(r10 -264) = r0 61: (67) r5 <<= 32 62: (77) r5 >>= 32 63: (bf) r1 = r6 64: (18) r2 = 0xffff8b9120cc8500 66: (18) r3 = 0xffffffff 68: (bf) r4 = r7 69: (85) call bpf_perf_event_output#25 70: (b7) r0 = 0 71: (95) exit from 58 to 60: R0=inv(id=0) R1=inv(id=0,umax_value=255,var_off=(0x0; 0xff)) R5=inv(id=0) R6=ctx(id=0,off=0,imm=0) R7=fp-312,call_-1 R8=inv256 R10=fp0,call_-1 fp-264=0 60: (63) *(u32 *)(r10 -264) = r0 61: (67) r5 <<= 32 62: (77) r5 >>= 32 63: (bf) r1 = r6 64: (18) r2 = 0xffff8b9120cc8500 66: (18) r3 = 0xffffffff 68: (bf) r4 = r7 69: (85) call bpf_perf_event_output#25 R5 unbounded memory access, use 'var &= const' or 'if (var < const)' libbpf: -- END LOG -- libbpf: failed to load program 'syscalls:sys_enter_openat' libbpf: failed to load object 'tools/perf/examples/bpf/augmented_syscalls.c' bpf: load objects failed: err=-4007: (Kernel verifier blocks program loading) event syntax error: 'tools/perf/examples/bpf/augmented_syscalls.c' \___ Kernel verifier blocks program loading After: # perf trace -e tools/perf/examples/bpf/augmented_syscalls.c cat /etc/passwd > /dev/null 0.000 cat/29249 openat(dfd: CWD, filename: /etc/ld.so.cache, flags: CLOEXEC) 0.008 cat/29249 syscalls:sys_exit_openat:0x3 0.021 cat/29249 openat(dfd: CWD, filename: /lib64/libc.so.6, flags: CLOEXEC) 0.025 cat/29249 syscalls:sys_exit_openat:0x3 0.180 cat/29249 open(filename: /usr/lib/locale/locale-archive, flags: CLOEXEC) 0.185 cat/29249 syscalls:sys_exit_open:0x3 0.242 cat/29249 openat(dfd: CWD, filename: /etc/passwd) 0.245 cat/29249 syscalls:sys_exit_openat:0x3 # It also works with a more recent kernel: # uname -a Linux jouet 4.18.0-00014-g4e67b2a5df5d #6 SMP Thu Aug 30 17:34:17 -03 2018 x86_64 x86_64 x86_64 GNU/Linux # perf trace -e tools/perf/examples/bpf/augmented_syscalls.c cat /etc/passwd > /dev/null 0.000 cat/26451 openat(dfd: CWD, filename: /etc/ld.so.cache, flags: CLOEXEC) 0.020 cat/26451 syscalls:sys_exit_openat:0x3 0.039 cat/26451 openat(dfd: CWD, filename: /lib64/libc.so.6, flags: CLOEXEC) 0.044 cat/26451 syscalls:sys_exit_openat:0x3 0.231 cat/26451 open(filename: /usr/lib/locale/locale-archive, flags: CLOEXEC) 0.238 cat/26451 syscalls:sys_exit_open:0x3 0.278 cat/26451 openat(dfd: CWD, filename: /etc/passwd) 0.282 cat/26451 syscalls:sys_exit_openat:0x3 # Cc: Adrian Hunter <adrian.hunter@intel.com> Cc: Daniel Borkmann <daniel@iogearbox.net> Cc: David Ahern <dsahern@gmail.com> Cc: Gianluca Borello <g.borello@gmail.com> Cc: Jiri Olsa <jolsa@kernel.org> Cc: Namhyung Kim <namhyung@kernel.org> Cc: Wang Nan <wangnan0@huawei.com> Cc: Yonghong Song <yhs@fb.com> Link: https://lkml.kernel.org/n/tip-wkpsivs1a9afwldbul46btbv@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2018-09-04 02:18:37 +08:00
len &= sizeof(augmented_args.filename.value) - 1; \
} \
/* If perf_event_output fails, return non-zero so that it gets recorded unaugmented */ \
return perf_event_output(args, &__augmented_syscalls__, BPF_F_CURRENT_CPU, \
&augmented_args, len); \
} \
int syscall_exit(syscall)(struct syscall_exit_args *args) \
{ \
return 1; /* 0 as soon as we start copying data returned by the kernel, e.g. 'read' */ \
}
perf trace: Handle "bpf-output" events associated with "__augmented_syscalls__" BPF map Add an example BPF script that writes syscalls:sys_enter_openat raw tracepoint payloads augmented with the first 64 bytes of the "filename" syscall pointer arg. Then catch it and print it just like with things written to the "__bpf_stdout__" map associated with a PERF_COUNT_SW_BPF_OUTPUT software event, by just letting the default tracepoint handler in 'perf trace', trace__event_handler(), to use bpf_output__fprintf(trace, sample), just like it does with all other PERF_COUNT_SW_BPF_OUTPUT events, i.e. just do a dump on the payload, so that we can check if what is being printed has at least the first 64 bytes of the "filename" arg: The augmented_syscalls.c eBPF script: # cat tools/perf/examples/bpf/augmented_syscalls.c // SPDX-License-Identifier: GPL-2.0 #include <stdio.h> struct bpf_map SEC("maps") __augmented_syscalls__ = { .type = BPF_MAP_TYPE_PERF_EVENT_ARRAY, .key_size = sizeof(int), .value_size = sizeof(u32), .max_entries = __NR_CPUS__, }; struct syscall_enter_openat_args { unsigned long long common_tp_fields; long syscall_nr; long dfd; char *filename_ptr; long flags; long mode; }; struct augmented_enter_openat_args { struct syscall_enter_openat_args args; char filename[64]; }; int syscall_enter(openat)(struct syscall_enter_openat_args *args) { struct augmented_enter_openat_args augmented_args; probe_read(&augmented_args.args, sizeof(augmented_args.args), args); probe_read_str(&augmented_args.filename, sizeof(augmented_args.filename), args->filename_ptr); perf_event_output(args, &__augmented_syscalls__, BPF_F_CURRENT_CPU, &augmented_args, sizeof(augmented_args)); return 1; } license(GPL); # So it will just prepare a raw_syscalls:sys_enter payload for the "openat" syscall. This will eventually be done for all syscalls with pointer args, globally or just when the user asks, using some spec, which args of which syscalls it wants "expanded" this way, we'll probably start with just all the syscalls that have char * pointers with familiar names, the ones we already handle with the probe:vfs_getname kprobe if it is in place hooking the kernel getname_flags() function used to copy from user the paths. Running it we get: # perf trace -e perf/tools/perf/examples/bpf/augmented_syscalls.c,openat cat /etc/passwd > /dev/null 0.000 ( ): __augmented_syscalls__:X?.C......................`\..................../etc/ld.so.cache..#......,....ao.k...............k......1."......... 0.006 ( ): syscalls:sys_enter_openat:dfd: CWD, filename: 0x5c600da8, flags: CLOEXEC 0.008 ( 0.005 ms): cat/31292 openat(dfd: CWD, filename: 0x5c600da8, flags: CLOEXEC ) = 3 0.036 ( ): __augmented_syscalls__:X?.C.......................\..................../lib64/libc.so.6......... .\....#........?.......=.C..../."......... 0.037 ( ): syscalls:sys_enter_openat:dfd: CWD, filename: 0x5c808ce0, flags: CLOEXEC 0.039 ( 0.007 ms): cat/31292 openat(dfd: CWD, filename: 0x5c808ce0, flags: CLOEXEC ) = 3 0.323 ( ): __augmented_syscalls__:X?.C.....................P....................../etc/passwd......>.C....@................>.C.....,....ao.>.C........ 0.325 ( ): syscalls:sys_enter_openat:dfd: CWD, filename: 0xe8be50d6 0.327 ( 0.004 ms): cat/31292 openat(dfd: CWD, filename: 0xe8be50d6 ) = 3 # We need to go on optimizing this to avoid seding trash or zeroes in the pointer content payload, using the return from bpf_probe_read_str(), but to keep things simple at this stage and make incremental progress, lets leave it at that for now. Cc: Adrian Hunter <adrian.hunter@intel.com> Cc: David Ahern <dsahern@gmail.com> Cc: Jiri Olsa <jolsa@kernel.org> Cc: Namhyung Kim <namhyung@kernel.org> Cc: Wang Nan <wangnan0@huawei.com> Link: https://lkml.kernel.org/n/tip-g360n1zbj6bkbk6q0qo11c28@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2018-08-08 02:40:13 +08:00
struct syscall_enter_openat_args {
unsigned long long common_tp_fields;
long syscall_nr;
long dfd;
char *filename_ptr;
long flags;
long mode;
};
augmented_filename_syscall(openat);
perf trace: Handle "bpf-output" events associated with "__augmented_syscalls__" BPF map Add an example BPF script that writes syscalls:sys_enter_openat raw tracepoint payloads augmented with the first 64 bytes of the "filename" syscall pointer arg. Then catch it and print it just like with things written to the "__bpf_stdout__" map associated with a PERF_COUNT_SW_BPF_OUTPUT software event, by just letting the default tracepoint handler in 'perf trace', trace__event_handler(), to use bpf_output__fprintf(trace, sample), just like it does with all other PERF_COUNT_SW_BPF_OUTPUT events, i.e. just do a dump on the payload, so that we can check if what is being printed has at least the first 64 bytes of the "filename" arg: The augmented_syscalls.c eBPF script: # cat tools/perf/examples/bpf/augmented_syscalls.c // SPDX-License-Identifier: GPL-2.0 #include <stdio.h> struct bpf_map SEC("maps") __augmented_syscalls__ = { .type = BPF_MAP_TYPE_PERF_EVENT_ARRAY, .key_size = sizeof(int), .value_size = sizeof(u32), .max_entries = __NR_CPUS__, }; struct syscall_enter_openat_args { unsigned long long common_tp_fields; long syscall_nr; long dfd; char *filename_ptr; long flags; long mode; }; struct augmented_enter_openat_args { struct syscall_enter_openat_args args; char filename[64]; }; int syscall_enter(openat)(struct syscall_enter_openat_args *args) { struct augmented_enter_openat_args augmented_args; probe_read(&augmented_args.args, sizeof(augmented_args.args), args); probe_read_str(&augmented_args.filename, sizeof(augmented_args.filename), args->filename_ptr); perf_event_output(args, &__augmented_syscalls__, BPF_F_CURRENT_CPU, &augmented_args, sizeof(augmented_args)); return 1; } license(GPL); # So it will just prepare a raw_syscalls:sys_enter payload for the "openat" syscall. This will eventually be done for all syscalls with pointer args, globally or just when the user asks, using some spec, which args of which syscalls it wants "expanded" this way, we'll probably start with just all the syscalls that have char * pointers with familiar names, the ones we already handle with the probe:vfs_getname kprobe if it is in place hooking the kernel getname_flags() function used to copy from user the paths. Running it we get: # perf trace -e perf/tools/perf/examples/bpf/augmented_syscalls.c,openat cat /etc/passwd > /dev/null 0.000 ( ): __augmented_syscalls__:X?.C......................`\..................../etc/ld.so.cache..#......,....ao.k...............k......1."......... 0.006 ( ): syscalls:sys_enter_openat:dfd: CWD, filename: 0x5c600da8, flags: CLOEXEC 0.008 ( 0.005 ms): cat/31292 openat(dfd: CWD, filename: 0x5c600da8, flags: CLOEXEC ) = 3 0.036 ( ): __augmented_syscalls__:X?.C.......................\..................../lib64/libc.so.6......... .\....#........?.......=.C..../."......... 0.037 ( ): syscalls:sys_enter_openat:dfd: CWD, filename: 0x5c808ce0, flags: CLOEXEC 0.039 ( 0.007 ms): cat/31292 openat(dfd: CWD, filename: 0x5c808ce0, flags: CLOEXEC ) = 3 0.323 ( ): __augmented_syscalls__:X?.C.....................P....................../etc/passwd......>.C....@................>.C.....,....ao.>.C........ 0.325 ( ): syscalls:sys_enter_openat:dfd: CWD, filename: 0xe8be50d6 0.327 ( 0.004 ms): cat/31292 openat(dfd: CWD, filename: 0xe8be50d6 ) = 3 # We need to go on optimizing this to avoid seding trash or zeroes in the pointer content payload, using the return from bpf_probe_read_str(), but to keep things simple at this stage and make incremental progress, lets leave it at that for now. Cc: Adrian Hunter <adrian.hunter@intel.com> Cc: David Ahern <dsahern@gmail.com> Cc: Jiri Olsa <jolsa@kernel.org> Cc: Namhyung Kim <namhyung@kernel.org> Cc: Wang Nan <wangnan0@huawei.com> Link: https://lkml.kernel.org/n/tip-g360n1zbj6bkbk6q0qo11c28@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2018-08-08 02:40:13 +08:00
perf trace: Augment the 'open' syscall 'filename' arg As described in the previous cset, all we had to do was to touch the augmented_syscalls.c eBPF program, fire up 'perf trace' with that new eBPF script in system wide mode and wait for 'open' syscalls, in addition to 'openat' ones to see that it works: # perf trace -e tools/perf/examples/bpf/augmented_syscalls.c 0.000 StreamT~s #200/16150 openat(dfd: CWD, filename: /home/acme/.mozilla/firefox/fqxhj76d.default/prefs.js, flags: CREAT|EXCL|TRUNC|WRONLY, mode: IRUSR|IWUSR) 0.065 StreamT~s #200/16150 openat(dfd: CWD, filename: /home/acme/.mozilla/firefox/fqxhj76d.default/prefs-1.js, flags: CREAT|EXCL|TRUNC|WRONLY, mode: IRUSR|IWUSR) 0.435 StreamT~s #200/16150 openat(dfd: CWD, filename: /home/acme/.mozilla/firefox/fqxhj76d.default/prefs-1.js, flags: CREAT|TRUNC|WRONLY, mode: IRUSR|IWUSR) 1.875 perf/16772 openat(dfd: CWD, filename: /sys/kernel/debug/tracing/events/syscalls/sys_enter_openat/form) 1227.260 gnome-shell/1463 openat(dfd: CWD, filename: /proc/self/stat) 1227.397 gnome-shell/2125 openat(dfd: CWD, filename: /proc/self/stat) 7227.619 gnome-shell/1463 openat(dfd: CWD, filename: /proc/self/stat) 7227.661 gnome-shell/2125 openat(dfd: CWD, filename: /proc/self/stat) 10018.079 gnome-shell/1463 openat(dfd: CWD, filename: /proc/self/stat) 10018.514 perf/16772 openat(dfd: CWD, filename: /proc/1237/status) 10018.568 perf/16772 openat(dfd: CWD, filename: /proc/1237/status) 10022.409 gnome-shell/2125 openat(dfd: CWD, filename: /proc/self/stat) 10090.044 NetworkManager/1237 openat(dfd: CWD, filename: /proc/2125/stat) 10090.351 NetworkManager/1237 open(filename: /etc/passwd, flags: CLOEXEC) 10090.407 perf/16772 openat(dfd: CWD, filename: /sys/kernel/debug/tracing/events/syscalls/sys_enter_open/format) 10091.763 NetworkManager/1237 openat(dfd: CWD, filename: /proc/2125/stat) 10091.812 NetworkManager/1237 open(filename: /etc/passwd, flags: CLOEXEC) 10092.807 NetworkManager/1237 openat(dfd: CWD, filename: /proc/2125/stat) 10092.851 NetworkManager/1237 open(filename: /etc/passwd, flags: CLOEXEC) 10094.650 NetworkManager/1237 openat(dfd: CWD, filename: /proc/1463/stat) 10094.926 NetworkManager/1237 open(filename: /etc/passwd, flags: CLOEXEC) 10096.010 NetworkManager/1237 openat(dfd: CWD, filename: /proc/1463/stat) 10096.057 NetworkManager/1237 open(filename: /etc/passwd, flags: CLOEXEC) 10097.056 NetworkManager/1237 openat(dfd: CWD, filename: /proc/1463/stat) 10097.099 NetworkManager/1237 open(filename: /etc/passwd, flags: CLOEXEC) 13228.345 gnome-shell/1463 openat(dfd: CWD, filename: /proc/self/stat) 13232.734 gnome-shell/2125 openat(dfd: CWD, filename: /proc/self/stat) 15198.956 lighttpd/16748 open(filename: /proc/loadavg, mode: ISGID|IXOTH) ^C# It even catches 'perf' itself looking at the sys_enter_open and sys_enter_openat tracefs format dictionaries when it first finds them in the trace... :-) Cc: Adrian Hunter <adrian.hunter@intel.com> Cc: David Ahern <dsahern@gmail.com> Cc: Jiri Olsa <jolsa@kernel.org> Cc: Namhyung Kim <namhyung@kernel.org> Cc: Wang Nan <wangnan0@huawei.com> Link: https://lkml.kernel.org/n/tip-upmogc57uatljr6el6u8537l@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2018-08-21 23:20:06 +08:00
struct syscall_enter_open_args {
unsigned long long common_tp_fields;
long syscall_nr;
char *filename_ptr;
long flags;
long mode;
};
augmented_filename_syscall(open);
perf trace: Augment the 'open' syscall 'filename' arg As described in the previous cset, all we had to do was to touch the augmented_syscalls.c eBPF program, fire up 'perf trace' with that new eBPF script in system wide mode and wait for 'open' syscalls, in addition to 'openat' ones to see that it works: # perf trace -e tools/perf/examples/bpf/augmented_syscalls.c 0.000 StreamT~s #200/16150 openat(dfd: CWD, filename: /home/acme/.mozilla/firefox/fqxhj76d.default/prefs.js, flags: CREAT|EXCL|TRUNC|WRONLY, mode: IRUSR|IWUSR) 0.065 StreamT~s #200/16150 openat(dfd: CWD, filename: /home/acme/.mozilla/firefox/fqxhj76d.default/prefs-1.js, flags: CREAT|EXCL|TRUNC|WRONLY, mode: IRUSR|IWUSR) 0.435 StreamT~s #200/16150 openat(dfd: CWD, filename: /home/acme/.mozilla/firefox/fqxhj76d.default/prefs-1.js, flags: CREAT|TRUNC|WRONLY, mode: IRUSR|IWUSR) 1.875 perf/16772 openat(dfd: CWD, filename: /sys/kernel/debug/tracing/events/syscalls/sys_enter_openat/form) 1227.260 gnome-shell/1463 openat(dfd: CWD, filename: /proc/self/stat) 1227.397 gnome-shell/2125 openat(dfd: CWD, filename: /proc/self/stat) 7227.619 gnome-shell/1463 openat(dfd: CWD, filename: /proc/self/stat) 7227.661 gnome-shell/2125 openat(dfd: CWD, filename: /proc/self/stat) 10018.079 gnome-shell/1463 openat(dfd: CWD, filename: /proc/self/stat) 10018.514 perf/16772 openat(dfd: CWD, filename: /proc/1237/status) 10018.568 perf/16772 openat(dfd: CWD, filename: /proc/1237/status) 10022.409 gnome-shell/2125 openat(dfd: CWD, filename: /proc/self/stat) 10090.044 NetworkManager/1237 openat(dfd: CWD, filename: /proc/2125/stat) 10090.351 NetworkManager/1237 open(filename: /etc/passwd, flags: CLOEXEC) 10090.407 perf/16772 openat(dfd: CWD, filename: /sys/kernel/debug/tracing/events/syscalls/sys_enter_open/format) 10091.763 NetworkManager/1237 openat(dfd: CWD, filename: /proc/2125/stat) 10091.812 NetworkManager/1237 open(filename: /etc/passwd, flags: CLOEXEC) 10092.807 NetworkManager/1237 openat(dfd: CWD, filename: /proc/2125/stat) 10092.851 NetworkManager/1237 open(filename: /etc/passwd, flags: CLOEXEC) 10094.650 NetworkManager/1237 openat(dfd: CWD, filename: /proc/1463/stat) 10094.926 NetworkManager/1237 open(filename: /etc/passwd, flags: CLOEXEC) 10096.010 NetworkManager/1237 openat(dfd: CWD, filename: /proc/1463/stat) 10096.057 NetworkManager/1237 open(filename: /etc/passwd, flags: CLOEXEC) 10097.056 NetworkManager/1237 openat(dfd: CWD, filename: /proc/1463/stat) 10097.099 NetworkManager/1237 open(filename: /etc/passwd, flags: CLOEXEC) 13228.345 gnome-shell/1463 openat(dfd: CWD, filename: /proc/self/stat) 13232.734 gnome-shell/2125 openat(dfd: CWD, filename: /proc/self/stat) 15198.956 lighttpd/16748 open(filename: /proc/loadavg, mode: ISGID|IXOTH) ^C# It even catches 'perf' itself looking at the sys_enter_open and sys_enter_openat tracefs format dictionaries when it first finds them in the trace... :-) Cc: Adrian Hunter <adrian.hunter@intel.com> Cc: David Ahern <dsahern@gmail.com> Cc: Jiri Olsa <jolsa@kernel.org> Cc: Namhyung Kim <namhyung@kernel.org> Cc: Wang Nan <wangnan0@huawei.com> Link: https://lkml.kernel.org/n/tip-upmogc57uatljr6el6u8537l@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2018-08-21 23:20:06 +08:00
perf trace: Augment inotify_add_watch pathname syscall arg Again, just changing tools/perf/examples/bpf/augmented_syscalls.c, that is starting to have too much boilerplate, some macro will come to the rescue. # perf trace -e tools/perf/examples/bpf/augmented_syscalls.c 0.000 gmain/2590 inotify_add_watch(fd: 3<anon_inode:inotify>, pathname: /var/cache/app-info/yaml, mask: 16789454) 0.023 gmain/2590 inotify_add_watch(fd: 3<anon_inode:inotify>, pathname: /var/lib/app-info/xmls, mask: 16789454) 0.028 gmain/2590 inotify_add_watch(fd: 3<anon_inode:inotify>, pathname: /var/lib/app-info/yaml, mask: 16789454) 0.032 gmain/2590 inotify_add_watch(fd: 3<anon_inode:inotify>, pathname: /usr/share/app-info/yaml, mask: 16789454) 0.039 gmain/2590 inotify_add_watch(fd: 3<anon_inode:inotify>, pathname: /usr/local/share/app-info/xmls, mask: 16789454) 0.045 gmain/2590 inotify_add_watch(fd: 3<anon_inode:inotify>, pathname: /usr/local/share/app-info/yaml, mask: 16789454) 0.049 gmain/2590 inotify_add_watch(fd: 3<anon_inode:inotify>, pathname: /home/acme/.local/share/app-info/yaml, mask: 16789454) 0.056 gmain/2590 inotify_add_watch(fd: 3<anon_inode:inotify>, pathname: , mask: 16789454) 0.010 gmain/2245 inotify_add_watch(fd: 7<anon_inode:inotify>, pathname: /home/acme/~, mask: 16789454) 0.087 perf/20116 openat(dfd: CWD, filename: /sys/kernel/debug/tracing/events/syscalls/sys_enter_inotify_add) 0.436 perf/20116 openat(dfd: CWD, filename: /sys/kernel/debug/tracing/events/syscalls/sys_enter_openat/form) 56.042 gmain/2791 inotify_add_watch(fd: 4<anon_inode:inotify>, pathname: /var/lib/fwupd/remotes.d/lvfs-testing, mask: 16789454) 113.986 gmain/1721 inotify_add_watch(fd: 3<anon_inode:inotify>, pathname: /var/lib/gdm/~, mask: 16789454) 3777.265 gsd-color/2408 openat(dfd: CWD, filename: /etc/localtime) 3777.550 gsd-color/2408 openat(dfd: CWD, filename: /etc/localtime) ^C[root@jouet perf]# Still not combining raw_syscalls:sys_enter + raw_syscalls:sys_exit, to get it strace-like, but that probably will come very naturally with some more wiring up... Cc: Adrian Hunter <adrian.hunter@intel.com> Cc: David Ahern <dsahern@gmail.com> Cc: Jiri Olsa <jolsa@kernel.org> Cc: Namhyung Kim <namhyung@kernel.org> Cc: Wang Nan <wangnan0@huawei.com> Link: https://lkml.kernel.org/n/tip-ol83juin2cht9vzquynec5hz@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2018-08-22 00:21:36 +08:00
struct syscall_enter_inotify_add_watch_args {
unsigned long long common_tp_fields;
long syscall_nr;
long fd;
char *filename_ptr;
perf trace: Augment inotify_add_watch pathname syscall arg Again, just changing tools/perf/examples/bpf/augmented_syscalls.c, that is starting to have too much boilerplate, some macro will come to the rescue. # perf trace -e tools/perf/examples/bpf/augmented_syscalls.c 0.000 gmain/2590 inotify_add_watch(fd: 3<anon_inode:inotify>, pathname: /var/cache/app-info/yaml, mask: 16789454) 0.023 gmain/2590 inotify_add_watch(fd: 3<anon_inode:inotify>, pathname: /var/lib/app-info/xmls, mask: 16789454) 0.028 gmain/2590 inotify_add_watch(fd: 3<anon_inode:inotify>, pathname: /var/lib/app-info/yaml, mask: 16789454) 0.032 gmain/2590 inotify_add_watch(fd: 3<anon_inode:inotify>, pathname: /usr/share/app-info/yaml, mask: 16789454) 0.039 gmain/2590 inotify_add_watch(fd: 3<anon_inode:inotify>, pathname: /usr/local/share/app-info/xmls, mask: 16789454) 0.045 gmain/2590 inotify_add_watch(fd: 3<anon_inode:inotify>, pathname: /usr/local/share/app-info/yaml, mask: 16789454) 0.049 gmain/2590 inotify_add_watch(fd: 3<anon_inode:inotify>, pathname: /home/acme/.local/share/app-info/yaml, mask: 16789454) 0.056 gmain/2590 inotify_add_watch(fd: 3<anon_inode:inotify>, pathname: , mask: 16789454) 0.010 gmain/2245 inotify_add_watch(fd: 7<anon_inode:inotify>, pathname: /home/acme/~, mask: 16789454) 0.087 perf/20116 openat(dfd: CWD, filename: /sys/kernel/debug/tracing/events/syscalls/sys_enter_inotify_add) 0.436 perf/20116 openat(dfd: CWD, filename: /sys/kernel/debug/tracing/events/syscalls/sys_enter_openat/form) 56.042 gmain/2791 inotify_add_watch(fd: 4<anon_inode:inotify>, pathname: /var/lib/fwupd/remotes.d/lvfs-testing, mask: 16789454) 113.986 gmain/1721 inotify_add_watch(fd: 3<anon_inode:inotify>, pathname: /var/lib/gdm/~, mask: 16789454) 3777.265 gsd-color/2408 openat(dfd: CWD, filename: /etc/localtime) 3777.550 gsd-color/2408 openat(dfd: CWD, filename: /etc/localtime) ^C[root@jouet perf]# Still not combining raw_syscalls:sys_enter + raw_syscalls:sys_exit, to get it strace-like, but that probably will come very naturally with some more wiring up... Cc: Adrian Hunter <adrian.hunter@intel.com> Cc: David Ahern <dsahern@gmail.com> Cc: Jiri Olsa <jolsa@kernel.org> Cc: Namhyung Kim <namhyung@kernel.org> Cc: Wang Nan <wangnan0@huawei.com> Link: https://lkml.kernel.org/n/tip-ol83juin2cht9vzquynec5hz@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2018-08-22 00:21:36 +08:00
long mask;
};
augmented_filename_syscall(inotify_add_watch);
perf trace: Augment inotify_add_watch pathname syscall arg Again, just changing tools/perf/examples/bpf/augmented_syscalls.c, that is starting to have too much boilerplate, some macro will come to the rescue. # perf trace -e tools/perf/examples/bpf/augmented_syscalls.c 0.000 gmain/2590 inotify_add_watch(fd: 3<anon_inode:inotify>, pathname: /var/cache/app-info/yaml, mask: 16789454) 0.023 gmain/2590 inotify_add_watch(fd: 3<anon_inode:inotify>, pathname: /var/lib/app-info/xmls, mask: 16789454) 0.028 gmain/2590 inotify_add_watch(fd: 3<anon_inode:inotify>, pathname: /var/lib/app-info/yaml, mask: 16789454) 0.032 gmain/2590 inotify_add_watch(fd: 3<anon_inode:inotify>, pathname: /usr/share/app-info/yaml, mask: 16789454) 0.039 gmain/2590 inotify_add_watch(fd: 3<anon_inode:inotify>, pathname: /usr/local/share/app-info/xmls, mask: 16789454) 0.045 gmain/2590 inotify_add_watch(fd: 3<anon_inode:inotify>, pathname: /usr/local/share/app-info/yaml, mask: 16789454) 0.049 gmain/2590 inotify_add_watch(fd: 3<anon_inode:inotify>, pathname: /home/acme/.local/share/app-info/yaml, mask: 16789454) 0.056 gmain/2590 inotify_add_watch(fd: 3<anon_inode:inotify>, pathname: , mask: 16789454) 0.010 gmain/2245 inotify_add_watch(fd: 7<anon_inode:inotify>, pathname: /home/acme/~, mask: 16789454) 0.087 perf/20116 openat(dfd: CWD, filename: /sys/kernel/debug/tracing/events/syscalls/sys_enter_inotify_add) 0.436 perf/20116 openat(dfd: CWD, filename: /sys/kernel/debug/tracing/events/syscalls/sys_enter_openat/form) 56.042 gmain/2791 inotify_add_watch(fd: 4<anon_inode:inotify>, pathname: /var/lib/fwupd/remotes.d/lvfs-testing, mask: 16789454) 113.986 gmain/1721 inotify_add_watch(fd: 3<anon_inode:inotify>, pathname: /var/lib/gdm/~, mask: 16789454) 3777.265 gsd-color/2408 openat(dfd: CWD, filename: /etc/localtime) 3777.550 gsd-color/2408 openat(dfd: CWD, filename: /etc/localtime) ^C[root@jouet perf]# Still not combining raw_syscalls:sys_enter + raw_syscalls:sys_exit, to get it strace-like, but that probably will come very naturally with some more wiring up... Cc: Adrian Hunter <adrian.hunter@intel.com> Cc: David Ahern <dsahern@gmail.com> Cc: Jiri Olsa <jolsa@kernel.org> Cc: Namhyung Kim <namhyung@kernel.org> Cc: Wang Nan <wangnan0@huawei.com> Link: https://lkml.kernel.org/n/tip-ol83juin2cht9vzquynec5hz@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2018-08-22 00:21:36 +08:00
struct statbuf;
struct syscall_enter_newstat_args {
unsigned long long common_tp_fields;
long syscall_nr;
char *filename_ptr;
struct stat *statbuf;
};
augmented_filename_syscall(newstat);
#ifndef _K_SS_MAXSIZE
#define _K_SS_MAXSIZE 128
#endif
#define augmented_sockaddr_syscall(syscall) \
struct augmented_enter_##syscall##_args { \
struct syscall_enter_##syscall##_args args; \
struct sockaddr_storage addr; \
}; \
int syscall_enter(syscall)(struct syscall_enter_##syscall##_args *args) \
{ \
struct augmented_enter_##syscall##_args augmented_args; \
unsigned long addrlen = sizeof(augmented_args.addr); \
probe_read(&augmented_args.args, sizeof(augmented_args.args), args); \
/* FIXME_CLANG_OPTIMIZATION_THAT_ACCESSES_USER_CONTROLLED_ADDRLEN_DESPITE_THIS_CHECK */ \
/* if (addrlen > augmented_args.args.addrlen) */ \
/* addrlen = augmented_args.args.addrlen; */ \
/* */ \
probe_read(&augmented_args.addr, addrlen, args->addr_ptr); \
/* If perf_event_output fails, return non-zero so that it gets recorded unaugmented */ \
return perf_event_output(args, &__augmented_syscalls__, BPF_F_CURRENT_CPU, \
&augmented_args, \
sizeof(augmented_args) - sizeof(augmented_args.addr) + addrlen);\
} \
int syscall_exit(syscall)(struct syscall_exit_args *args) \
{ \
return 1; /* 0 as soon as we start copying data returned by the kernel, e.g. 'read' */ \
}
perf trace augmented_syscalls: Augment connect's 'sockaddr' arg As the first example of augmenting something other than a 'filename', augment the 'struct sockaddr' argument for the 'connect' syscall: # perf trace -e tools/perf/examples/bpf/augmented_syscalls.c ssh -6 fedorapeople.org 0.000 ssh/29669 connect(fd: 3, uservaddr: { .family: LOCAL, path: /var/run/nscd/socket }, addrlen: 110) 0.042 ssh/29669 connect(fd: 3, uservaddr: { .family: LOCAL, path: /var/run/nscd/socket }, addrlen: 110) 1.329 ssh/29669 connect(fd: 3, uservaddr: { .family: LOCAL, path: /var/run/nscd/socket }, addrlen: 110) 1.362 ssh/29669 connect(fd: 3, uservaddr: { .family: LOCAL, path: /var/run/nscd/socket }, addrlen: 110) 1.458 ssh/29669 connect(fd: 3, uservaddr: { .family: LOCAL, path: /var/run/nscd/socket }, addrlen: 110) 1.478 ssh/29669 connect(fd: 3, uservaddr: { .family: LOCAL, path: /var/run/nscd/socket }, addrlen: 110) 1.683 ssh/29669 connect(fd: 3<socket:[125942]>, uservaddr: { .family: INET, port: 53, addr: 192.168.43.1 }, addrlen: 16) 4.710 ssh/29669 connect(fd: 3<socket:[125942]>, uservaddr: { .family: INET6, port: 22, addr: 2610:28:3090:3001:5054:ff:fea7:9474 }, addrlen: 28) root@fedorapeople.org: Permission denied (publickey). # This is still just augmenting the syscalls:sys_enter_connect part, later we'll wire this up to augment the enter+exit combo, like in the tradicional 'perf trace' and 'strace' outputs. Cc: Adrian Hunter <adrian.hunter@intel.com> Cc: David Ahern <dsahern@gmail.com> Cc: Jiri Olsa <jolsa@kernel.org> Cc: Namhyung Kim <namhyung@kernel.org> Cc: Wang Nan <wangnan0@huawei.com> Link: https://lkml.kernel.org/n/tip-s7l541cbiqb22ifio6z7dpf6@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2018-08-29 03:24:44 +08:00
struct sockaddr;
struct syscall_enter_bind_args {
unsigned long long common_tp_fields;
long syscall_nr;
long fd;
struct sockaddr *addr_ptr;
unsigned long addrlen;
};
augmented_sockaddr_syscall(bind);
perf trace augmented_syscalls: Augment connect's 'sockaddr' arg As the first example of augmenting something other than a 'filename', augment the 'struct sockaddr' argument for the 'connect' syscall: # perf trace -e tools/perf/examples/bpf/augmented_syscalls.c ssh -6 fedorapeople.org 0.000 ssh/29669 connect(fd: 3, uservaddr: { .family: LOCAL, path: /var/run/nscd/socket }, addrlen: 110) 0.042 ssh/29669 connect(fd: 3, uservaddr: { .family: LOCAL, path: /var/run/nscd/socket }, addrlen: 110) 1.329 ssh/29669 connect(fd: 3, uservaddr: { .family: LOCAL, path: /var/run/nscd/socket }, addrlen: 110) 1.362 ssh/29669 connect(fd: 3, uservaddr: { .family: LOCAL, path: /var/run/nscd/socket }, addrlen: 110) 1.458 ssh/29669 connect(fd: 3, uservaddr: { .family: LOCAL, path: /var/run/nscd/socket }, addrlen: 110) 1.478 ssh/29669 connect(fd: 3, uservaddr: { .family: LOCAL, path: /var/run/nscd/socket }, addrlen: 110) 1.683 ssh/29669 connect(fd: 3<socket:[125942]>, uservaddr: { .family: INET, port: 53, addr: 192.168.43.1 }, addrlen: 16) 4.710 ssh/29669 connect(fd: 3<socket:[125942]>, uservaddr: { .family: INET6, port: 22, addr: 2610:28:3090:3001:5054:ff:fea7:9474 }, addrlen: 28) root@fedorapeople.org: Permission denied (publickey). # This is still just augmenting the syscalls:sys_enter_connect part, later we'll wire this up to augment the enter+exit combo, like in the tradicional 'perf trace' and 'strace' outputs. Cc: Adrian Hunter <adrian.hunter@intel.com> Cc: David Ahern <dsahern@gmail.com> Cc: Jiri Olsa <jolsa@kernel.org> Cc: Namhyung Kim <namhyung@kernel.org> Cc: Wang Nan <wangnan0@huawei.com> Link: https://lkml.kernel.org/n/tip-s7l541cbiqb22ifio6z7dpf6@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2018-08-29 03:24:44 +08:00
struct syscall_enter_connect_args {
unsigned long long common_tp_fields;
long syscall_nr;
long fd;
struct sockaddr *addr_ptr;
unsigned long addrlen;
};
augmented_sockaddr_syscall(connect);
perf trace augmented_syscalls: Augment connect's 'sockaddr' arg As the first example of augmenting something other than a 'filename', augment the 'struct sockaddr' argument for the 'connect' syscall: # perf trace -e tools/perf/examples/bpf/augmented_syscalls.c ssh -6 fedorapeople.org 0.000 ssh/29669 connect(fd: 3, uservaddr: { .family: LOCAL, path: /var/run/nscd/socket }, addrlen: 110) 0.042 ssh/29669 connect(fd: 3, uservaddr: { .family: LOCAL, path: /var/run/nscd/socket }, addrlen: 110) 1.329 ssh/29669 connect(fd: 3, uservaddr: { .family: LOCAL, path: /var/run/nscd/socket }, addrlen: 110) 1.362 ssh/29669 connect(fd: 3, uservaddr: { .family: LOCAL, path: /var/run/nscd/socket }, addrlen: 110) 1.458 ssh/29669 connect(fd: 3, uservaddr: { .family: LOCAL, path: /var/run/nscd/socket }, addrlen: 110) 1.478 ssh/29669 connect(fd: 3, uservaddr: { .family: LOCAL, path: /var/run/nscd/socket }, addrlen: 110) 1.683 ssh/29669 connect(fd: 3<socket:[125942]>, uservaddr: { .family: INET, port: 53, addr: 192.168.43.1 }, addrlen: 16) 4.710 ssh/29669 connect(fd: 3<socket:[125942]>, uservaddr: { .family: INET6, port: 22, addr: 2610:28:3090:3001:5054:ff:fea7:9474 }, addrlen: 28) root@fedorapeople.org: Permission denied (publickey). # This is still just augmenting the syscalls:sys_enter_connect part, later we'll wire this up to augment the enter+exit combo, like in the tradicional 'perf trace' and 'strace' outputs. Cc: Adrian Hunter <adrian.hunter@intel.com> Cc: David Ahern <dsahern@gmail.com> Cc: Jiri Olsa <jolsa@kernel.org> Cc: Namhyung Kim <namhyung@kernel.org> Cc: Wang Nan <wangnan0@huawei.com> Link: https://lkml.kernel.org/n/tip-s7l541cbiqb22ifio6z7dpf6@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2018-08-29 03:24:44 +08:00
struct syscall_enter_sendto_args {
unsigned long long common_tp_fields;
long syscall_nr;
long fd;
void *buff;
long len;
unsigned long flags;
struct sockaddr *addr_ptr;
long addr_len;
};
augmented_sockaddr_syscall(sendto);
perf trace: Handle "bpf-output" events associated with "__augmented_syscalls__" BPF map Add an example BPF script that writes syscalls:sys_enter_openat raw tracepoint payloads augmented with the first 64 bytes of the "filename" syscall pointer arg. Then catch it and print it just like with things written to the "__bpf_stdout__" map associated with a PERF_COUNT_SW_BPF_OUTPUT software event, by just letting the default tracepoint handler in 'perf trace', trace__event_handler(), to use bpf_output__fprintf(trace, sample), just like it does with all other PERF_COUNT_SW_BPF_OUTPUT events, i.e. just do a dump on the payload, so that we can check if what is being printed has at least the first 64 bytes of the "filename" arg: The augmented_syscalls.c eBPF script: # cat tools/perf/examples/bpf/augmented_syscalls.c // SPDX-License-Identifier: GPL-2.0 #include <stdio.h> struct bpf_map SEC("maps") __augmented_syscalls__ = { .type = BPF_MAP_TYPE_PERF_EVENT_ARRAY, .key_size = sizeof(int), .value_size = sizeof(u32), .max_entries = __NR_CPUS__, }; struct syscall_enter_openat_args { unsigned long long common_tp_fields; long syscall_nr; long dfd; char *filename_ptr; long flags; long mode; }; struct augmented_enter_openat_args { struct syscall_enter_openat_args args; char filename[64]; }; int syscall_enter(openat)(struct syscall_enter_openat_args *args) { struct augmented_enter_openat_args augmented_args; probe_read(&augmented_args.args, sizeof(augmented_args.args), args); probe_read_str(&augmented_args.filename, sizeof(augmented_args.filename), args->filename_ptr); perf_event_output(args, &__augmented_syscalls__, BPF_F_CURRENT_CPU, &augmented_args, sizeof(augmented_args)); return 1; } license(GPL); # So it will just prepare a raw_syscalls:sys_enter payload for the "openat" syscall. This will eventually be done for all syscalls with pointer args, globally or just when the user asks, using some spec, which args of which syscalls it wants "expanded" this way, we'll probably start with just all the syscalls that have char * pointers with familiar names, the ones we already handle with the probe:vfs_getname kprobe if it is in place hooking the kernel getname_flags() function used to copy from user the paths. Running it we get: # perf trace -e perf/tools/perf/examples/bpf/augmented_syscalls.c,openat cat /etc/passwd > /dev/null 0.000 ( ): __augmented_syscalls__:X?.C......................`\..................../etc/ld.so.cache..#......,....ao.k...............k......1."......... 0.006 ( ): syscalls:sys_enter_openat:dfd: CWD, filename: 0x5c600da8, flags: CLOEXEC 0.008 ( 0.005 ms): cat/31292 openat(dfd: CWD, filename: 0x5c600da8, flags: CLOEXEC ) = 3 0.036 ( ): __augmented_syscalls__:X?.C.......................\..................../lib64/libc.so.6......... .\....#........?.......=.C..../."......... 0.037 ( ): syscalls:sys_enter_openat:dfd: CWD, filename: 0x5c808ce0, flags: CLOEXEC 0.039 ( 0.007 ms): cat/31292 openat(dfd: CWD, filename: 0x5c808ce0, flags: CLOEXEC ) = 3 0.323 ( ): __augmented_syscalls__:X?.C.....................P....................../etc/passwd......>.C....@................>.C.....,....ao.>.C........ 0.325 ( ): syscalls:sys_enter_openat:dfd: CWD, filename: 0xe8be50d6 0.327 ( 0.004 ms): cat/31292 openat(dfd: CWD, filename: 0xe8be50d6 ) = 3 # We need to go on optimizing this to avoid seding trash or zeroes in the pointer content payload, using the return from bpf_probe_read_str(), but to keep things simple at this stage and make incremental progress, lets leave it at that for now. Cc: Adrian Hunter <adrian.hunter@intel.com> Cc: David Ahern <dsahern@gmail.com> Cc: Jiri Olsa <jolsa@kernel.org> Cc: Namhyung Kim <namhyung@kernel.org> Cc: Wang Nan <wangnan0@huawei.com> Link: https://lkml.kernel.org/n/tip-g360n1zbj6bkbk6q0qo11c28@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2018-08-08 02:40:13 +08:00
license(GPL);