See https://www.spinics.net/lists/netdev/msg480099.html for the whole discussion, but to make the augmented_syscalls.c BPF program get built and loaded successfully in a greater range of kernels, add an extra check. Related patch: a60dd35d2e39 ("bpf: change bpf_perf_event_output arg5 type to ARG_CONST_SIZE_OR_ZERO") That is in the kernel since v4.15, I couldn't figure out why this is hitting me with 4.17.17, but adding the workaround discussed there makes this work with this fedora kernel and with 4.18.recent. Before: # uname -a Linux seventh 4.17.17-100.fc27.x86_64 #1 SMP Mon Aug 20 15:53:11 UTC 2018 x86_64 x86_64 x86_64 GNU/Linux # perf trace -e tools/perf/examples/bpf/augmented_syscalls.c cat /etc/passwd > /dev/null libbpf: load bpf program failed: Permission denied libbpf: -- BEGIN DUMP LOG --- libbpf: 0: (bf) r6 = r1 1: (b7) r1 = 0 2: (7b) *(u64 *)(r10 -8) = r1 3: (7b) *(u64 *)(r10 -16) = r1 4: (7b) *(u64 *)(r10 -24) = r1 5: (7b) *(u64 *)(r10 -32) = r1 6: (7b) *(u64 *)(r10 -40) = r1 7: (7b) *(u64 *)(r10 -48) = r1 8: (7b) *(u64 *)(r10 -56) = r1 9: (7b) *(u64 *)(r10 -64) = r1 10: (7b) *(u64 *)(r10 -72) = r1 11: (7b) *(u64 *)(r10 -80) = r1 12: (7b) *(u64 *)(r10 -88) = r1 13: (7b) *(u64 *)(r10 -96) = r1 14: (7b) *(u64 *)(r10 -104) = r1 15: (7b) *(u64 *)(r10 -112) = r1 16: (7b) *(u64 *)(r10 -120) = r1 17: (7b) *(u64 *)(r10 -128) = r1 18: (7b) *(u64 *)(r10 -136) = r1 19: (7b) *(u64 *)(r10 -144) = r1 20: (7b) *(u64 *)(r10 -152) = r1 21: (7b) *(u64 *)(r10 -160) = r1 22: (7b) *(u64 *)(r10 -168) = r1 23: (7b) *(u64 *)(r10 -176) = r1 24: (7b) *(u64 *)(r10 -184) = r1 25: (7b) *(u64 *)(r10 -192) = r1 26: (7b) *(u64 *)(r10 -200) = r1 27: (7b) *(u64 *)(r10 -208) = r1 28: (7b) *(u64 *)(r10 -216) = r1 29: (7b) *(u64 *)(r10 -224) = r1 30: (7b) *(u64 *)(r10 -232) = r1 31: (7b) *(u64 *)(r10 -240) = r1 32: (7b) *(u64 *)(r10 -248) = r1 33: (7b) *(u64 *)(r10 -256) = r1 34: (7b) *(u64 *)(r10 -264) = r1 35: (7b) *(u64 *)(r10 -272) = r1 36: (7b) *(u64 *)(r10 -280) = r1 37: (7b) *(u64 
*)(r10 -288) = r1 38: (7b) *(u64 *)(r10 -296) = r1 39: (7b) *(u64 *)(r10 -304) = r1 40: (7b) *(u64 *)(r10 -312) = r1 41: (bf) r7 = r10 42: (07) r7 += -312 43: (bf) r1 = r7 44: (b7) r2 = 48 45: (bf) r3 = r6 46: (85) call bpf_probe_read#4 47: (79) r3 = *(u64 *)(r6 +24) 48: (bf) r1 = r10 49: (07) r1 += -256 50: (b7) r8 = 256 51: (b7) r2 = 256 52: (85) call bpf_probe_read_str#45 53: (bf) r1 = r0 54: (67) r1 <<= 32 55: (77) r1 >>= 32 56: (bf) r5 = r0 57: (07) r5 += 56 58: (2d) if r8 > r1 goto pc+1 R0=inv(id=0) R1=inv(id=0,umin_value=256,umax_value=4294967295,var_off=(0x0; 0xffffffff)) R5=inv(id=0) R6=ctx(id=0,off=0,imm=0) R7=fp-312,call_-1 R8=inv256 R10=fp0,call_-1 fp-264=0 59: (b7) r5 = 312 60: (63) *(u32 *)(r10 -264) = r0 61: (67) r5 <<= 32 62: (77) r5 >>= 32 63: (bf) r1 = r6 64: (18) r2 = 0xffff8b9120cc8500 66: (18) r3 = 0xffffffff 68: (bf) r4 = r7 69: (85) call bpf_perf_event_output#25 70: (b7) r0 = 0 71: (95) exit from 58 to 60: R0=inv(id=0) R1=inv(id=0,umax_value=255,var_off=(0x0; 0xff)) R5=inv(id=0) R6=ctx(id=0,off=0,imm=0) R7=fp-312,call_-1 R8=inv256 R10=fp0,call_-1 fp-264=0 60: (63) *(u32 *)(r10 -264) = r0 61: (67) r5 <<= 32 62: (77) r5 >>= 32 63: (bf) r1 = r6 64: (18) r2 = 0xffff8b9120cc8500 66: (18) r3 = 0xffffffff 68: (bf) r4 = r7 69: (85) call bpf_perf_event_output#25 R5 unbounded memory access, use 'var &= const' or 'if (var < const)' libbpf: -- END LOG -- libbpf: failed to load program 'syscalls:sys_enter_openat' libbpf: failed to load object 'tools/perf/examples/bpf/augmented_syscalls.c' bpf: load objects failed: err=-4007: (Kernel verifier blocks program loading) event syntax error: 'tools/perf/examples/bpf/augmented_syscalls.c' \___ Kernel verifier blocks program loading After: # perf trace -e tools/perf/examples/bpf/augmented_syscalls.c cat /etc/passwd > /dev/null 0.000 cat/29249 openat(dfd: CWD, filename: /etc/ld.so.cache, flags: CLOEXEC) 0.008 cat/29249 syscalls:sys_exit_openat:0x3 0.021 cat/29249 openat(dfd: CWD, filename: /lib64/libc.so.6, flags: 
CLOEXEC) 0.025 cat/29249 syscalls:sys_exit_openat:0x3 0.180 cat/29249 open(filename: /usr/lib/locale/locale-archive, flags: CLOEXEC) 0.185 cat/29249 syscalls:sys_exit_open:0x3 0.242 cat/29249 openat(dfd: CWD, filename: /etc/passwd) 0.245 cat/29249 syscalls:sys_exit_openat:0x3 # It also works with a more recent kernel: # uname -a Linux jouet 4.18.0-00014-g4e67b2a5df5d #6 SMP Thu Aug 30 17:34:17 -03 2018 x86_64 x86_64 x86_64 GNU/Linux # perf trace -e tools/perf/examples/bpf/augmented_syscalls.c cat /etc/passwd > /dev/null 0.000 cat/26451 openat(dfd: CWD, filename: /etc/ld.so.cache, flags: CLOEXEC) 0.020 cat/26451 syscalls:sys_exit_openat:0x3 0.039 cat/26451 openat(dfd: CWD, filename: /lib64/libc.so.6, flags: CLOEXEC) 0.044 cat/26451 syscalls:sys_exit_openat:0x3 0.231 cat/26451 open(filename: /usr/lib/locale/locale-archive, flags: CLOEXEC) 0.238 cat/26451 syscalls:sys_exit_open:0x3 0.278 cat/26451 openat(dfd: CWD, filename: /etc/passwd) 0.282 cat/26451 syscalls:sys_exit_openat:0x3 # Cc: Adrian Hunter <adrian.hunter@intel.com> Cc: Daniel Borkmann <daniel@iogearbox.net> Cc: David Ahern <dsahern@gmail.com> Cc: Gianluca Borello <g.borello@gmail.com> Cc: Jiri Olsa <jolsa@kernel.org> Cc: Namhyung Kim <namhyung@kernel.org> Cc: Wang Nan <wangnan0@huawei.com> Cc: Yonghong Song <yhs@fb.com> Link: https://lkml.kernel.org/n/tip-wkpsivs1a9afwldbul46btbv@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
174 lines
5.0 KiB
C
174 lines
5.0 KiB
C
// SPDX-License-Identifier: GPL-2.0
|
|
/*
|
|
* Augment syscalls with the contents of the pointer arguments.
|
|
*
|
|
* Test it with:
|
|
*
|
|
* perf trace -e tools/perf/examples/bpf/augmented_syscalls.c cat /etc/passwd > /dev/null
|
|
*
|
|
* It'll catch some openat syscalls related to the dynamic linker and
|
|
* the last one should be the one for '/etc/passwd'.
|
|
*
|
|
* This matches what is marshalled into the raw_syscalls:sys_enter payload
|
|
* expected by the 'perf trace' beautifiers, and can be used by them, that will
|
|
* check if perf_sample->raw_data is more than what is expected for each
|
|
* syscalls:sys_{enter,exit}_SYSCALL tracepoint, using the extra data as the
|
|
* contents of pointer arguments.
|
|
*/
|
|
|
|
#include <stdio.h>
|
|
#include <linux/socket.h>
|
|
|
|
/*
 * Map that every enter handler in this file passes to perf_event_output()
 * when publishing its augmented payload.
 *
 * It is a BPF_MAP_TYPE_PERF_EVENT_ARRAY with int-sized keys, u32-sized
 * values and __NR_CPUS__ entries, placed in the "maps" section.
 */
struct bpf_map SEC("maps") __augmented_syscalls__ = {
	.type		= BPF_MAP_TYPE_PERF_EVENT_ARRAY,
	.key_size	= sizeof(int),
	.value_size	= sizeof(u32),
	.max_entries	= __NR_CPUS__,
};
|
|
|
|
/*
 * Argument block received by the syscall_exit() handlers that the
 * augmented_*_syscall() macros in this file generate.
 *
 * NOTE(review): common_tp_fields presumably stands in for the common
 * tracepoint header, and syscall_nr/ret for the syscall number and its
 * return value -- confirm against the tracepoint format.
 */
struct syscall_exit_args {
	unsigned long long	common_tp_fields;
	long			syscall_nr;
	long			ret;
};
|
|
|
|
/*
 * String payload appended after the raw tracepoint arguments by
 * augmented_filename_syscall().
 *
 * @size:     set by the enter handler to the value returned by
 *            probe_read_str()
 * @reserved: explicitly zeroed by the enter handler's initializer
 * @value:    256-byte buffer that probe_read_str() fills
 */
struct augmented_filename {
	unsigned int	size;
	int		reserved;
	char		value[256];
};
|
|
|
|
/*
 * augmented_filename_syscall(syscall) - emit the handlers for a syscall that
 * takes a string pointer in args->filename_ptr.
 *
 * Expands to:
 *  - struct augmented_enter_<syscall>_args: the tracepoint arguments
 *    immediately followed by a struct augmented_filename;
 *  - the syscall_enter(<syscall>) handler, which copies the raw arguments
 *    with probe_read(), copies the string with probe_read_str() (keeping its
 *    return value in filename.size) and publishes the result through
 *    perf_event_output() on __augmented_syscalls__ for the current CPU;
 *  - the syscall_exit(<syscall>) handler, which only returns 1.
 *
 * When filename.size is smaller than the value buffer, the emitted length is
 * reduced by the unused tail of that buffer. The '&=' that follows gives the
 * BPF verifier a visible upper bound on the length; without it some kernels
 * reject the program with "R5 unbounded memory access".
 *
 * NOTE(review): the mask is sizeof(filename.value) - 1, yet it is applied to
 * the whole payload length (arguments + size/reserved + string). Once that
 * sum exceeds the mask the emitted length wraps around -- confirm that this
 * is acceptable for long strings before relying on it.
 *
 * Statement order is kept deliberately: the code clang generates for this
 * sequence is what the verifier has to accept.
 */
#define augmented_filename_syscall(syscall) \
struct augmented_enter_##syscall##_args { \
	struct syscall_enter_##syscall##_args args; \
	struct augmented_filename filename; \
}; \
int syscall_enter(syscall)(struct syscall_enter_##syscall##_args *args) \
{ \
	struct augmented_enter_##syscall##_args payload = { .filename.reserved = 0, }; \
	unsigned int payload_len = sizeof(payload); \
 \
	probe_read(&payload.args, sizeof(payload.args), args); \
	payload.filename.size = probe_read_str(&payload.filename.value, \
					       sizeof(payload.filename.value), \
					       args->filename_ptr); \
	if (payload.filename.size < sizeof(payload.filename.value)) { \
		/* drop the part of the buffer the string did not use */ \
		payload_len -= sizeof(payload.filename.value) - payload.filename.size; \
		/* explicit bound for the verifier */ \
		payload_len &= sizeof(payload.filename.value) - 1; \
	} \
	perf_event_output(args, &__augmented_syscalls__, BPF_F_CURRENT_CPU, \
			  &payload, payload_len); \
	return 0; \
} \
int syscall_exit(syscall)(struct syscall_exit_args *args) \
{ \
	return 1; /* 0 as soon as we start copying data returned by the kernel, e.g. 'read' */ \
}
|
|
|
|
/*
 * Arguments seen by the openat enter handler.
 *
 * filename_ptr is the pointer that augmented_filename_syscall() hands to
 * probe_read_str().
 *
 * NOTE(review): the member order presumably mirrors the
 * syscalls:sys_enter_openat tracepoint format -- confirm before reordering.
 */
struct syscall_enter_openat_args {
	unsigned long long	common_tp_fields;
	long			syscall_nr;
	long			dfd;
	char			*filename_ptr;
	long			flags;
	long			mode;
};
|
|
|
|
/* Generate struct augmented_enter_openat_args plus the syscall_enter(openat)/syscall_exit(openat) handlers. */
augmented_filename_syscall(openat);
|
|
|
|
/*
 * Arguments seen by the open enter handler.
 *
 * filename_ptr is the pointer that augmented_filename_syscall() hands to
 * probe_read_str().
 *
 * NOTE(review): the member order presumably mirrors the
 * syscalls:sys_enter_open tracepoint format -- confirm before reordering.
 */
struct syscall_enter_open_args {
	unsigned long long	common_tp_fields;
	long			syscall_nr;
	char			*filename_ptr;
	long			flags;
	long			mode;
};
|
|
|
|
/* Generate struct augmented_enter_open_args plus the syscall_enter(open)/syscall_exit(open) handlers. */
augmented_filename_syscall(open);
|
|
|
|
/*
 * Arguments seen by the inotify_add_watch enter handler.
 *
 * filename_ptr is the pointer that augmented_filename_syscall() hands to
 * probe_read_str().
 *
 * NOTE(review): the member order presumably mirrors the
 * syscalls:sys_enter_inotify_add_watch tracepoint format -- confirm before
 * reordering.
 */
struct syscall_enter_inotify_add_watch_args {
	unsigned long long	common_tp_fields;
	long			syscall_nr;
	long			fd;
	char			*filename_ptr;
	long			mask;
};
|
|
|
|
/* Generate struct augmented_enter_inotify_add_watch_args plus the matching syscall_enter()/syscall_exit() handlers. */
augmented_filename_syscall(inotify_add_watch);
|
|
|
|
/*
 * Only a pointer to struct stat is stored below, so an incomplete type is
 * enough. The tag is 'stat'; 'statbuf' is the name of the member.
 */
struct stat;

/*
 * Arguments seen by the newstat enter handler.
 *
 * filename_ptr is the pointer that augmented_filename_syscall() hands to
 * probe_read_str(). statbuf is carried along as a plain pointer value; the
 * handlers generated in this file never dereference it.
 *
 * NOTE(review): the member order presumably mirrors the
 * syscalls:sys_enter_newstat tracepoint format -- confirm before reordering.
 */
struct syscall_enter_newstat_args {
	unsigned long long	common_tp_fields;
	long			syscall_nr;
	char			*filename_ptr;
	struct stat		*statbuf;
};
|
|
|
|
/* Generate struct augmented_enter_newstat_args plus the syscall_enter(newstat)/syscall_exit(newstat) handlers. */
augmented_filename_syscall(newstat);
|
|
|
|
/*
 * Fall back to 128 when no header has defined _K_SS_MAXSIZE.
 *
 * NOTE(review): presumably this sizes struct sockaddr_storage from
 * <linux/socket.h>, which augmented_sockaddr_syscall() embeds -- confirm.
 */
#if !defined(_K_SS_MAXSIZE)
#define _K_SS_MAXSIZE 128
#endif
|
|
|
|
/*
 * augmented_sockaddr_syscall(syscall) - emit the handlers for a syscall that
 * takes a socket address pointer in args->addr_ptr.
 *
 * Expands to:
 *  - struct augmented_enter_<syscall>_args: the tracepoint arguments
 *    immediately followed by a struct sockaddr_storage;
 *  - the syscall_enter(<syscall>) handler, which copies the raw arguments and
 *    then sizeof(struct sockaddr_storage) bytes from args->addr_ptr with
 *    probe_read(), and publishes both through perf_event_output() on
 *    __augmented_syscalls__ for the current CPU;
 *  - the syscall_exit(<syscall>) handler, which only returns 1.
 *
 * The copy length is currently always the full size of the storage: the
 * clamp to the caller-supplied address length is disabled, see the FIXME
 * inside the handler.
 */
#define augmented_sockaddr_syscall(syscall) \
struct augmented_enter_##syscall##_args { \
	struct syscall_enter_##syscall##_args args; \
	struct sockaddr_storage addr; \
}; \
int syscall_enter(syscall)(struct syscall_enter_##syscall##_args *args) \
{ \
	struct augmented_enter_##syscall##_args payload; \
	unsigned long copy_len = sizeof(payload.addr); \
 \
	probe_read(&payload.args, sizeof(payload.args), args); \
	/* FIXME_CLANG_OPTIMIZATION_THAT_ACCESSES_USER_CONTROLLED_ADDRLEN_DESPITE_THIS_CHECK */ \
	/* if (copy_len > payload.args.addrlen) */ \
	/*	copy_len = payload.args.addrlen; */ \
	/* */ \
	probe_read(&payload.addr, copy_len, args->addr_ptr); \
	perf_event_output(args, &__augmented_syscalls__, BPF_F_CURRENT_CPU, \
			  &payload, \
			  sizeof(payload) - sizeof(payload.addr) + copy_len); \
	return 0; \
} \
int syscall_exit(syscall)(struct syscall_exit_args *args) \
{ \
	return 1; /* 0 as soon as we start copying data returned by the kernel, e.g. 'read' */ \
}
|
|
|
|
/* Only pointers to struct sockaddr are stored below; an incomplete type suffices. */
struct sockaddr;

/*
 * Arguments seen by the bind enter handler.
 *
 * addr_ptr is the pointer that augmented_sockaddr_syscall() hands to
 * probe_read().
 *
 * NOTE(review): the member order presumably mirrors the
 * syscalls:sys_enter_bind tracepoint format -- confirm before reordering.
 */
struct syscall_enter_bind_args {
	unsigned long long	common_tp_fields;
	long			syscall_nr;
	long			fd;
	struct sockaddr		*addr_ptr;
	unsigned long		addrlen;
};
|
|
|
|
/* Generate struct augmented_enter_bind_args plus the syscall_enter(bind)/syscall_exit(bind) handlers. */
augmented_sockaddr_syscall(bind);
|
|
|
|
/*
 * Arguments seen by the connect enter handler.
 *
 * addr_ptr is the pointer that augmented_sockaddr_syscall() hands to
 * probe_read().
 *
 * NOTE(review): the member order presumably mirrors the
 * syscalls:sys_enter_connect tracepoint format -- confirm before reordering.
 */
struct syscall_enter_connect_args {
	unsigned long long	common_tp_fields;
	long			syscall_nr;
	long			fd;
	struct sockaddr		*addr_ptr;
	unsigned long		addrlen;
};
|
|
|
|
/* Generate struct augmented_enter_connect_args plus the syscall_enter(connect)/syscall_exit(connect) handlers. */
augmented_sockaddr_syscall(connect);
|
|
|
|
/*
 * Arguments seen by the sendto enter handler.
 *
 * addr_ptr is the pointer that augmented_sockaddr_syscall() hands to
 * probe_read().
 *
 * NOTE(review): the address length member is spelled addr_len (signed long)
 * here, while the bind and connect argument structs use addrlen (unsigned
 * long). The disabled clamp inside augmented_sockaddr_syscall() refers to
 * 'args.addrlen', so re-enabling it would need this difference handled.
 *
 * NOTE(review): the member order presumably mirrors the
 * syscalls:sys_enter_sendto tracepoint format -- confirm before reordering.
 */
struct syscall_enter_sendto_args {
	unsigned long long	common_tp_fields;
	long			syscall_nr;
	long			fd;
	void			*buff;
	long			len;
	unsigned long		flags;
	struct sockaddr		*addr_ptr;
	long			addr_len;
};
|
|
|
|
/* Generate struct augmented_enter_sendto_args plus the syscall_enter(sendto)/syscall_exit(sendto) handlers. */
augmented_sockaddr_syscall(sendto);
|
|
|
|
/*
 * Declare the program's license as GPL.
 * NOTE(review): license() is not defined in this file; presumably it comes
 * from the perf BPF helper headers -- confirm.
 */
license(GPL);
|