Merge branch 'bpf-ancestor-cgroup-id'
Andrey Ignatov says: ==================== This patch set adds a new BPF helper, bpf_skb_ancestor_cgroup_id, that returns the id of the cgroup v2 that is the ancestor, at ancestor_level, of the cgroup associated with the skb. The helper is useful for implementing policies in TC based on cgroups that are higher in the hierarchy than the immediate cgroup associated with the skb. v1->v2: - more reliable check in the selftest that waits for IPv6 to become ready. ==================== Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
This commit is contained in:
commit
2ce3206b9e
@ -553,6 +553,36 @@ static inline bool cgroup_is_descendant(struct cgroup *cgrp,
|
||||
return cgrp->ancestor_ids[ancestor->level] == ancestor->id;
|
||||
}
|
||||
|
||||
/**
|
||||
* cgroup_ancestor - find ancestor of cgroup
|
||||
* @cgrp: cgroup to find ancestor of
|
||||
* @ancestor_level: level of ancestor to find starting from root
|
||||
*
|
||||
* Find ancestor of cgroup at specified level starting from root if it exists
|
||||
* and return pointer to it. Return NULL if @cgrp doesn't have ancestor at
|
||||
* @ancestor_level.
|
||||
*
|
||||
* This function is safe to call as long as @cgrp is accessible.
|
||||
*/
|
||||
static inline struct cgroup *cgroup_ancestor(struct cgroup *cgrp,
|
||||
int ancestor_level)
|
||||
{
|
||||
struct cgroup *ptr;
|
||||
|
||||
if (cgrp->level < ancestor_level)
|
||||
return NULL;
|
||||
|
||||
for (ptr = cgrp;
|
||||
ptr && ptr->level > ancestor_level;
|
||||
ptr = cgroup_parent(ptr))
|
||||
;
|
||||
|
||||
if (ptr && ptr->level == ancestor_level)
|
||||
return ptr;
|
||||
|
||||
return NULL;
|
||||
}
|
||||
|
||||
/**
|
||||
* task_under_cgroup_hierarchy - test task's membership of cgroup ancestry
|
||||
* @task: the task to be tested
|
||||
|
@ -2093,6 +2093,24 @@ union bpf_attr {
|
||||
* Return
|
||||
* The id is returned or 0 in case the id could not be retrieved.
|
||||
*
|
||||
* u64 bpf_skb_ancestor_cgroup_id(struct sk_buff *skb, int ancestor_level)
|
||||
* Description
|
||||
* Return id of cgroup v2 that is ancestor of cgroup associated
|
||||
* with the *skb* at the *ancestor_level*. The root cgroup is at
|
||||
* *ancestor_level* zero and each step down the hierarchy
|
||||
* increments the level. If *ancestor_level* == level of cgroup
|
||||
* associated with *skb*, then return value will be same as that
|
||||
* of **bpf_skb_cgroup_id**\ ().
|
||||
*
|
||||
* The helper is useful to implement policies based on cgroups
|
||||
* that are upper in hierarchy than immediate cgroup associated
|
||||
* with *skb*.
|
||||
*
|
||||
* The format of returned id and helper limitations are same as in
|
||||
* **bpf_skb_cgroup_id**\ ().
|
||||
* Return
|
||||
* The id is returned or 0 in case the id could not be retrieved.
|
||||
*
|
||||
* u64 bpf_get_current_cgroup_id(void)
|
||||
* Return
|
||||
* A 64-bit integer containing the current cgroup id based
|
||||
@ -2207,7 +2225,8 @@ union bpf_attr {
|
||||
FN(skb_cgroup_id), \
|
||||
FN(get_current_cgroup_id), \
|
||||
FN(get_local_storage), \
|
||||
FN(sk_select_reuseport),
|
||||
FN(sk_select_reuseport), \
|
||||
FN(skb_ancestor_cgroup_id),
|
||||
|
||||
/* integer value in 'imm' field of BPF_CALL instruction selects which helper
|
||||
* function eBPF program intends to call
|
||||
|
@ -3778,6 +3778,32 @@ static const struct bpf_func_proto bpf_skb_cgroup_id_proto = {
|
||||
.ret_type = RET_INTEGER,
|
||||
.arg1_type = ARG_PTR_TO_CTX,
|
||||
};
|
||||
|
||||
/* Return the id (kn->id.id) of the cgroup that cgroup_ancestor() finds at
 * ancestor_level for the cgroup of the full socket obtained from skb.
 * Returns 0 when there is no full socket or no ancestor at that level.
 */
BPF_CALL_2(bpf_skb_ancestor_cgroup_id, const struct sk_buff *, skb, int,
	   ancestor_level)
{
	struct cgroup *cgrp, *ancestor;
	struct sock *sk;

	sk = skb_to_full_sk(skb);
	if (!sk)
		return 0;
	if (!sk_fullsock(sk))
		return 0;

	cgrp = sock_cgroup_ptr(&sk->sk_cgrp_data);
	ancestor = cgroup_ancestor(cgrp, ancestor_level);

	return ancestor ? ancestor->kn->id.id : 0;
}
|
||||
|
||||
static const struct bpf_func_proto bpf_skb_ancestor_cgroup_id_proto = {
|
||||
.func = bpf_skb_ancestor_cgroup_id,
|
||||
.gpl_only = false,
|
||||
.ret_type = RET_INTEGER,
|
||||
.arg1_type = ARG_PTR_TO_CTX,
|
||||
.arg2_type = ARG_ANYTHING,
|
||||
};
|
||||
#endif
|
||||
|
||||
static unsigned long bpf_xdp_copy(void *dst_buff, const void *src_buff,
|
||||
@ -4966,6 +4992,8 @@ tc_cls_act_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
|
||||
#ifdef CONFIG_SOCK_CGROUP_DATA
|
||||
case BPF_FUNC_skb_cgroup_id:
|
||||
return &bpf_skb_cgroup_id_proto;
|
||||
case BPF_FUNC_skb_ancestor_cgroup_id:
|
||||
return &bpf_skb_ancestor_cgroup_id_proto;
|
||||
#endif
|
||||
default:
|
||||
return bpf_base_func_proto(func_id);
|
||||
|
@ -2093,6 +2093,24 @@ union bpf_attr {
|
||||
* Return
|
||||
* The id is returned or 0 in case the id could not be retrieved.
|
||||
*
|
||||
* u64 bpf_skb_ancestor_cgroup_id(struct sk_buff *skb, int ancestor_level)
|
||||
* Description
|
||||
* Return id of cgroup v2 that is ancestor of cgroup associated
|
||||
* with the *skb* at the *ancestor_level*. The root cgroup is at
|
||||
* *ancestor_level* zero and each step down the hierarchy
|
||||
* increments the level. If *ancestor_level* == level of cgroup
|
||||
* associated with *skb*, then return value will be same as that
|
||||
* of **bpf_skb_cgroup_id**\ ().
|
||||
*
|
||||
* The helper is useful to implement policies based on cgroups
|
||||
* that are upper in hierarchy than immediate cgroup associated
|
||||
* with *skb*.
|
||||
*
|
||||
* The format of returned id and helper limitations are same as in
|
||||
* **bpf_skb_cgroup_id**\ ().
|
||||
* Return
|
||||
* The id is returned or 0 in case the id could not be retrieved.
|
||||
*
|
||||
* u64 bpf_get_current_cgroup_id(void)
|
||||
* Return
|
||||
* A 64-bit integer containing the current cgroup id based
|
||||
@ -2207,7 +2225,8 @@ union bpf_attr {
|
||||
FN(skb_cgroup_id), \
|
||||
FN(get_current_cgroup_id), \
|
||||
FN(get_local_storage), \
|
||||
FN(sk_select_reuseport),
|
||||
FN(sk_select_reuseport), \
|
||||
FN(skb_ancestor_cgroup_id),
|
||||
|
||||
/* integer value in 'imm' field of BPF_CALL instruction selects which helper
|
||||
* function eBPF program intends to call
|
||||
|
@ -34,7 +34,8 @@ TEST_GEN_FILES = test_pkt_access.o test_xdp.o test_l4lb.o test_tcp_estats.o test
|
||||
test_btf_haskv.o test_btf_nokv.o test_sockmap_kern.o test_tunnel_kern.o \
|
||||
test_get_stack_rawtp.o test_sockmap_kern.o test_sockhash_kern.o \
|
||||
test_lwt_seg6local.o sendmsg4_prog.o sendmsg6_prog.o test_lirc_mode2_kern.o \
|
||||
get_cgroup_id_kern.o socket_cookie_prog.o test_select_reuseport_kern.o
|
||||
get_cgroup_id_kern.o socket_cookie_prog.o test_select_reuseport_kern.o \
|
||||
test_skb_cgroup_id_kern.o
|
||||
|
||||
# Order corresponds to 'make run_tests' order
|
||||
TEST_PROGS := test_kmod.sh \
|
||||
@ -45,10 +46,11 @@ TEST_PROGS := test_kmod.sh \
|
||||
test_sock_addr.sh \
|
||||
test_tunnel.sh \
|
||||
test_lwt_seg6local.sh \
|
||||
test_lirc_mode2.sh
|
||||
test_lirc_mode2.sh \
|
||||
test_skb_cgroup_id.sh
|
||||
|
||||
# Compile but not part of 'make run_tests'
|
||||
TEST_GEN_PROGS_EXTENDED = test_libbpf_open test_sock_addr
|
||||
TEST_GEN_PROGS_EXTENDED = test_libbpf_open test_sock_addr test_skb_cgroup_id_user
|
||||
|
||||
include ../lib.mk
|
||||
|
||||
@ -59,6 +61,7 @@ $(TEST_GEN_PROGS): $(BPFOBJ)
|
||||
$(TEST_GEN_PROGS_EXTENDED): $(OUTPUT)/libbpf.a
|
||||
|
||||
$(OUTPUT)/test_dev_cgroup: cgroup_helpers.c
|
||||
$(OUTPUT)/test_skb_cgroup_id_user: cgroup_helpers.c
|
||||
$(OUTPUT)/test_sock: cgroup_helpers.c
|
||||
$(OUTPUT)/test_sock_addr: cgroup_helpers.c
|
||||
$(OUTPUT)/test_socket_cookie: cgroup_helpers.c
|
||||
|
@ -139,6 +139,10 @@ static unsigned long long (*bpf_get_current_cgroup_id)(void) =
|
||||
(void *) BPF_FUNC_get_current_cgroup_id;
|
||||
static void *(*bpf_get_local_storage)(void *map, unsigned long long flags) =
|
||||
(void *) BPF_FUNC_get_local_storage;
|
||||
static unsigned long long (*bpf_skb_cgroup_id)(void *ctx) =
|
||||
(void *) BPF_FUNC_skb_cgroup_id;
|
||||
static unsigned long long (*bpf_skb_ancestor_cgroup_id)(void *ctx, int level) =
|
||||
(void *) BPF_FUNC_skb_ancestor_cgroup_id;
|
||||
|
||||
/* llvm builtin functions that eBPF C program may use to
|
||||
* emit BPF_LD_ABS and BPF_LD_IND instructions
|
||||
|
62
tools/testing/selftests/bpf/test_skb_cgroup_id.sh
Executable file
62
tools/testing/selftests/bpf/test_skb_cgroup_id.sh
Executable file
@ -0,0 +1,62 @@
|
||||
#!/bin/sh
|
||||
# SPDX-License-Identifier: GPL-2.0
|
||||
# Copyright (c) 2018 Facebook
|
||||
|
||||
set -eu
|
||||
|
||||
# Wait until the link-local all-nodes multicast address answers on ${TEST_IF}.
# Pings ff02::1 up to ${MAX_PING_TRIES} times, sleeping one second after each
# failed attempt and printing one progress dot per attempt. Returns on the
# first successful ping; otherwise prints an error to stderr and exits the
# script with status 1.
wait_for_ip()
{
	local _i

	# printf is used instead of "echo -n": this script runs under /bin/sh,
	# where echo's treatment of -n is not portable.
	printf '%s' "Wait for testing link-local IP to become available "
	for _i in $(seq "${MAX_PING_TRIES}"); do
		printf '.'
		if ping -6 -q -c 1 -W 1 "ff02::1%${TEST_IF}" >/dev/null 2>&1; then
			echo " OK"
			return
		fi
		sleep 1
	done
	echo 1>&2 "ERROR: Timeout waiting for test IP to become available."
	exit 1
}
|
||||
|
||||
# Build the test topology and attach the BPF program.
# Creates the veth pair ${TEST_IF}/${TEST_IF_PEER}, brings both ends up, waits
# for the link-local address, adds a clsact qdisc and loads section
# ${BPF_PROG_SECTION} of ${BPF_PROG_OBJ} as an egress filter. The id printed
# by "tc filter show" is stored in BPF_PROG_ID.
setup()
{
	# A dedicated veth pair keeps the test from interfering with the
	# current environment.
	ip link add dev ${TEST_IF} type veth peer name ${TEST_IF_PEER}
	ip link set ${TEST_IF} up
	ip link set ${TEST_IF_PEER} up

	wait_for_ip

	tc qdisc add dev ${TEST_IF} clsact
	tc filter add dev ${TEST_IF} egress bpf obj ${BPF_PROG_OBJ} sec ${BPF_PROG_SECTION} da

	# Keep only the token that follows " id " in the filter listing.
	BPF_PROG_ID=$(
		tc filter show dev ${TEST_IF} egress |
		awk '/ id / {sub(/.* id /, "", $0); print($1)}'
	)
}
|
||||
|
||||
# Delete both test interfaces. Errors are silenced and ignored, so this is
# safe to run whether or not the interfaces still exist.
cleanup()
{
	ip link del ${TEST_IF} 2>/dev/null || true
	ip link del ${TEST_IF_PEER} 2>/dev/null || true
}
|
||||
|
||||
# Entry point: register cleanup for script exit and for SIGINT, SIGQUIT,
# SIGABRT and SIGTERM (signals 2, 3, 6 and 15), prepare the environment, then
# run the user-space checker with the interface name and the BPF program id.
main()
{
	trap cleanup EXIT INT QUIT ABRT TERM

	setup
	${PROG} ${TEST_IF} ${BPF_PROG_ID}
}
|
||||
|
||||
DIR=$(dirname $0)
|
||||
TEST_IF="test_cgid_1"
|
||||
TEST_IF_PEER="test_cgid_2"
|
||||
MAX_PING_TRIES=5
|
||||
BPF_PROG_OBJ="${DIR}/test_skb_cgroup_id_kern.o"
|
||||
BPF_PROG_SECTION="cgroup_id_logger"
|
||||
BPF_PROG_ID=0
|
||||
PROG="${DIR}/test_skb_cgroup_id_user"
|
||||
|
||||
main
|
47
tools/testing/selftests/bpf/test_skb_cgroup_id_kern.c
Normal file
47
tools/testing/selftests/bpf/test_skb_cgroup_id_kern.c
Normal file
@ -0,0 +1,47 @@
|
||||
// SPDX-License-Identifier: GPL-2.0
|
||||
// Copyright (c) 2018 Facebook
|
||||
|
||||
#include <linux/bpf.h>
|
||||
#include <linux/pkt_cls.h>
|
||||
|
||||
#include <string.h>
|
||||
|
||||
#include "bpf_helpers.h"
|
||||
|
||||
#define NUM_CGROUP_LEVELS 4
|
||||
|
||||
/* Array map holding one __u64 value per cgroup level, with
 * NUM_CGROUP_LEVELS entries keyed by a __u32.
 */
struct bpf_map_def SEC("maps") cgroup_ids = {
	.type		= BPF_MAP_TYPE_ARRAY,
	.key_size	= sizeof(__u32),
	.value_size	= sizeof(__u64),
	.max_entries	= NUM_CGROUP_LEVELS,
};
|
||||
|
||||
/* Look up the id of the skb's ancestor cgroup at @level and store it in the
 * cgroup_ids map under key @level.
 */
static __always_inline void log_nth_level(struct __sk_buff *skb, __u32 level)
{
	/* [1] The address of level is handed to an external function that may
	 * modify it, which is incompatible with loop unrolling.
	 */
	__u64 cgid = bpf_skb_ancestor_cgroup_id(skb, level);

	bpf_map_update_elem(&cgroup_ids, &level, &cgid, 0);
}
|
||||
|
||||
/* TC program: record the ancestor cgroup id of the skb for levels 0..3 and
 * let the packet through.
 */
SEC("cgroup_id_logger")
int log_cgroup_id(struct __sk_buff *skb)
{
	/* The calls are unrolled by hand because of [1] in log_nth_level();
	 * keep their number equal to NUM_CGROUP_LEVELS.
	 */
	log_nth_level(skb, 0);
	log_nth_level(skb, 1);
	log_nth_level(skb, 2);
	log_nth_level(skb, 3);

	return TC_ACT_OK;
}
|
||||
|
||||
int _version SEC("version") = 1;
|
||||
|
||||
char _license[] SEC("license") = "GPL";
|
187
tools/testing/selftests/bpf/test_skb_cgroup_id_user.c
Normal file
187
tools/testing/selftests/bpf/test_skb_cgroup_id_user.c
Normal file
@ -0,0 +1,187 @@
|
||||
// SPDX-License-Identifier: GPL-2.0
|
||||
// Copyright (c) 2018 Facebook
|
||||
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <unistd.h>
|
||||
|
||||
#include <arpa/inet.h>
|
||||
#include <net/if.h>
|
||||
#include <netinet/in.h>
|
||||
#include <sys/socket.h>
|
||||
#include <sys/types.h>
|
||||
|
||||
|
||||
#include <bpf/bpf.h>
|
||||
#include <bpf/libbpf.h>
|
||||
|
||||
#include "bpf_rlimit.h"
|
||||
#include "cgroup_helpers.h"
|
||||
|
||||
#define CGROUP_PATH "/skb_cgroup_test"
|
||||
#define NUM_CGROUP_LEVELS 4
|
||||
|
||||
/* RFC 4291, Section 2.7.1 */
|
||||
#define LINKLOCAL_MULTICAST "ff02::1"
|
||||
|
||||
/* Fill @dst with an IPv6 destination: address @ip, port 1025, scope id set to
 * the index of interface @iface.
 *
 * Returns 0 on success, or -1 after logging an error when @ip cannot be
 * parsed or @iface has no interface index.
 */
static int mk_dst_addr(const char *ip, const char *iface,
		       struct sockaddr_in6 *dst)
{
	unsigned int ifindex;

	memset(dst, 0, sizeof(*dst));
	dst->sin6_family = AF_INET6;
	dst->sin6_port = htons(1025);

	if (inet_pton(AF_INET6, ip, &dst->sin6_addr) != 1) {
		log_err("Invalid IPv6: %s", ip);
		return -1;
	}

	ifindex = if_nametoindex(iface);
	dst->sin6_scope_id = ifindex;
	if (ifindex == 0) {
		log_err("Failed to get index of iface: %s", iface);
		return -1;
	}

	return 0;
}
|
||||
|
||||
static int send_packet(const char *iface)
|
||||
{
|
||||
struct sockaddr_in6 dst;
|
||||
char msg[] = "msg";
|
||||
int err = 0;
|
||||
int fd = -1;
|
||||
|
||||
if (mk_dst_addr(LINKLOCAL_MULTICAST, iface, &dst))
|
||||
goto err;
|
||||
|
||||
fd = socket(AF_INET6, SOCK_DGRAM, 0);
|
||||
if (fd == -1) {
|
||||
log_err("Failed to create UDP socket");
|
||||
goto err;
|
||||
}
|
||||
|
||||
if (sendto(fd, &msg, sizeof(msg), 0, (const struct sockaddr *)&dst,
|
||||
sizeof(dst)) == -1) {
|
||||
log_err("Failed to send datagram");
|
||||
goto err;
|
||||
}
|
||||
|
||||
goto out;
|
||||
err:
|
||||
err = -1;
|
||||
out:
|
||||
if (fd >= 0)
|
||||
close(fd);
|
||||
return err;
|
||||
}
|
||||
|
||||
int get_map_fd_by_prog_id(int prog_id)
|
||||
{
|
||||
struct bpf_prog_info info = {};
|
||||
__u32 info_len = sizeof(info);
|
||||
__u32 map_ids[1];
|
||||
int prog_fd = -1;
|
||||
int map_fd = -1;
|
||||
|
||||
prog_fd = bpf_prog_get_fd_by_id(prog_id);
|
||||
if (prog_fd < 0) {
|
||||
log_err("Failed to get fd by prog id %d", prog_id);
|
||||
goto err;
|
||||
}
|
||||
|
||||
info.nr_map_ids = 1;
|
||||
info.map_ids = (__u64) (unsigned long) map_ids;
|
||||
|
||||
if (bpf_obj_get_info_by_fd(prog_fd, &info, &info_len)) {
|
||||
log_err("Failed to get info by prog fd %d", prog_fd);
|
||||
goto err;
|
||||
}
|
||||
|
||||
if (!info.nr_map_ids) {
|
||||
log_err("No maps found for prog fd %d", prog_fd);
|
||||
goto err;
|
||||
}
|
||||
|
||||
map_fd = bpf_map_get_fd_by_id(map_ids[0]);
|
||||
if (map_fd < 0)
|
||||
log_err("Failed to get fd by map id %d", map_ids[0]);
|
||||
err:
|
||||
if (prog_fd >= 0)
|
||||
close(prog_fd);
|
||||
return map_fd;
|
||||
}
|
||||
|
||||
int check_ancestor_cgroup_ids(int prog_id)
|
||||
{
|
||||
__u64 actual_ids[NUM_CGROUP_LEVELS], expected_ids[NUM_CGROUP_LEVELS];
|
||||
__u32 level;
|
||||
int err = 0;
|
||||
int map_fd;
|
||||
|
||||
expected_ids[0] = 0x100000001; /* root cgroup */
|
||||
expected_ids[1] = get_cgroup_id("");
|
||||
expected_ids[2] = get_cgroup_id(CGROUP_PATH);
|
||||
expected_ids[3] = 0; /* non-existent cgroup */
|
||||
|
||||
map_fd = get_map_fd_by_prog_id(prog_id);
|
||||
if (map_fd < 0)
|
||||
goto err;
|
||||
|
||||
for (level = 0; level < NUM_CGROUP_LEVELS; ++level) {
|
||||
if (bpf_map_lookup_elem(map_fd, &level, &actual_ids[level])) {
|
||||
log_err("Failed to lookup key %d", level);
|
||||
goto err;
|
||||
}
|
||||
if (actual_ids[level] != expected_ids[level]) {
|
||||
log_err("%llx (actual) != %llx (expected), level: %u\n",
|
||||
actual_ids[level], expected_ids[level], level);
|
||||
goto err;
|
||||
}
|
||||
}
|
||||
|
||||
goto out;
|
||||
err:
|
||||
err = -1;
|
||||
out:
|
||||
if (map_fd >= 0)
|
||||
close(map_fd);
|
||||
return err;
|
||||
}
|
||||
|
||||
int main(int argc, char **argv)
|
||||
{
|
||||
int cgfd = -1;
|
||||
int err = 0;
|
||||
|
||||
if (argc < 3) {
|
||||
fprintf(stderr, "Usage: %s iface prog_id\n", argv[0]);
|
||||
exit(EXIT_FAILURE);
|
||||
}
|
||||
|
||||
if (setup_cgroup_environment())
|
||||
goto err;
|
||||
|
||||
cgfd = create_and_get_cgroup(CGROUP_PATH);
|
||||
if (!cgfd)
|
||||
goto err;
|
||||
|
||||
if (join_cgroup(CGROUP_PATH))
|
||||
goto err;
|
||||
|
||||
if (send_packet(argv[1]))
|
||||
goto err;
|
||||
|
||||
if (check_ancestor_cgroup_ids(atoi(argv[2])))
|
||||
goto err;
|
||||
|
||||
goto out;
|
||||
err:
|
||||
err = -1;
|
||||
out:
|
||||
close(cgfd);
|
||||
cleanup_cgroup_environment();
|
||||
printf("[%s]\n", err ? "FAIL" : "PASS");
|
||||
return err;
|
||||
}
|
Loading…
Reference in New Issue
Block a user