From b32cc5b9a346319c171e3ad905e0cddda032b5eb Mon Sep 17 00:00:00 2001 From: "Nikita V. Shirokov" Date: Tue, 17 Apr 2018 21:42:13 -0700 Subject: [PATCH 01/11] bpf: adding bpf_xdp_adjust_tail helper Adding new bpf helper which would allow us to manipulate xdp's data_end pointer, and allow us to reduce packet's size. Intended use case: to generate ICMP messages from XDP context, where such message would contain truncated original packet. Signed-off-by: Nikita V. Shirokov Acked-by: Alexei Starovoitov Signed-off-by: Daniel Borkmann --- include/uapi/linux/bpf.h | 10 +++++++++- net/core/filter.c | 29 ++++++++++++++++++++++++++++- 2 files changed, 37 insertions(+), 2 deletions(-) diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index c5ec89732a8d..9a2d1a04eb24 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -755,6 +755,13 @@ union bpf_attr { * @addr: pointer to struct sockaddr to bind socket to * @addr_len: length of sockaddr structure * Return: 0 on success or negative error code + * + * int bpf_xdp_adjust_tail(xdp_md, delta) + * Adjust the xdp_md.data_end by delta. Only shrinking of packet's + * size is supported. 
+ * @xdp_md: pointer to xdp_md + * @delta: A negative integer to be added to xdp_md.data_end + * Return: 0 on success or negative on error */ #define __BPF_FUNC_MAPPER(FN) \ FN(unspec), \ @@ -821,7 +828,8 @@ union bpf_attr { FN(msg_apply_bytes), \ FN(msg_cork_bytes), \ FN(msg_pull_data), \ - FN(bind), + FN(bind), \ + FN(xdp_adjust_tail), /* integer value in 'imm' field of BPF_CALL instruction selects which helper * function eBPF program intends to call diff --git a/net/core/filter.c b/net/core/filter.c index a374b8560bc4..29318598fd60 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -2725,6 +2725,30 @@ static const struct bpf_func_proto bpf_xdp_adjust_head_proto = { .arg2_type = ARG_ANYTHING, }; +BPF_CALL_2(bpf_xdp_adjust_tail, struct xdp_buff *, xdp, int, offset) +{ + void *data_end = xdp->data_end + offset; + + /* only shrinking is allowed for now. */ + if (unlikely(offset >= 0)) + return -EINVAL; + + if (unlikely(data_end < xdp->data + ETH_HLEN)) + return -EINVAL; + + xdp->data_end = data_end; + + return 0; +} + +static const struct bpf_func_proto bpf_xdp_adjust_tail_proto = { + .func = bpf_xdp_adjust_tail, + .gpl_only = false, + .ret_type = RET_INTEGER, + .arg1_type = ARG_PTR_TO_CTX, + .arg2_type = ARG_ANYTHING, +}; + BPF_CALL_2(bpf_xdp_adjust_meta, struct xdp_buff *, xdp, int, offset) { void *meta = xdp->data_meta + offset; @@ -3074,7 +3098,8 @@ bool bpf_helper_changes_pkt_data(void *func) func == bpf_l4_csum_replace || func == bpf_xdp_adjust_head || func == bpf_xdp_adjust_meta || - func == bpf_msg_pull_data) + func == bpf_msg_pull_data || + func == bpf_xdp_adjust_tail) return true; return false; @@ -3888,6 +3913,8 @@ xdp_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) return &bpf_xdp_redirect_proto; case BPF_FUNC_redirect_map: return &bpf_xdp_redirect_map_proto; + case BPF_FUNC_xdp_adjust_tail: + return &bpf_xdp_adjust_tail_proto; default: return bpf_base_func_proto(func_id); } From 198d83bb3becf2e9c6c4fa744f35296c20da795a Mon Sep 
17 00:00:00 2001 From: "Nikita V. Shirokov" Date: Tue, 17 Apr 2018 21:42:14 -0700 Subject: [PATCH 02/11] bpf: make generic xdp compatible w/ bpf_xdp_adjust_tail w/ bpf_xdp_adjust_tail helper xdp's data_end pointer could be changed as well (only "decrease" of pointer's location is going to be supported). changing of this pointer will change packet's size. for generic XDP we need to reflect this packet's length change by adjusting skb's tail pointer Acked-by: Alexei Starovoitov Signed-off-by: Nikita V. Shirokov Signed-off-by: Daniel Borkmann --- net/core/dev.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/net/core/dev.c b/net/core/dev.c index 969462ebb296..11c789231a03 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -3996,9 +3996,9 @@ static u32 netif_receive_generic_xdp(struct sk_buff *skb, struct bpf_prog *xdp_prog) { struct netdev_rx_queue *rxqueue; + void *orig_data, *orig_data_end; u32 metalen, act = XDP_DROP; struct xdp_buff xdp; - void *orig_data; int hlen, off; u32 mac_len; @@ -4037,6 +4037,7 @@ static u32 netif_receive_generic_xdp(struct sk_buff *skb, xdp.data_meta = xdp.data; xdp.data_end = xdp.data + hlen; xdp.data_hard_start = skb->data - skb_headroom(skb); + orig_data_end = xdp.data_end; orig_data = xdp.data; rxqueue = netif_get_rxqueue(skb); @@ -4051,6 +4052,13 @@ static u32 netif_receive_generic_xdp(struct sk_buff *skb, __skb_push(skb, -off); skb->mac_header += off; + /* check if bpf_xdp_adjust_tail was used. it can only "shrink" + * pckt. + */ + off = orig_data_end - xdp.data_end; + if (off != 0) + skb_set_tail_pointer(skb, xdp.data_end - xdp.data); + switch (act) { case XDP_REDIRECT: case XDP_TX: From e5e0a59b7cd17244cb0c1d87112b0c9e5e2bfb39 Mon Sep 17 00:00:00 2001 From: "Nikita V. 
Shirokov" Date: Tue, 17 Apr 2018 21:42:15 -0700 Subject: [PATCH 03/11] bpf: make mlx4 compatible w/ bpf_xdp_adjust_tail w/ bpf_xdp_adjust_tail helper xdp's data_end pointer could be changed as well (only "decrease" of pointer's location is going to be supported). changing of this pointer will change packet's size. for mlx4 driver we will just calculate packet's length unconditionally (the same way as it's already being done in mlx5) Acked-by: Alexei Starovoitov Acked-by: Tariq Toukan Signed-off-by: Nikita V. Shirokov Signed-off-by: Daniel Borkmann --- drivers/net/ethernet/mellanox/mlx4/en_rx.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx4/en_rx.c b/drivers/net/ethernet/mellanox/mlx4/en_rx.c index 5c613c6663da..efc55feddc5c 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_rx.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_rx.c @@ -775,8 +775,8 @@ int mlx4_en_process_rx_cq(struct net_device *dev, struct mlx4_en_cq *cq, int bud act = bpf_prog_run_xdp(xdp_prog, &xdp); + length = xdp.data_end - xdp.data; if (xdp.data != orig_data) { - length = xdp.data_end - xdp.data; frags[0].page_offset = xdp.data - xdp.data_hard_start; va = xdp.data; From b968e735c79767a3c91217fbae691581aa557d8d Mon Sep 17 00:00:00 2001 From: "Nikita V. Shirokov" Date: Tue, 17 Apr 2018 21:42:16 -0700 Subject: [PATCH 04/11] bpf: make bnxt compatible w/ bpf_xdp_adjust_tail w/ bpf_xdp_adjust_tail helper xdp's data_end pointer could be changed as well (only "decrease" of pointer's location is going to be supported). changing of this pointer will change packet's size. for bnxt driver we will just calculate packet's length unconditionally Acked-by: Alexei Starovoitov Signed-off-by: Nikita V. 
Shirokov Acked-by: Michael Chan Signed-off-by: Daniel Borkmann --- drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c index 1389ab5e05df..1f0e872d0667 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c @@ -113,10 +113,10 @@ bool bnxt_rx_xdp(struct bnxt *bp, struct bnxt_rx_ring_info *rxr, u16 cons, if (tx_avail != bp->tx_ring_size) *event &= ~BNXT_RX_EVENT; + *len = xdp.data_end - xdp.data; if (orig_data != xdp.data) { offset = xdp.data - xdp.data_hard_start; *data_ptr = xdp.data_hard_start + offset; - *len = xdp.data_end - xdp.data; } switch (act) { case XDP_PASS: From a48ce00f9aeb6ce1227e291c36a0fa4995273144 Mon Sep 17 00:00:00 2001 From: "Nikita V. Shirokov" Date: Tue, 17 Apr 2018 21:42:17 -0700 Subject: [PATCH 05/11] bpf: make cavium thunder compatible w/ bpf_xdp_adjust_tail w/ bpf_xdp_adjust_tail helper xdp's data_end pointer could be changed as well (only "decrease" of pointer's location is going to be supported). changing of this pointer will change packet's size. for cavium's thunder driver we will just calculate packet's length unconditionally Acked-by: Alexei Starovoitov Signed-off-by: Nikita V. 
Shirokov Signed-off-by: Daniel Borkmann --- drivers/net/ethernet/cavium/thunder/nicvf_main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/cavium/thunder/nicvf_main.c b/drivers/net/ethernet/cavium/thunder/nicvf_main.c index 707db3304396..7135db45927e 100644 --- a/drivers/net/ethernet/cavium/thunder/nicvf_main.c +++ b/drivers/net/ethernet/cavium/thunder/nicvf_main.c @@ -538,9 +538,9 @@ static inline bool nicvf_xdp_rx(struct nicvf *nic, struct bpf_prog *prog, action = bpf_prog_run_xdp(prog, &xdp); rcu_read_unlock(); + len = xdp.data_end - xdp.data; /* Check if XDP program has changed headers */ if (orig_data != xdp.data) { - len = xdp.data_end - xdp.data; offset = orig_data - xdp.data; dma_addr -= offset; } From 5a6a22e378244cb3aa0473abc31a5d1c2cb2327b Mon Sep 17 00:00:00 2001 From: "Nikita V. Shirokov" Date: Tue, 17 Apr 2018 21:42:18 -0700 Subject: [PATCH 06/11] bpf: make netronome nfp compatible w/ bpf_xdp_adjust_tail w/ bpf_xdp_adjust_tail helper xdp's data_end pointer could be changed as well (only "decrease" of pointer's location is going to be supported). changing of this pointer will change packet's size. for nfp driver we will just calculate packet's length unconditionally Acked-by: Alexei Starovoitov Acked-by: Jakub Kicinski Signed-off-by: Nikita V. 
Shirokov Signed-off-by: Daniel Borkmann --- drivers/net/ethernet/netronome/nfp/nfp_net_common.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c index 1eb6549f2a54..d9111c077699 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c +++ b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c @@ -1722,7 +1722,7 @@ static int nfp_net_rx(struct nfp_net_rx_ring *rx_ring, int budget) act = bpf_prog_run_xdp(xdp_prog, &xdp); - pkt_len -= xdp.data - orig_data; + pkt_len = xdp.data_end - xdp.data; pkt_off += xdp.data - orig_data; switch (act) { From 8fb58f1ecf5cbcc7805dd339243b7d9837cbabbe Mon Sep 17 00:00:00 2001 From: "Nikita V. Shirokov" Date: Tue, 17 Apr 2018 21:42:19 -0700 Subject: [PATCH 07/11] bpf: make tun compatible w/ bpf_xdp_adjust_tail w/ bpf_xdp_adjust_tail helper xdp's data_end pointer could be changed as well (only "decrease" of pointer's location is going to be supported). changing of this pointer will change packet's size. for tun driver we need to adjust XDP_PASS handling by recalculating length of the packet if it was passed to the TCP/IP stack (in case if after xdp's prog run data_end pointer was adjusted) Reviewed-by: Jason Wang Signed-off-by: Nikita V. Shirokov Acked-by: Michael S. 
Tsirkin Signed-off-by: Daniel Borkmann --- drivers/net/tun.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/tun.c b/drivers/net/tun.c index 1e58be152d5c..901351a6ed21 100644 --- a/drivers/net/tun.c +++ b/drivers/net/tun.c @@ -1696,6 +1696,7 @@ static struct sk_buff *tun_build_skb(struct tun_struct *tun, return NULL; case XDP_PASS: delta = orig_data - xdp.data; + len = xdp.data_end - xdp.data; break; default: bpf_warn_invalid_xdp_action(act); @@ -1716,7 +1717,7 @@ static struct sk_buff *tun_build_skb(struct tun_struct *tun, } skb_reserve(skb, pad - delta); - skb_put(skb, len + delta); + skb_put(skb, len); get_page(alloc_frag->page); alloc_frag->offset += buflen; From 6870de435b90c083ae0f3f7f341287976ef56f03 Mon Sep 17 00:00:00 2001 From: "Nikita V. Shirokov" Date: Tue, 17 Apr 2018 21:42:20 -0700 Subject: [PATCH 08/11] bpf: make virtio compatible w/ bpf_xdp_adjust_tail w/ bpf_xdp_adjust_tail helper xdp's data_end pointer could be changed as well (only "decrease" of pointer's location is going to be supported). changing of this pointer will change packet's size. for virtio driver we need to adjust XDP_PASS handling by recalculating length of the packet if it was passed to the TCP/IP stack Reviewed-by: Jason Wang Signed-off-by: Nikita V. 
Shirokov Signed-off-by: Daniel Borkmann --- drivers/net/virtio_net.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c index 01694e26f03e..779a4f798522 100644 --- a/drivers/net/virtio_net.c +++ b/drivers/net/virtio_net.c @@ -606,6 +606,7 @@ static struct sk_buff *receive_small(struct net_device *dev, case XDP_PASS: /* Recalculate length in case bpf program changed it */ delta = orig_data - xdp.data; + len = xdp.data_end - xdp.data; break; case XDP_TX: xdpf = convert_to_xdp_frame(&xdp); @@ -642,7 +643,7 @@ static struct sk_buff *receive_small(struct net_device *dev, goto err; } skb_reserve(skb, headroom - delta); - skb_put(skb, len + delta); + skb_put(skb, len); if (!delta) { buf += header_offset; memcpy(skb_vnet_hdr(skb), buf, vi->hdr_len); @@ -757,6 +758,10 @@ static struct sk_buff *receive_mergeable(struct net_device *dev, offset = xdp.data - page_address(xdp_page) - vi->hdr_len; + /* recalculate len if xdp.data or xdp.data_end were + * adjusted + */ + len = xdp.data_end - xdp.data; /* We can only create skb based on xdp_page. */ if (unlikely(xdp_page != page)) { rcu_read_unlock(); From 587b80cce95b8aab89e5e35033953c005dc47f01 Mon Sep 17 00:00:00 2001 From: "Nikita V. Shirokov" Date: Tue, 17 Apr 2018 21:42:21 -0700 Subject: [PATCH 09/11] bpf: making bpf_prog_test run aware of possible data_end ptr change after introduction of bpf_xdp_adjust_tail helper packet length could be changed not only if xdp->data pointer has been changed but xdp->data_end as well. making bpf_prog_test_run aware of this possibility Signed-off-by: Nikita V. 
Shirokov Signed-off-by: Daniel Borkmann --- net/bpf/test_run.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/bpf/test_run.c b/net/bpf/test_run.c index 2ced48662c1f..68c3578343b4 100644 --- a/net/bpf/test_run.c +++ b/net/bpf/test_run.c @@ -170,7 +170,8 @@ int bpf_prog_test_run_xdp(struct bpf_prog *prog, const union bpf_attr *kattr, xdp.rxq = &rxqueue->xdp_rxq; retval = bpf_test_run(prog, &xdp, repeat, &duration); - if (xdp.data != data + XDP_PACKET_HEADROOM + NET_IP_ALIGN) + if (xdp.data != data + XDP_PACKET_HEADROOM + NET_IP_ALIGN || + xdp.data_end != xdp.data + size) size = xdp.data_end - xdp.data; ret = bpf_test_finish(kattr, uattr, xdp.data, size, retval, duration); kfree(data); From 0367d0a29427d5916b98cf31dfc85a8293540614 Mon Sep 17 00:00:00 2001 From: "Nikita V. Shirokov" Date: Tue, 17 Apr 2018 21:42:22 -0700 Subject: [PATCH 10/11] bpf: adding tests for bpf_xdp_adjust_tail adding selftests for bpf_xdp_adjust_tail helper. in this synthetic test we are testing that 1) if data_end < data helper will return EINVAL 2) for normal use case packet's length would be reduced. Signed-off-by: Nikita V. 
Shirokov Signed-off-by: Daniel Borkmann --- tools/include/uapi/linux/bpf.h | 10 +++++- tools/testing/selftests/bpf/Makefile | 2 +- tools/testing/selftests/bpf/bpf_helpers.h | 3 ++ .../testing/selftests/bpf/test_adjust_tail.c | 30 +++++++++++++++++ tools/testing/selftests/bpf/test_progs.c | 32 +++++++++++++++++++ 5 files changed, 75 insertions(+), 2 deletions(-) create mode 100644 tools/testing/selftests/bpf/test_adjust_tail.c diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index 9d07465023a2..56bf493ba7ed 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -755,6 +755,13 @@ union bpf_attr { * @addr: pointer to struct sockaddr to bind socket to * @addr_len: length of sockaddr structure * Return: 0 on success or negative error code + * + * int bpf_xdp_adjust_tail(xdp_md, delta) + * Adjust the xdp_md.data_end by delta. Only shrinking of packet's + * size is supported. + * @xdp_md: pointer to xdp_md + * @delta: A negative integer to be added to xdp_md.data_end + * Return: 0 on success or negative on error */ #define __BPF_FUNC_MAPPER(FN) \ FN(unspec), \ @@ -821,7 +828,8 @@ union bpf_attr { FN(msg_apply_bytes), \ FN(msg_cork_bytes), \ FN(msg_pull_data), \ - FN(bind), + FN(bind), \ + FN(xdp_adjust_tail), /* integer value in 'imm' field of BPF_CALL instruction selects which helper * function eBPF program intends to call diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile index 0a315ddabbf4..3e819dc70bee 100644 --- a/tools/testing/selftests/bpf/Makefile +++ b/tools/testing/selftests/bpf/Makefile @@ -31,7 +31,7 @@ TEST_GEN_FILES = test_pkt_access.o test_xdp.o test_l4lb.o test_tcp_estats.o test sockmap_verdict_prog.o dev_cgroup.o sample_ret0.o test_tracepoint.o \ test_l4lb_noinline.o test_xdp_noinline.o test_stacktrace_map.o \ sample_map_ret0.o test_tcpbpf_kern.o test_stacktrace_build_id.o \ - sockmap_tcp_msg_prog.o connect4_prog.o connect6_prog.o + sockmap_tcp_msg_prog.o 
connect4_prog.o connect6_prog.o test_adjust_tail.o # Order correspond to 'make run_tests' order TEST_PROGS := test_kmod.sh \ diff --git a/tools/testing/selftests/bpf/bpf_helpers.h b/tools/testing/selftests/bpf/bpf_helpers.h index d8223d99f96d..50c607014b22 100644 --- a/tools/testing/selftests/bpf/bpf_helpers.h +++ b/tools/testing/selftests/bpf/bpf_helpers.h @@ -96,6 +96,9 @@ static int (*bpf_msg_pull_data)(void *ctx, int start, int end, int flags) = (void *) BPF_FUNC_msg_pull_data; static int (*bpf_bind)(void *ctx, void *addr, int addr_len) = (void *) BPF_FUNC_bind; +static int (*bpf_xdp_adjust_tail)(void *ctx, int offset) = + (void *) BPF_FUNC_xdp_adjust_tail; + /* llvm builtin functions that eBPF C program may use to * emit BPF_LD_ABS and BPF_LD_IND instructions diff --git a/tools/testing/selftests/bpf/test_adjust_tail.c b/tools/testing/selftests/bpf/test_adjust_tail.c new file mode 100644 index 000000000000..4cd5e860c903 --- /dev/null +++ b/tools/testing/selftests/bpf/test_adjust_tail.c @@ -0,0 +1,30 @@ +/* SPDX-License-Identifier: GPL-2.0 + * Copyright (c) 2018 Facebook + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. 
+ */ +#include +#include +#include "bpf_helpers.h" + +int _version SEC("version") = 1; + +SEC("xdp_adjust_tail") +int _xdp_adjust_tail(struct xdp_md *xdp) +{ + void *data_end = (void *)(long)xdp->data_end; + void *data = (void *)(long)xdp->data; + int offset = 0; + + if (data_end - data == 54) + offset = 256; + else + offset = 20; + if (bpf_xdp_adjust_tail(xdp, 0 - offset)) + return XDP_DROP; + return XDP_TX; +} + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/test_progs.c b/tools/testing/selftests/bpf/test_progs.c index faadbe233966..eedda98d7bb1 100644 --- a/tools/testing/selftests/bpf/test_progs.c +++ b/tools/testing/selftests/bpf/test_progs.c @@ -166,6 +166,37 @@ out: bpf_object__close(obj); } +static void test_xdp_adjust_tail(void) +{ + const char *file = "./test_adjust_tail.o"; + struct bpf_object *obj; + char buf[128]; + __u32 duration, retval, size; + int err, prog_fd; + + err = bpf_prog_load(file, BPF_PROG_TYPE_XDP, &obj, &prog_fd); + if (err) { + error_cnt++; + return; + } + + err = bpf_prog_test_run(prog_fd, 1, &pkt_v4, sizeof(pkt_v4), + buf, &size, &retval, &duration); + + CHECK(err || errno || retval != XDP_DROP, + "ipv4", "err %d errno %d retval %d size %d\n", + err, errno, retval, size); + + err = bpf_prog_test_run(prog_fd, 1, &pkt_v6, sizeof(pkt_v6), + buf, &size, &retval, &duration); + CHECK(err || errno || retval != XDP_TX || size != 54, + "ipv6", "err %d errno %d retval %d size %d\n", + err, errno, retval, size); + bpf_object__close(obj); +} + + + #define MAGIC_VAL 0x1234 #define NUM_ITER 100000 #define VIP_NUM 5 @@ -1177,6 +1208,7 @@ int main(void) { test_pkt_access(); test_xdp(); + test_xdp_adjust_tail(); test_l4lb_all(); test_xdp_noinline(); test_tcp_estats(); From c6ffd1ff785675c4a572c79f0e55ba5735edbaa0 Mon Sep 17 00:00:00 2001 From: "Nikita V. 
Shirokov" Date: Tue, 17 Apr 2018 21:42:23 -0700 Subject: [PATCH 11/11] bpf: add bpf_xdp_adjust_tail sample prog adding bpf's sample program which is using bpf_xdp_adjust_tail helper by generating ICMPv4 "packet too big" message if ingress packet's size is bigger than 600 bytes Signed-off-by: Nikita V. Shirokov Signed-off-by: Daniel Borkmann --- samples/bpf/Makefile | 4 + samples/bpf/xdp_adjust_tail_kern.c | 152 ++++++++++++++++++++++ samples/bpf/xdp_adjust_tail_user.c | 142 ++++++++++++++++++++ tools/testing/selftests/bpf/bpf_helpers.h | 2 + 4 files changed, 300 insertions(+) create mode 100644 samples/bpf/xdp_adjust_tail_kern.c create mode 100644 samples/bpf/xdp_adjust_tail_user.c diff --git a/samples/bpf/Makefile b/samples/bpf/Makefile index 4d6a6edd4bf6..aa8c392e2e52 100644 --- a/samples/bpf/Makefile +++ b/samples/bpf/Makefile @@ -44,6 +44,7 @@ hostprogs-y += xdp_monitor hostprogs-y += xdp_rxq_info hostprogs-y += syscall_tp hostprogs-y += cpustat +hostprogs-y += xdp_adjust_tail # Libbpf dependencies LIBBPF := ../../tools/lib/bpf/bpf.o ../../tools/lib/bpf/nlattr.o @@ -95,6 +96,7 @@ xdp_monitor-objs := bpf_load.o $(LIBBPF) xdp_monitor_user.o xdp_rxq_info-objs := bpf_load.o $(LIBBPF) xdp_rxq_info_user.o syscall_tp-objs := bpf_load.o $(LIBBPF) syscall_tp_user.o cpustat-objs := bpf_load.o $(LIBBPF) cpustat_user.o +xdp_adjust_tail-objs := bpf_load.o $(LIBBPF) xdp_adjust_tail_user.o # Tell kbuild to always build the programs always := $(hostprogs-y) @@ -148,6 +150,7 @@ always += xdp_rxq_info_kern.o always += xdp2skb_meta_kern.o always += syscall_tp_kern.o always += cpustat_kern.o +always += xdp_adjust_tail_kern.o HOSTCFLAGS += -I$(objtree)/usr/include HOSTCFLAGS += -I$(srctree)/tools/lib/ @@ -193,6 +196,7 @@ HOSTLOADLIBES_xdp_monitor += -lelf HOSTLOADLIBES_xdp_rxq_info += -lelf HOSTLOADLIBES_syscall_tp += -lelf HOSTLOADLIBES_cpustat += -lelf +HOSTLOADLIBES_xdp_adjust_tail += -lelf # Allows pointing LLC/CLANG to a LLVM backend with bpf support, redefine on cmdline: # 
make samples/bpf/ LLC=~/git/llvm/build/bin/llc CLANG=~/git/llvm/build/bin/clang diff --git a/samples/bpf/xdp_adjust_tail_kern.c b/samples/bpf/xdp_adjust_tail_kern.c new file mode 100644 index 000000000000..411fdb21f8bc --- /dev/null +++ b/samples/bpf/xdp_adjust_tail_kern.c @@ -0,0 +1,152 @@ +/* SPDX-License-Identifier: GPL-2.0 + * Copyright (c) 2018 Facebook + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + * + * This program shows how to use bpf_xdp_adjust_tail() by + * generating ICMPv4 "packet to big" (unreachable/ df bit set frag needed + * to be more preice in case of v4)" where receiving packets bigger then + * 600 bytes. + */ +#define KBUILD_MODNAME "foo" +#include +#include +#include +#include +#include +#include +#include +#include "bpf_helpers.h" + +#define DEFAULT_TTL 64 +#define MAX_PCKT_SIZE 600 +#define ICMP_TOOBIG_SIZE 98 +#define ICMP_TOOBIG_PAYLOAD_SIZE 92 + +struct bpf_map_def SEC("maps") icmpcnt = { + .type = BPF_MAP_TYPE_ARRAY, + .key_size = sizeof(__u32), + .value_size = sizeof(__u64), + .max_entries = 1, +}; + +static __always_inline void count_icmp(void) +{ + u64 key = 0; + u64 *icmp_count; + + icmp_count = bpf_map_lookup_elem(&icmpcnt, &key); + if (icmp_count) + *icmp_count += 1; +} + +static __always_inline void swap_mac(void *data, struct ethhdr *orig_eth) +{ + struct ethhdr *eth; + + eth = data; + memcpy(eth->h_source, orig_eth->h_dest, ETH_ALEN); + memcpy(eth->h_dest, orig_eth->h_source, ETH_ALEN); + eth->h_proto = orig_eth->h_proto; +} + +static __always_inline __u16 csum_fold_helper(__u32 csum) +{ + return ~((csum & 0xffff) + (csum >> 16)); +} + +static __always_inline void ipv4_csum(void *data_start, int data_size, + __u32 *csum) +{ + *csum = bpf_csum_diff(0, 0, data_start, data_size, *csum); + *csum = csum_fold_helper(*csum); +} + +static __always_inline int send_icmp4_too_big(struct 
xdp_md *xdp) +{ + int headroom = (int)sizeof(struct iphdr) + (int)sizeof(struct icmphdr); + + if (bpf_xdp_adjust_head(xdp, 0 - headroom)) + return XDP_DROP; + void *data = (void *)(long)xdp->data; + void *data_end = (void *)(long)xdp->data_end; + + if (data + (ICMP_TOOBIG_SIZE + headroom) > data_end) + return XDP_DROP; + + struct iphdr *iph, *orig_iph; + struct icmphdr *icmp_hdr; + struct ethhdr *orig_eth; + __u32 csum = 0; + __u64 off = 0; + + orig_eth = data + headroom; + swap_mac(data, orig_eth); + off += sizeof(struct ethhdr); + iph = data + off; + off += sizeof(struct iphdr); + icmp_hdr = data + off; + off += sizeof(struct icmphdr); + orig_iph = data + off; + icmp_hdr->type = ICMP_DEST_UNREACH; + icmp_hdr->code = ICMP_FRAG_NEEDED; + icmp_hdr->un.frag.mtu = htons(MAX_PCKT_SIZE-sizeof(struct ethhdr)); + icmp_hdr->checksum = 0; + ipv4_csum(icmp_hdr, ICMP_TOOBIG_PAYLOAD_SIZE, &csum); + icmp_hdr->checksum = csum; + iph->ttl = DEFAULT_TTL; + iph->daddr = orig_iph->saddr; + iph->saddr = orig_iph->daddr; + iph->version = 4; + iph->ihl = 5; + iph->protocol = IPPROTO_ICMP; + iph->tos = 0; + iph->tot_len = htons( + ICMP_TOOBIG_SIZE + headroom - sizeof(struct ethhdr)); + iph->check = 0; + csum = 0; + ipv4_csum(iph, sizeof(struct iphdr), &csum); + iph->check = csum; + count_icmp(); + return XDP_TX; +} + + +static __always_inline int handle_ipv4(struct xdp_md *xdp) +{ + void *data_end = (void *)(long)xdp->data_end; + void *data = (void *)(long)xdp->data; + int pckt_size = data_end - data; + int offset; + + if (pckt_size > MAX_PCKT_SIZE) { + offset = pckt_size - ICMP_TOOBIG_SIZE; + if (bpf_xdp_adjust_tail(xdp, 0 - offset)) + return XDP_PASS; + return send_icmp4_too_big(xdp); + } + return XDP_PASS; +} + +SEC("xdp_icmp") +int _xdp_icmp(struct xdp_md *xdp) +{ + void *data_end = (void *)(long)xdp->data_end; + void *data = (void *)(long)xdp->data; + struct ethhdr *eth = data; + __u16 h_proto; + + if (eth + 1 > data_end) + return XDP_DROP; + + h_proto = eth->h_proto; + + if 
(h_proto == htons(ETH_P_IP)) + return handle_ipv4(xdp); + else + return XDP_PASS; +} + +char _license[] SEC("license") = "GPL"; diff --git a/samples/bpf/xdp_adjust_tail_user.c b/samples/bpf/xdp_adjust_tail_user.c new file mode 100644 index 000000000000..f621a541b574 --- /dev/null +++ b/samples/bpf/xdp_adjust_tail_user.c @@ -0,0 +1,142 @@ +/* SPDX-License-Identifier: GPL-2.0 + * Copyright (c) 2018 Facebook + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "bpf_load.h" +#include "libbpf.h" +#include "bpf_util.h" + +#define STATS_INTERVAL_S 2U + +static int ifindex = -1; +static __u32 xdp_flags; + +static void int_exit(int sig) +{ + if (ifindex > -1) + bpf_set_link_xdp_fd(ifindex, -1, xdp_flags); + exit(0); +} + +/* simple "icmp packet too big sent" counter + */ +static void poll_stats(unsigned int kill_after_s) +{ + time_t started_at = time(NULL); + __u64 value = 0; + int key = 0; + + + while (!kill_after_s || time(NULL) - started_at <= kill_after_s) { + sleep(STATS_INTERVAL_S); + + assert(bpf_map_lookup_elem(map_fd[0], &key, &value) == 0); + + printf("icmp \"packet too big\" sent: %10llu pkts\n", value); + } +} + +static void usage(const char *cmd) +{ + printf("Start a XDP prog which send ICMP \"packet too big\" \n" + "messages if ingress packet is bigger then MAX_SIZE bytes\n"); + printf("Usage: %s [...]\n", cmd); + printf(" -i Interface Index\n"); + printf(" -T Default: 0 (forever)\n"); + printf(" -S use skb-mode\n"); + printf(" -N enforce native mode\n"); + printf(" -h Display this help\n"); +} + +int main(int argc, char **argv) +{ + unsigned char opt_flags[256] = {}; + unsigned int kill_after_s = 0; + const char *optstr = "i:T:SNh"; + struct rlimit r = {RLIM_INFINITY, 
RLIM_INFINITY}; + char filename[256]; + int opt; + int i; + + + for (i = 0; i < strlen(optstr); i++) + if (optstr[i] != 'h' && 'a' <= optstr[i] && optstr[i] <= 'z') + opt_flags[(unsigned char)optstr[i]] = 1; + + while ((opt = getopt(argc, argv, optstr)) != -1) { + + switch (opt) { + case 'i': + ifindex = atoi(optarg); + break; + case 'T': + kill_after_s = atoi(optarg); + break; + case 'S': + xdp_flags |= XDP_FLAGS_SKB_MODE; + break; + case 'N': + xdp_flags |= XDP_FLAGS_DRV_MODE; + break; + default: + usage(argv[0]); + return 1; + } + opt_flags[opt] = 0; + } + + for (i = 0; i < strlen(optstr); i++) { + if (opt_flags[(unsigned int)optstr[i]]) { + fprintf(stderr, "Missing argument -%c\n", optstr[i]); + usage(argv[0]); + return 1; + } + } + + if (setrlimit(RLIMIT_MEMLOCK, &r)) { + perror("setrlimit(RLIMIT_MEMLOCK, RLIM_INFINITY)"); + return 1; + } + + snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]); + + if (load_bpf_file(filename)) { + printf("%s", bpf_log_buf); + return 1; + } + + if (!prog_fd[0]) { + printf("load_bpf_file: %s\n", strerror(errno)); + return 1; + } + + signal(SIGINT, int_exit); + signal(SIGTERM, int_exit); + + if (bpf_set_link_xdp_fd(ifindex, prog_fd[0], xdp_flags) < 0) { + printf("link set xdp fd failed\n"); + return 1; + } + + poll_stats(kill_after_s); + + bpf_set_link_xdp_fd(ifindex, -1, xdp_flags); + + return 0; +} diff --git a/tools/testing/selftests/bpf/bpf_helpers.h b/tools/testing/selftests/bpf/bpf_helpers.h index 50c607014b22..9271576bdc8f 100644 --- a/tools/testing/selftests/bpf/bpf_helpers.h +++ b/tools/testing/selftests/bpf/bpf_helpers.h @@ -132,6 +132,8 @@ static int (*bpf_l3_csum_replace)(void *ctx, int off, int from, int to, int flag (void *) BPF_FUNC_l3_csum_replace; static int (*bpf_l4_csum_replace)(void *ctx, int off, int from, int to, int flags) = (void *) BPF_FUNC_l4_csum_replace; +static int (*bpf_csum_diff)(void *from, int from_size, void *to, int to_size, int seed) = + (void *) BPF_FUNC_csum_diff; static int 
(*bpf_skb_under_cgroup)(void *ctx, void *map, int index) = (void *) BPF_FUNC_skb_under_cgroup; static int (*bpf_skb_change_head)(void *, int len, int flags) =