diff --git a/include/linux/bpf-cgroup.h b/include/linux/bpf-cgroup.h index 53702b83ce5f..91b966978541 100644 --- a/include/linux/bpf-cgroup.h +++ b/include/linux/bpf-cgroup.h @@ -158,10 +158,6 @@ int __cgroup_bpf_run_filter_getsockopt(struct sock *sk, int level, int __user *optlen, int max_optlen, int retval); -int __cgroup_bpf_run_filter_getsockopt_kern(struct sock *sk, int level, - int optname, void *optval, - int *optlen, int retval); - static inline enum bpf_cgroup_storage_type cgroup_storage_type( struct bpf_map *map) { @@ -408,23 +404,10 @@ int bpf_percpu_cgroup_storage_update(struct bpf_map *map, void *key, ({ \ int __ret = retval; \ if (cgroup_bpf_enabled) \ - if (!(sock)->sk_prot->bpf_bypass_getsockopt || \ - !INDIRECT_CALL_INET_1((sock)->sk_prot->bpf_bypass_getsockopt, \ - tcp_bpf_bypass_getsockopt, \ - level, optname)) \ - __ret = __cgroup_bpf_run_filter_getsockopt( \ - sock, level, optname, optval, optlen, \ - max_optlen, retval); \ - __ret; \ -}) - -#define BPF_CGROUP_RUN_PROG_GETSOCKOPT_KERN(sock, level, optname, optval, \ - optlen, retval) \ -({ \ - int __ret = retval; \ - if (cgroup_bpf_enabled) \ - __ret = __cgroup_bpf_run_filter_getsockopt_kern( \ - sock, level, optname, optval, optlen, retval); \ + __ret = __cgroup_bpf_run_filter_getsockopt(sock, level, \ + optname, optval, \ + optlen, max_optlen, \ + retval); \ __ret; \ }) @@ -510,8 +493,6 @@ static inline int bpf_percpu_cgroup_storage_update(struct bpf_map *map, #define BPF_CGROUP_GETSOCKOPT_MAX_OPTLEN(optlen) ({ 0; }) #define BPF_CGROUP_RUN_PROG_GETSOCKOPT(sock, level, optname, optval, \ optlen, max_optlen, retval) ({ retval; }) -#define BPF_CGROUP_RUN_PROG_GETSOCKOPT_KERN(sock, level, optname, optval, \ - optlen, retval) ({ retval; }) #define BPF_CGROUP_RUN_PROG_SETSOCKOPT(sock, level, optname, optval, optlen, \ kernel_optval) ({ 0; }) diff --git a/include/linux/indirect_call_wrapper.h b/include/linux/indirect_call_wrapper.h index cfcfef37b2f1..54c02c84906a 100644 --- a/include/linux/indirect_call_wrapper.h +++ b/include/linux/indirect_call_wrapper.h @@ -60,10 +60,4 @@ #define INDIRECT_CALL_INET(f, f2, f1, ...) f(__VA_ARGS__) #endif -#if IS_ENABLED(CONFIG_INET) -#define INDIRECT_CALL_INET_1(f, f1, ...) INDIRECT_CALL_1(f, f1, __VA_ARGS__) -#else -#define INDIRECT_CALL_INET_1(f, f1, ...) f(__VA_ARGS__) -#endif - #endif diff --git a/include/net/sock.h b/include/net/sock.h index cf281e09edb4..016b694fa46b 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -1224,8 +1224,6 @@ struct proto { int (*backlog_rcv) (struct sock *sk, struct sk_buff *skb); - bool (*bpf_bypass_getsockopt)(int level, - int optname); void (*release_cb)(struct sock *sk); diff --git a/include/net/tcp.h b/include/net/tcp.h index c63224a0a839..2f7e4441f471 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -400,7 +400,6 @@ __poll_t tcp_poll(struct file *file, struct socket *sock, struct poll_table_struct *wait); int tcp_getsockopt(struct sock *sk, int level, int optname, char __user *optval, int __user *optlen); -bool tcp_bpf_bypass_getsockopt(int level, int optname); int tcp_setsockopt(struct sock *sk, int level, int optname, sockptr_t optval, unsigned int optlen); void tcp_set_keepalive(struct sock *sk, int val); diff --git a/kernel/bpf/cgroup.c b/kernel/bpf/cgroup.c index 85927c2aa343..d3593a520bb7 100644 --- a/kernel/bpf/cgroup.c +++ b/kernel/bpf/cgroup.c @@ -1546,52 +1546,6 @@ int __cgroup_bpf_run_filter_getsockopt(struct sock *sk, int level, sockopt_free_buf(&ctx); return ret; } - -int __cgroup_bpf_run_filter_getsockopt_kern(struct sock *sk, int level, - int optname, void *optval, - int *optlen, int retval) -{ - struct cgroup *cgrp = sock_cgroup_ptr(&sk->sk_cgrp_data); - struct bpf_sockopt_kern ctx = { - .sk = sk, - .level = level, - .optname = optname, - .retval = retval, - .optlen = *optlen, - .optval = optval, - .optval_end = optval + *optlen, - }; - int ret; - - /* Note that __cgroup_bpf_run_filter_getsockopt doesn't copy - * user data back into BPF buffer when reval != 0. This is - * done as an optimization to avoid extra copy, assuming - * kernel won't populate the data in case of an error. - * Here we always pass the data and memset() should - * be called if that data shouldn't be "exported". - */ - - ret = BPF_PROG_RUN_ARRAY(cgrp->bpf.effective[BPF_CGROUP_GETSOCKOPT], - &ctx, BPF_PROG_RUN); - if (!ret) - return -EPERM; - - if (ctx.optlen > *optlen) - return -EFAULT; - - /* BPF programs only allowed to set retval to 0, not some - * arbitrary value. - */ - if (ctx.retval != 0 && ctx.retval != retval) - return -EFAULT; - - /* BPF programs can shrink the buffer, export the modifications. - */ - if (ctx.optlen != 0) - *optlen = ctx.optlen; - - return ctx.retval; -} #endif static ssize_t sysctl_cpy_dir(const struct ctl_dir *dir, char **bufp, diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index c430e2511513..9e3ed6d2c2ac 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -3946,8 +3946,6 @@ static int do_tcp_getsockopt(struct sock *sk, int level, return -EFAULT; lock_sock(sk); err = tcp_zerocopy_receive(sk, &zc); - err = BPF_CGROUP_RUN_PROG_GETSOCKOPT_KERN(sk, level, optname, - &zc, &len, err); release_sock(sk); if (len >= offsetofend(struct tcp_zerocopy_receive, err)) goto zerocopy_rcv_sk_err; @@ -3982,18 +3980,6 @@ static int do_tcp_getsockopt(struct sock *sk, int level, return 0; } -bool tcp_bpf_bypass_getsockopt(int level, int optname) -{ - /* TCP do_tcp_getsockopt has optimized getsockopt implementation - * to avoid extra socket lock for TCP_ZEROCOPY_RECEIVE. - */ - if (level == SOL_TCP && optname == TCP_ZEROCOPY_RECEIVE) - return true; - - return false; -} -EXPORT_SYMBOL(tcp_bpf_bypass_getsockopt); - int tcp_getsockopt(struct sock *sk, int level, int optname, char __user *optval, int __user *optlen) { diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index bd41a25b9fc7..45939b6c99c2 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -2796,7 +2796,6 @@ struct proto tcp_prot = { .shutdown = tcp_shutdown, .setsockopt = tcp_setsockopt, .getsockopt = tcp_getsockopt, - .bpf_bypass_getsockopt = tcp_bpf_bypass_getsockopt, .keepalive = tcp_set_keepalive, .recvmsg = tcp_recvmsg, .sendmsg = tcp_sendmsg, diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index a45a03b730af..a68f75d3664e 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -2132,7 +2132,6 @@ struct proto tcpv6_prot = { .shutdown = tcp_shutdown, .setsockopt = tcp_setsockopt, .getsockopt = tcp_getsockopt, - .bpf_bypass_getsockopt = tcp_bpf_bypass_getsockopt, .keepalive = tcp_set_keepalive, .recvmsg = tcp_recvmsg, .sendmsg = tcp_sendmsg, diff --git a/net/socket.c b/net/socket.c index d983fef5e0ed..8aefd9114ab2 100644 --- a/net/socket.c +++ b/net/socket.c @@ -2138,9 +2138,6 @@ SYSCALL_DEFINE5(setsockopt, int, fd, int, level, int, optname, return __sys_setsockopt(fd, level, optname, optval, optlen); } -INDIRECT_CALLABLE_DECLARE(bool tcp_bpf_bypass_getsockopt(int level, - int optname)); - /* * Get a socket option. Because we don't know the option lengths we have * to pass a user mode parameter for the protocols to sort out. diff --git a/tools/include/uapi/linux/tcp.h b/tools/include/uapi/linux/tcp.h deleted file mode 100644 index 13ceeb395eb8..000000000000 --- a/tools/include/uapi/linux/tcp.h +++ /dev/null @@ -1,357 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0+ WITH Linux-syscall-note */ -/* - * INET An implementation of the TCP/IP protocol suite for the LINUX - * operating system. INET is implemented using the BSD Socket - * interface as the means of communication with the user level. - * - * Definitions for the TCP protocol. - * - * Version: @(#)tcp.h 1.0.2 04/28/93 - * - * Author: Fred N. van Kempen, - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. - */ -#ifndef _UAPI_LINUX_TCP_H -#define _UAPI_LINUX_TCP_H - -#include -#include -#include - -struct tcphdr { - __be16 source; - __be16 dest; - __be32 seq; - __be32 ack_seq; -#if defined(__LITTLE_ENDIAN_BITFIELD) - __u16 res1:4, - doff:4, - fin:1, - syn:1, - rst:1, - psh:1, - ack:1, - urg:1, - ece:1, - cwr:1; -#elif defined(__BIG_ENDIAN_BITFIELD) - __u16 doff:4, - res1:4, - cwr:1, - ece:1, - urg:1, - ack:1, - psh:1, - rst:1, - syn:1, - fin:1; -#else -#error "Adjust your defines" -#endif - __be16 window; - __sum16 check; - __be16 urg_ptr; -}; - -/* - * The union cast uses a gcc extension to avoid aliasing problems - * (union is compatible to any of its members) - * This means this part of the code is -fstrict-aliasing safe now. - */ -union tcp_word_hdr { - struct tcphdr hdr; - __be32 words[5]; -}; - -#define tcp_flag_word(tp) ( ((union tcp_word_hdr *)(tp))->words [3]) - -enum { - TCP_FLAG_CWR = __constant_cpu_to_be32(0x00800000), - TCP_FLAG_ECE = __constant_cpu_to_be32(0x00400000), - TCP_FLAG_URG = __constant_cpu_to_be32(0x00200000), - TCP_FLAG_ACK = __constant_cpu_to_be32(0x00100000), - TCP_FLAG_PSH = __constant_cpu_to_be32(0x00080000), - TCP_FLAG_RST = __constant_cpu_to_be32(0x00040000), - TCP_FLAG_SYN = __constant_cpu_to_be32(0x00020000), - TCP_FLAG_FIN = __constant_cpu_to_be32(0x00010000), - TCP_RESERVED_BITS = __constant_cpu_to_be32(0x0F000000), - TCP_DATA_OFFSET = __constant_cpu_to_be32(0xF0000000) -}; - -/* - * TCP general constants - */ -#define TCP_MSS_DEFAULT 536U /* IPv4 (RFC1122, RFC2581) */ -#define TCP_MSS_DESIRED 1220U /* IPv6 (tunneled), EDNS0 (RFC3226) */ - -/* TCP socket options */ -#define TCP_NODELAY 1 /* Turn off Nagle's algorithm. */ -#define TCP_MAXSEG 2 /* Limit MSS */ -#define TCP_CORK 3 /* Never send partially complete segments */ -#define TCP_KEEPIDLE 4 /* Start keeplives after this period */ -#define TCP_KEEPINTVL 5 /* Interval between keepalives */ -#define TCP_KEEPCNT 6 /* Number of keepalives before death */ -#define TCP_SYNCNT 7 /* Number of SYN retransmits */ -#define TCP_LINGER2 8 /* Life time of orphaned FIN-WAIT-2 state */ -#define TCP_DEFER_ACCEPT 9 /* Wake up listener only when data arrive */ -#define TCP_WINDOW_CLAMP 10 /* Bound advertised window */ -#define TCP_INFO 11 /* Information about this connection. */ -#define TCP_QUICKACK 12 /* Block/reenable quick acks */ -#define TCP_CONGESTION 13 /* Congestion control algorithm */ -#define TCP_MD5SIG 14 /* TCP MD5 Signature (RFC2385) */ -#define TCP_THIN_LINEAR_TIMEOUTS 16 /* Use linear timeouts for thin streams*/ -#define TCP_THIN_DUPACK 17 /* Fast retrans. after 1 dupack */ -#define TCP_USER_TIMEOUT 18 /* How long for loss retry before timeout */ -#define TCP_REPAIR 19 /* TCP sock is under repair right now */ -#define TCP_REPAIR_QUEUE 20 -#define TCP_QUEUE_SEQ 21 -#define TCP_REPAIR_OPTIONS 22 -#define TCP_FASTOPEN 23 /* Enable FastOpen on listeners */ -#define TCP_TIMESTAMP 24 -#define TCP_NOTSENT_LOWAT 25 /* limit number of unsent bytes in write queue */ -#define TCP_CC_INFO 26 /* Get Congestion Control (optional) info */ -#define TCP_SAVE_SYN 27 /* Record SYN headers for new connections */ -#define TCP_SAVED_SYN 28 /* Get SYN headers recorded for connection */ -#define TCP_REPAIR_WINDOW 29 /* Get/set window parameters */ -#define TCP_FASTOPEN_CONNECT 30 /* Attempt FastOpen with connect */ -#define TCP_ULP 31 /* Attach a ULP to a TCP connection */ -#define TCP_MD5SIG_EXT 32 /* TCP MD5 Signature with extensions */ -#define TCP_FASTOPEN_KEY 33 /* Set the key for Fast Open (cookie) */ -#define TCP_FASTOPEN_NO_COOKIE 34 /* Enable TFO without a TFO cookie */ -#define TCP_ZEROCOPY_RECEIVE 35 -#define TCP_INQ 36 /* Notify bytes available to read as a cmsg on read */ - -#define TCP_CM_INQ TCP_INQ - -#define TCP_TX_DELAY 37 /* delay outgoing packets by XX usec */ - - -#define TCP_REPAIR_ON 1 -#define TCP_REPAIR_OFF 0 -#define TCP_REPAIR_OFF_NO_WP -1 /* Turn off without window probes */ - -struct tcp_repair_opt { - __u32 opt_code; - __u32 opt_val; -}; - -struct tcp_repair_window { - __u32 snd_wl1; - __u32 snd_wnd; - __u32 max_window; - - __u32 rcv_wnd; - __u32 rcv_wup; -}; - -enum { - TCP_NO_QUEUE, - TCP_RECV_QUEUE, - TCP_SEND_QUEUE, - TCP_QUEUES_NR, -}; - -/* why fastopen failed from client perspective */ -enum tcp_fastopen_client_fail { - TFO_STATUS_UNSPEC, /* catch-all */ - TFO_COOKIE_UNAVAILABLE, /* if not in TFO_CLIENT_NO_COOKIE mode */ - TFO_DATA_NOT_ACKED, /* SYN-ACK did not ack SYN data */ - TFO_SYN_RETRANSMITTED, /* SYN-ACK did not ack SYN data after timeout */ -}; - -/* for TCP_INFO socket option */ -#define TCPI_OPT_TIMESTAMPS 1 -#define TCPI_OPT_SACK 2 -#define TCPI_OPT_WSCALE 4 -#define TCPI_OPT_ECN 8 /* ECN was negociated at TCP session init */ -#define TCPI_OPT_ECN_SEEN 16 /* we received at least one packet with ECT */ -#define TCPI_OPT_SYN_DATA 32 /* SYN-ACK acked data in SYN sent or rcvd */ - -/* - * Sender's congestion state indicating normal or abnormal situations - * in the last round of packets sent. The state is driven by the ACK - * information and timer events. - */ -enum tcp_ca_state { - /* - * Nothing bad has been observed recently. - * No apparent reordering, packet loss, or ECN marks. - */ - TCP_CA_Open = 0, -#define TCPF_CA_Open (1< -#include #include #include "bpf_dctcp.skel.h" #include "bpf_cubic.skel.h" diff --git a/tools/testing/selftests/bpf/prog_tests/cls_redirect.c b/tools/testing/selftests/bpf/prog_tests/cls_redirect.c index e075d03ab630..9781d85cb223 100644 --- a/tools/testing/selftests/bpf/prog_tests/cls_redirect.c +++ b/tools/testing/selftests/bpf/prog_tests/cls_redirect.c @@ -7,7 +7,6 @@ #include #include -#include #include diff --git a/tools/testing/selftests/bpf/prog_tests/sockmap_basic.c b/tools/testing/selftests/bpf/prog_tests/sockmap_basic.c index b8b48cac2ac3..85f73261fab0 100644 --- a/tools/testing/selftests/bpf/prog_tests/sockmap_basic.c +++ b/tools/testing/selftests/bpf/prog_tests/sockmap_basic.c @@ -1,7 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 // Copyright (c) 2020 Cloudflare #include -#include #include "test_progs.h" #include "test_skmsg_load_helpers.skel.h" diff --git a/tools/testing/selftests/bpf/prog_tests/sockopt_sk.c b/tools/testing/selftests/bpf/prog_tests/sockopt_sk.c index d5b44b135c00..b25c9c45c148 100644 --- a/tools/testing/selftests/bpf/prog_tests/sockopt_sk.c +++ b/tools/testing/selftests/bpf/prog_tests/sockopt_sk.c @@ -2,12 +2,6 @@ #include #include "cgroup_helpers.h" -#include - -#ifndef SOL_TCP -#define SOL_TCP IPPROTO_TCP -#endif - #define SOL_CUSTOM 0xdeadbeef static int getsetsockopt(void) @@ -17,7 +11,6 @@ static int getsetsockopt(void) char u8[4]; __u32 u32; char cc[16]; /* TCP_CA_NAME_MAX */ - struct tcp_zerocopy_receive zc; } buf = {}; socklen_t optlen; char *big_buf = NULL; @@ -161,27 +154,6 @@ static int getsetsockopt(void) goto err; } - /* TCP_ZEROCOPY_RECEIVE triggers */ - memset(&buf, 0, sizeof(buf)); - optlen = sizeof(buf.zc); - err = getsockopt(fd, SOL_TCP, TCP_ZEROCOPY_RECEIVE, &buf, &optlen); - if (err) { - log_err("Unexpected getsockopt(TCP_ZEROCOPY_RECEIVE) err=%d errno=%d", - err, errno); - goto err; - } - - memset(&buf, 0, sizeof(buf)); - buf.zc.address = 12345; /* rejected by BPF */ - optlen = sizeof(buf.zc); - errno = 0; - err = getsockopt(fd, SOL_TCP, TCP_ZEROCOPY_RECEIVE, &buf, &optlen); - if (errno != EPERM) { - log_err("Unexpected getsockopt(TCP_ZEROCOPY_RECEIVE) err=%d errno=%d", - err, errno); - goto err; - } - free(big_buf); close(fd); return 0; diff --git a/tools/testing/selftests/bpf/progs/sockopt_sk.c b/tools/testing/selftests/bpf/progs/sockopt_sk.c index d3597f81e6e9..712df7b49cb1 100644 --- a/tools/testing/selftests/bpf/progs/sockopt_sk.c +++ b/tools/testing/selftests/bpf/progs/sockopt_sk.c @@ -1,8 +1,8 @@ // SPDX-License-Identifier: GPL-2.0 #include -#include -#include #include +#include +#include #include char _license[] SEC("license") = "GPL"; @@ -12,10 +12,6 @@ __u32 _version SEC("version") = 1; #define PAGE_SIZE 4096 #endif -#ifndef SOL_TCP -#define SOL_TCP IPPROTO_TCP -#endif - #define SOL_CUSTOM 0xdeadbeef struct sockopt_sk { @@ -61,21 +57,6 @@ int _getsockopt(struct bpf_sockopt *ctx) return 1; } - if (ctx->level == SOL_TCP && ctx->optname == TCP_ZEROCOPY_RECEIVE) { - /* Verify that TCP_ZEROCOPY_RECEIVE triggers. - * It has a custom implementation for performance - * reasons. - */ - - if (optval + sizeof(struct tcp_zerocopy_receive) > optval_end) - return 0; /* EPERM, bounds check */ - - if (((struct tcp_zerocopy_receive *)optval)->address != 0) - return 0; /* EPERM, unexpected data */ - - return 1; - } - if (ctx->level == SOL_IP && ctx->optname == IP_FREEBIND) { if (optval + 1 > optval_end) return 0; /* EPERM, bounds check */ diff --git a/tools/testing/selftests/bpf/test_progs.h b/tools/testing/selftests/bpf/test_progs.h index 1d429d67f8dd..238f5f61189e 100644 --- a/tools/testing/selftests/bpf/test_progs.h +++ b/tools/testing/selftests/bpf/test_progs.h @@ -16,6 +16,7 @@ typedef __u16 __sum16; #include #include #include +#include #include #include #include