diff --git a/Documentation/networking/timestamping.txt b/Documentation/networking/timestamping.txt index a977339fbe0a..671cccf0dcd2 100644 --- a/Documentation/networking/timestamping.txt +++ b/Documentation/networking/timestamping.txt @@ -44,11 +44,17 @@ timeval of SO_TIMESTAMP (ms). Supports multiple types of timestamp requests. As a result, this socket option takes a bitmap of flags, not a boolean. In - err = setsockopt(fd, SOL_SOCKET, SO_TIMESTAMPING, (void *) val, &val); + err = setsockopt(fd, SOL_SOCKET, SO_TIMESTAMPING, (void *) val, + sizeof(val)); val is an integer with any of the following bits set. Setting other bit returns EINVAL and does not change the current state. +The socket option configures timestamp generation for individual +sk_buffs (1.3.1), timestamp reporting to the socket's error +queue (1.3.2) and options (1.3.3). Timestamp generation can also +be enabled for individual sendmsg calls using cmsg (1.3.4). + 1.3.1 Timestamp Generation @@ -71,13 +77,16 @@ SOF_TIMESTAMPING_RX_SOFTWARE: kernel receive stack. SOF_TIMESTAMPING_TX_HARDWARE: - Request tx timestamps generated by the network adapter. + Request tx timestamps generated by the network adapter. This flag + can be enabled via both socket options and control messages. SOF_TIMESTAMPING_TX_SOFTWARE: Request tx timestamps when data leaves the kernel. These timestamps are generated in the device driver as close as possible, but always prior to, passing the packet to the network interface. Hence, they require driver support and may not be available for all devices. + This flag can be enabled via both socket options and control messages. + SOF_TIMESTAMPING_TX_SCHED: Request tx timestamps prior to entering the packet scheduler. Kernel @@ -90,7 +99,8 @@ SOF_TIMESTAMPING_TX_SCHED: machines with virtual devices where a transmitted packet travels through multiple devices and, hence, multiple packet schedulers, a timestamp is generated at each layer. This allows for fine - grained measurement of queuing delay. + grained measurement of queuing delay. This flag can be enabled + via both socket options and control messages. SOF_TIMESTAMPING_TX_ACK: Request tx timestamps when all data in the send buffer has been @@ -99,6 +109,7 @@ SOF_TIMESTAMPING_TX_ACK: over-report measurement, because the timestamp is generated when all data up to and including the buffer at send() was acknowledged: the cumulative acknowledgment. The mechanism ignores SACK and FACK. + This flag can be enabled via both socket options and control messages. 1.3.2 Timestamp Reporting @@ -183,6 +194,37 @@ having access to the contents of the original packet, so cannot be combined with SOF_TIMESTAMPING_OPT_TSONLY. +1.3.4. Enabling timestamps via control messages + +In addition to socket options, timestamp generation can be requested +per write via cmsg, only for SOF_TIMESTAMPING_TX_* (see Section 1.3.1). +Using this feature, applications can sample timestamps per sendmsg() +without paying the overhead of enabling and disabling timestamps via +setsockopt: + + struct msghdr *msg; + ... + cmsg = CMSG_FIRSTHDR(msg); + cmsg->cmsg_level = SOL_SOCKET; + cmsg->cmsg_type = SO_TIMESTAMPING; + cmsg->cmsg_len = CMSG_LEN(sizeof(__u32)); + *((__u32 *) CMSG_DATA(cmsg)) = SOF_TIMESTAMPING_TX_SCHED | + SOF_TIMESTAMPING_TX_SOFTWARE | + SOF_TIMESTAMPING_TX_ACK; + err = sendmsg(fd, msg, 0); + +The SOF_TIMESTAMPING_TX_* flags set via cmsg will override +the SOF_TIMESTAMPING_TX_* flags set via setsockopt. + +Moreover, applications must still enable timestamp reporting via +setsockopt to receive timestamps: + + __u32 val = SOF_TIMESTAMPING_SOFTWARE | + SOF_TIMESTAMPING_OPT_ID /* or any other flag */; + err = setsockopt(fd, SOL_SOCKET, SO_TIMESTAMPING, (void *) val, + sizeof(val)); + + 1.4 Bytestream Timestamps The SO_TIMESTAMPING interface supports timestamping of bytes in a diff --git a/drivers/net/tun.c b/drivers/net/tun.c index 510e90a6bb26..9abc36bf77ea 100644 --- a/drivers/net/tun.c +++ b/drivers/net/tun.c @@ -861,7 +861,8 @@ static netdev_tx_t tun_net_xmit(struct sk_buff *skb, struct net_device *dev) goto drop; if (skb->sk && sk_fullsock(skb->sk)) { - sock_tx_timestamp(skb->sk, &skb_shinfo(skb)->tx_flags); + sock_tx_timestamp(skb->sk, skb->sk->sk_tsflags, + &skb_shinfo(skb)->tx_flags); sw_tx_timestamp(skb); } diff --git a/include/net/ip.h b/include/net/ip.h index fad74d323bd6..93725e546758 100644 --- a/include/net/ip.h +++ b/include/net/ip.h @@ -56,6 +56,7 @@ static inline unsigned int ip_hdrlen(const struct sk_buff *skb) } struct ipcm_cookie { + struct sockcm_cookie sockc; __be32 addr; int oif; struct ip_options_rcu *opt; @@ -550,7 +551,7 @@ int ip_options_rcv_srr(struct sk_buff *skb); void ipv4_pktinfo_prepare(const struct sock *sk, struct sk_buff *skb); void ip_cmsg_recv_offset(struct msghdr *msg, struct sk_buff *skb, int offset); -int ip_cmsg_send(struct net *net, struct msghdr *msg, +int ip_cmsg_send(struct sock *sk, struct msghdr *msg, struct ipcm_cookie *ipc, bool allow_ipv6); int ip_setsockopt(struct sock *sk, int level, int optname, char __user *optval, unsigned int optlen); diff --git a/include/net/ipv6.h b/include/net/ipv6.h index d0aeb97aec5d..55ee1eb7d026 100644 --- a/include/net/ipv6.h +++ b/include/net/ipv6.h @@ -867,7 +867,8 @@ int ip6_append_data(struct sock *sk, int odd, struct sk_buff *skb), void *from, int length, int transhdrlen, int hlimit, int tclass, struct ipv6_txoptions *opt, struct flowi6 *fl6, - struct rt6_info *rt, unsigned int flags, int dontfrag); + struct rt6_info *rt, unsigned int flags, int dontfrag, + const struct sockcm_cookie *sockc); int ip6_push_pending_frames(struct sock *sk); @@ -884,7 +885,8 @@ struct sk_buff *ip6_make_skb(struct sock *sk, void *from, int length, int transhdrlen, int hlimit, int tclass, struct ipv6_txoptions *opt, struct flowi6 *fl6, struct rt6_info *rt, - unsigned int flags, int dontfrag); + unsigned int flags, int dontfrag, + const struct sockcm_cookie *sockc); static inline struct sk_buff *ip6_finish_skb(struct sock *sk) { diff --git a/include/net/sock.h b/include/net/sock.h index 255d3e03727b..e91b87f54f99 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -1418,8 +1418,11 @@ void sk_send_sigurg(struct sock *sk); struct sockcm_cookie { u32 mark; + u16 tsflags; }; +int __sock_cmsg_send(struct sock *sk, struct msghdr *msg, struct cmsghdr *cmsg, + struct sockcm_cookie *sockc); int sock_cmsg_send(struct sock *sk, struct msghdr *msg, struct sockcm_cookie *sockc); @@ -2054,19 +2057,21 @@ static inline void sock_recv_ts_and_drops(struct msghdr *msg, struct sock *sk, sk->sk_stamp = skb->tstamp; } -void __sock_tx_timestamp(const struct sock *sk, __u8 *tx_flags); +void __sock_tx_timestamp(__u16 tsflags, __u8 *tx_flags); /** * sock_tx_timestamp - checks whether the outgoing packet is to be time stamped * @sk: socket sending this packet + * @tsflags: timestamping flags to use * @tx_flags: completed with instructions for time stamping * * Note : callers should take care of initial *tx_flags value (usually 0) */ -static inline void sock_tx_timestamp(const struct sock *sk, __u8 *tx_flags) +static inline void sock_tx_timestamp(const struct sock *sk, __u16 tsflags, + __u8 *tx_flags) { - if (unlikely(sk->sk_tsflags)) - __sock_tx_timestamp(sk, tx_flags); + if (unlikely(tsflags)) + __sock_tx_timestamp(tsflags, tx_flags); if (unlikely(sock_flag(sk, SOCK_WIFI_STATUS))) *tx_flags |= SKBTX_WIFI_STATUS; } diff --git a/include/net/tcp.h b/include/net/tcp.h index f8bb4a4ed3d1..a23282996ca9 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -754,7 +754,8 @@ struct tcp_skb_cb { TCPCB_REPAIRED) __u8 ip_dsfield; /* IPv4 tos or IPv6 dsfield */ - /* 1 byte hole */ + __u8 txstamp_ack:1, /* Record TX timestamp for ack? */ + unused:7; __u32 ack_seq; /* Sequence number ACK'd */ union { struct inet_skb_parm h4; diff --git a/include/net/transp_v6.h b/include/net/transp_v6.h index b927413dde86..2b1c3450ab20 100644 --- a/include/net/transp_v6.h +++ b/include/net/transp_v6.h @@ -42,7 +42,8 @@ void ip6_datagram_recv_specific_ctl(struct sock *sk, struct msghdr *msg, int ip6_datagram_send_ctl(struct net *net, struct sock *sk, struct msghdr *msg, struct flowi6 *fl6, struct ipv6_txoptions *opt, - int *hlimit, int *tclass, int *dontfrag); + int *hlimit, int *tclass, int *dontfrag, + struct sockcm_cookie *sockc); void ip6_dgram_sock_seq_show(struct seq_file *seq, struct sock *sp, __u16 srcp, __u16 destp, int bucket); diff --git a/include/uapi/linux/net_tstamp.h b/include/uapi/linux/net_tstamp.h index 6d1abea9746e..264e515de16f 100644 --- a/include/uapi/linux/net_tstamp.h +++ b/include/uapi/linux/net_tstamp.h @@ -31,6 +31,16 @@ enum { SOF_TIMESTAMPING_LAST }; +/* + * SO_TIMESTAMPING flags are either for recording a packet timestamp or for + * reporting the timestamp to user space. + * Recording flags can be set both via socket options and control messages. + */ +#define SOF_TIMESTAMPING_TX_RECORD_MASK (SOF_TIMESTAMPING_TX_HARDWARE | \ + SOF_TIMESTAMPING_TX_SOFTWARE | \ + SOF_TIMESTAMPING_TX_SCHED | \ + SOF_TIMESTAMPING_TX_ACK) + /** * struct hwtstamp_config - %SIOCGHWTSTAMP and %SIOCSHWTSTAMP parameter * diff --git a/net/can/raw.c b/net/can/raw.c index 2e67b1423cd3..972c187d40ab 100644 --- a/net/can/raw.c +++ b/net/can/raw.c @@ -755,7 +755,7 @@ static int raw_sendmsg(struct socket *sock, struct msghdr *msg, size_t size) if (err < 0) goto free_skb; - sock_tx_timestamp(sk, &skb_shinfo(skb)->tx_flags); + sock_tx_timestamp(sk, sk->sk_tsflags, &skb_shinfo(skb)->tx_flags); skb->dev = dev; skb->sk = sk; diff --git a/net/core/sock.c b/net/core/sock.c index b67b9aedb230..315f5e57fffe 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -832,7 +832,8 @@ set_rcvbuf: !(sk->sk_tsflags & SOF_TIMESTAMPING_OPT_ID)) { if (sk->sk_protocol == IPPROTO_TCP && sk->sk_type == SOCK_STREAM) { - if (sk->sk_state != TCP_ESTABLISHED) { + if ((1 << sk->sk_state) & + (TCPF_CLOSE | TCPF_LISTEN)) { ret = -EINVAL; break; } @@ -1866,27 +1867,51 @@ struct sk_buff *sock_alloc_send_skb(struct sock *sk, unsigned long size, } EXPORT_SYMBOL(sock_alloc_send_skb); +int __sock_cmsg_send(struct sock *sk, struct msghdr *msg, struct cmsghdr *cmsg, + struct sockcm_cookie *sockc) +{ + u32 tsflags; + + switch (cmsg->cmsg_type) { + case SO_MARK: + if (!ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN)) + return -EPERM; + if (cmsg->cmsg_len != CMSG_LEN(sizeof(u32))) + return -EINVAL; + sockc->mark = *(u32 *)CMSG_DATA(cmsg); + break; + case SO_TIMESTAMPING: + if (cmsg->cmsg_len != CMSG_LEN(sizeof(u32))) + return -EINVAL; + + tsflags = *(u32 *)CMSG_DATA(cmsg); + if (tsflags & ~SOF_TIMESTAMPING_TX_RECORD_MASK) + return -EINVAL; + + sockc->tsflags &= ~SOF_TIMESTAMPING_TX_RECORD_MASK; + sockc->tsflags |= tsflags; + break; + default: + return -EINVAL; + } + return 0; +} +EXPORT_SYMBOL(__sock_cmsg_send); + int sock_cmsg_send(struct sock *sk, struct msghdr *msg, struct sockcm_cookie *sockc) { struct cmsghdr *cmsg; + int ret; for_each_cmsghdr(cmsg, msg) { if (!CMSG_OK(msg, cmsg)) return -EINVAL; if (cmsg->cmsg_level != SOL_SOCKET) continue; - switch (cmsg->cmsg_type) { - case SO_MARK: - if (!ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN)) - return -EPERM; - if (cmsg->cmsg_len != CMSG_LEN(sizeof(u32))) - return -EINVAL; - sockc->mark = *(u32 *)CMSG_DATA(cmsg); - break; - default: - return -EINVAL; - } + ret = __sock_cmsg_send(sk, msg, cmsg, sockc); + if (ret) + return ret; } return 0; } diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c index 035ad645a8d9..1b7c0776c805 100644 --- a/net/ipv4/ip_sockglue.c +++ b/net/ipv4/ip_sockglue.c @@ -219,11 +219,12 @@ void ip_cmsg_recv_offset(struct msghdr *msg, struct sk_buff *skb, } EXPORT_SYMBOL(ip_cmsg_recv_offset); -int ip_cmsg_send(struct net *net, struct msghdr *msg, struct ipcm_cookie *ipc, +int ip_cmsg_send(struct sock *sk, struct msghdr *msg, struct ipcm_cookie *ipc, bool allow_ipv6) { int err, val; struct cmsghdr *cmsg; + struct net *net = sock_net(sk); for_each_cmsghdr(cmsg, msg) { if (!CMSG_OK(msg, cmsg)) @@ -244,6 +245,12 @@ int ip_cmsg_send(struct net *net, struct msghdr *msg, struct ipcm_cookie *ipc, continue; } #endif + if (cmsg->cmsg_level == SOL_SOCKET) { + if (__sock_cmsg_send(sk, msg, cmsg, &ipc->sockc)) + return -EINVAL; + continue; + } + if (cmsg->cmsg_level != SOL_IP) continue; switch (cmsg->cmsg_type) { diff --git a/net/ipv4/ping.c b/net/ipv4/ping.c index cf9700b1a106..66ddcb60519a 100644 --- a/net/ipv4/ping.c +++ b/net/ipv4/ping.c @@ -737,6 +737,7 @@ static int ping_v4_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) /* no remote port */ } + ipc.sockc.tsflags = sk->sk_tsflags; ipc.addr = inet->inet_saddr; ipc.opt = NULL; ipc.oif = sk->sk_bound_dev_if; @@ -744,10 +745,8 @@ static int ping_v4_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) ipc.ttl = 0; ipc.tos = -1; - sock_tx_timestamp(sk, &ipc.tx_flags); - if (msg->msg_controllen) { - err = ip_cmsg_send(sock_net(sk), msg, &ipc, false); + err = ip_cmsg_send(sk, msg, &ipc, false); if (unlikely(err)) { kfree(ipc.opt); return err; @@ -768,6 +767,8 @@ static int ping_v4_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) rcu_read_unlock(); } + sock_tx_timestamp(sk, ipc.sockc.tsflags, &ipc.tx_flags); + saddr = ipc.addr; ipc.addr = faddr = daddr; diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c index 8d22de74080c..438f50c1a676 100644 --- a/net/ipv4/raw.c +++ b/net/ipv4/raw.c @@ -339,8 +339,8 @@ int raw_rcv(struct sock *sk, struct sk_buff *skb) static int raw_send_hdrinc(struct sock *sk, struct flowi4 *fl4, struct msghdr *msg, size_t length, - struct rtable **rtp, - unsigned int flags) + struct rtable **rtp, unsigned int flags, + const struct sockcm_cookie *sockc) { struct inet_sock *inet = inet_sk(sk); struct net *net = sock_net(sk); @@ -379,7 +379,7 @@ static int raw_send_hdrinc(struct sock *sk, struct flowi4 *fl4, skb->ip_summed = CHECKSUM_NONE; - sock_tx_timestamp(sk, &skb_shinfo(skb)->tx_flags); + sock_tx_timestamp(sk, sockc->tsflags, &skb_shinfo(skb)->tx_flags); skb->transport_header = skb->network_header; err = -EFAULT; @@ -540,6 +540,7 @@ static int raw_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) daddr = inet->inet_daddr; } + ipc.sockc.tsflags = sk->sk_tsflags; ipc.addr = inet->inet_saddr; ipc.opt = NULL; ipc.tx_flags = 0; @@ -548,7 +549,7 @@ static int raw_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) ipc.oif = sk->sk_bound_dev_if; if (msg->msg_controllen) { - err = ip_cmsg_send(net, msg, &ipc, false); + err = ip_cmsg_send(sk, msg, &ipc, false); if (unlikely(err)) { kfree(ipc.opt); goto out; @@ -638,10 +639,10 @@ back_from_confirm: if (inet->hdrincl) err = raw_send_hdrinc(sk, &fl4, msg, len, - &rt, msg->msg_flags); + &rt, msg->msg_flags, &ipc.sockc); else { - sock_tx_timestamp(sk, &ipc.tx_flags); + sock_tx_timestamp(sk, ipc.sockc.tsflags, &ipc.tx_flags); if (!ipc.addr) ipc.addr = fl4.daddr; diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 08b8b960a8ed..4d73858991af 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -428,14 +428,16 @@ void tcp_init_sock(struct sock *sk) } EXPORT_SYMBOL(tcp_init_sock); -static void tcp_tx_timestamp(struct sock *sk, struct sk_buff *skb) +static void tcp_tx_timestamp(struct sock *sk, u16 tsflags, struct sk_buff *skb) { - if (sk->sk_tsflags) { + if (sk->sk_tsflags || tsflags) { struct skb_shared_info *shinfo = skb_shinfo(skb); + struct tcp_skb_cb *tcb = TCP_SKB_CB(skb); - sock_tx_timestamp(sk, &shinfo->tx_flags); + sock_tx_timestamp(sk, tsflags, &shinfo->tx_flags); if (shinfo->tx_flags & SKBTX_ANY_TSTAMP) shinfo->tskey = TCP_SKB_CB(skb)->seq + skb->len - 1; + tcb->txstamp_ack = !!(shinfo->tx_flags & SKBTX_ACK_TSTAMP); } } @@ -957,7 +959,7 @@ new_segment: offset += copy; size -= copy; if (!size) { - tcp_tx_timestamp(sk, skb); + tcp_tx_timestamp(sk, sk->sk_tsflags, skb); goto out; } @@ -1077,6 +1079,7 @@ int tcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t size) { struct tcp_sock *tp = tcp_sk(sk); struct sk_buff *skb; + struct sockcm_cookie sockc; int flags, err, copied = 0; int mss_now = 0, size_goal, copied_syn = 0; bool sg; @@ -1119,6 +1122,15 @@ int tcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t size) /* 'common' sending to sendq */ } + sockc.tsflags = sk->sk_tsflags; + if (msg->msg_controllen) { + err = sock_cmsg_send(sk, msg, &sockc); + if (unlikely(err)) { + err = -EINVAL; + goto out_err; + } + } + /* This should be in poll */ sk_clear_bit(SOCKWQ_ASYNC_NOSPACE, sk); @@ -1237,7 +1249,7 @@ new_segment: copied += copy; if (!msg_data_left(msg)) { - tcp_tx_timestamp(sk, skb); + tcp_tx_timestamp(sk, sockc.tsflags, skb); goto out; } diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index f87b84a75691..a26e2d262358 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -3082,7 +3082,7 @@ static void tcp_ack_tstamp(struct sock *sk, struct sk_buff *skb, const struct skb_shared_info *shinfo; /* Avoid cache line misses to get skb_shinfo() and shinfo->tx_flags */ - if (likely(!(sk->sk_tsflags & SOF_TIMESTAMPING_TX_ACK))) + if (likely(!TCP_SKB_CB(skb)->txstamp_ack)) return; shinfo = skb_shinfo(skb); diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 08eed5e16df0..45ff590661f4 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -1027,15 +1027,13 @@ int udp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) */ connected = 1; } - ipc.addr = inet->inet_saddr; + ipc.sockc.tsflags = sk->sk_tsflags; + ipc.addr = inet->inet_saddr; ipc.oif = sk->sk_bound_dev_if; - sock_tx_timestamp(sk, &ipc.tx_flags); - if (msg->msg_controllen) { - err = ip_cmsg_send(sock_net(sk), msg, &ipc, - sk->sk_family == AF_INET6); + err = ip_cmsg_send(sk, msg, &ipc, sk->sk_family == AF_INET6); if (unlikely(err)) { kfree(ipc.opt); return err; @@ -1060,6 +1058,8 @@ int udp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) saddr = ipc.addr; ipc.addr = faddr = daddr; + sock_tx_timestamp(sk, ipc.sockc.tsflags, &ipc.tx_flags); + if (ipc.opt && ipc.opt->opt.srr) { if (!daddr) return -EINVAL; diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c index 428162155280..a73d70119fcd 100644 --- a/net/ipv6/datagram.c +++ b/net/ipv6/datagram.c @@ -685,7 +685,8 @@ EXPORT_SYMBOL_GPL(ip6_datagram_recv_ctl); int ip6_datagram_send_ctl(struct net *net, struct sock *sk, struct msghdr *msg, struct flowi6 *fl6, struct ipv6_txoptions *opt, - int *hlimit, int *tclass, int *dontfrag) + int *hlimit, int *tclass, int *dontfrag, + struct sockcm_cookie *sockc) { struct in6_pktinfo *src_info; struct cmsghdr *cmsg; @@ -702,6 +703,12 @@ int ip6_datagram_send_ctl(struct net *net, struct sock *sk, goto exit_f; } + if (cmsg->cmsg_level == SOL_SOCKET) { + if (__sock_cmsg_send(sk, msg, cmsg, sockc)) + return -EINVAL; + continue; + } + if (cmsg->cmsg_level != SOL_IPV6) continue; diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c index 0a37ddc7af51..6b573ebe49de 100644 --- a/net/ipv6/icmp.c +++ b/net/ipv6/icmp.c @@ -400,6 +400,7 @@ static void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info) struct icmp6hdr tmp_hdr; struct flowi6 fl6; struct icmpv6_msg msg; + struct sockcm_cookie sockc_unused = {0}; int iif = 0; int addr_type = 0; int len; @@ -527,7 +528,7 @@ static void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info) len + sizeof(struct icmp6hdr), sizeof(struct icmp6hdr), hlimit, np->tclass, NULL, &fl6, (struct rt6_info *)dst, - MSG_DONTWAIT, np->dontfrag); + MSG_DONTWAIT, np->dontfrag, &sockc_unused); if (err) { ICMP6_INC_STATS(net, idev, ICMP6_MIB_OUTERRORS); ip6_flush_pending_frames(sk); @@ -566,6 +567,7 @@ static void icmpv6_echo_reply(struct sk_buff *skb) int hlimit; u8 tclass; u32 mark = IP6_REPLY_MARK(net, skb->mark); + struct sockcm_cookie sockc_unused = {0}; saddr = &ipv6_hdr(skb)->daddr; @@ -617,7 +619,7 @@ static void icmpv6_echo_reply(struct sk_buff *skb) err = ip6_append_data(sk, icmpv6_getfrag, &msg, skb->len + sizeof(struct icmp6hdr), sizeof(struct icmp6hdr), hlimit, tclass, NULL, &fl6, (struct rt6_info *)dst, MSG_DONTWAIT, - np->dontfrag); + np->dontfrag, &sockc_unused); if (err) { ICMP6_INC_STATS_BH(net, idev, ICMP6_MIB_OUTERRORS); diff --git a/net/ipv6/ip6_flowlabel.c b/net/ipv6/ip6_flowlabel.c index dc2db4f7b182..35d3ddc328f8 100644 --- a/net/ipv6/ip6_flowlabel.c +++ b/net/ipv6/ip6_flowlabel.c @@ -372,6 +372,7 @@ fl_create(struct net *net, struct sock *sk, struct in6_flowlabel_req *freq, if (olen > 0) { struct msghdr msg; struct flowi6 flowi6; + struct sockcm_cookie sockc_junk; int junk; err = -ENOMEM; @@ -390,7 +391,7 @@ fl_create(struct net *net, struct sock *sk, struct in6_flowlabel_req *freq, memset(&flowi6, 0, sizeof(flowi6)); err = ip6_datagram_send_ctl(net, sk, &msg, &flowi6, fl->opt, - &junk, &junk, &junk); + &junk, &junk, &junk, &sockc_junk); if (err) goto done; err = -EINVAL; diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index 9428345d3a07..612f3d138bf0 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -1258,7 +1258,8 @@ static int __ip6_append_data(struct sock *sk, int getfrag(void *from, char *to, int offset, int len, int odd, struct sk_buff *skb), void *from, int length, int transhdrlen, - unsigned int flags, int dontfrag) + unsigned int flags, int dontfrag, + const struct sockcm_cookie *sockc) { struct sk_buff *skb, *skb_prev = NULL; unsigned int maxfraglen, fragheaderlen, mtu, orig_mtu; @@ -1329,7 +1330,7 @@ emsgsize: csummode = CHECKSUM_PARTIAL; if (sk->sk_type == SOCK_DGRAM || sk->sk_type == SOCK_RAW) { - sock_tx_timestamp(sk, &tx_flags); + sock_tx_timestamp(sk, sockc->tsflags, &tx_flags); if (tx_flags & SKBTX_ANY_SW_TSTAMP && sk->sk_tsflags & SOF_TIMESTAMPING_OPT_ID) tskey = sk->sk_tskey++; @@ -1565,7 +1566,8 @@ int ip6_append_data(struct sock *sk, int odd, struct sk_buff *skb), void *from, int length, int transhdrlen, int hlimit, int tclass, struct ipv6_txoptions *opt, struct flowi6 *fl6, - struct rt6_info *rt, unsigned int flags, int dontfrag) + struct rt6_info *rt, unsigned int flags, int dontfrag, + const struct sockcm_cookie *sockc) { struct inet_sock *inet = inet_sk(sk); struct ipv6_pinfo *np = inet6_sk(sk); @@ -1593,7 +1595,8 @@ int ip6_append_data(struct sock *sk, return __ip6_append_data(sk, fl6, &sk->sk_write_queue, &inet->cork.base, &np->cork, sk_page_frag(sk), getfrag, - from, length, transhdrlen, flags, dontfrag); + from, length, transhdrlen, flags, dontfrag, + sockc); } EXPORT_SYMBOL_GPL(ip6_append_data); @@ -1752,7 +1755,7 @@ struct sk_buff *ip6_make_skb(struct sock *sk, int hlimit, int tclass, struct ipv6_txoptions *opt, struct flowi6 *fl6, struct rt6_info *rt, unsigned int flags, - int dontfrag) + int dontfrag, const struct sockcm_cookie *sockc) { struct inet_cork_full cork; struct inet6_cork v6_cork; @@ -1779,7 +1782,7 @@ struct sk_buff *ip6_make_skb(struct sock *sk, err = __ip6_append_data(sk, fl6, &queue, &cork.base, &v6_cork, ¤t->task_frag, getfrag, from, length + exthdrlen, transhdrlen + exthdrlen, - flags, dontfrag); + flags, dontfrag, sockc); if (err) { __ip6_flush_pending_frames(sk, &queue, &cork, &v6_cork); return ERR_PTR(err); diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c index 4449ad1f8114..a5557d22f89e 100644 --- a/net/ipv6/ipv6_sockglue.c +++ b/net/ipv6/ipv6_sockglue.c @@ -471,6 +471,7 @@ sticky_done: struct ipv6_txoptions *opt = NULL; struct msghdr msg; struct flowi6 fl6; + struct sockcm_cookie sockc_junk; int junk; memset(&fl6, 0, sizeof(fl6)); @@ -503,7 +504,7 @@ sticky_done: msg.msg_control = (void *)(opt+1); retv = ip6_datagram_send_ctl(net, sk, &msg, &fl6, opt, &junk, - &junk, &junk); + &junk, &junk, &sockc_junk); if (retv) goto done; update: diff --git a/net/ipv6/ping.c b/net/ipv6/ping.c index c382db7a2e73..da1cff79e447 100644 --- a/net/ipv6/ping.c +++ b/net/ipv6/ping.c @@ -62,6 +62,7 @@ static int ping_v6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) struct dst_entry *dst; struct rt6_info *rt; struct pingfakehdr pfh; + struct sockcm_cookie junk = {0}; pr_debug("ping_v6_sendmsg(sk=%p,sk->num=%u)\n", inet, inet->inet_num); @@ -144,7 +145,7 @@ static int ping_v6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) err = ip6_append_data(sk, ping_getfrag, &pfh, len, 0, hlimit, np->tclass, NULL, &fl6, rt, - MSG_DONTWAIT, np->dontfrag); + MSG_DONTWAIT, np->dontfrag, &junk); if (err) { ICMP6_INC_STATS(sock_net(sk), rt->rt6i_idev, diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c index fa59dd7a427e..b07ce21983aa 100644 --- a/net/ipv6/raw.c +++ b/net/ipv6/raw.c @@ -745,6 +745,7 @@ static int rawv6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) struct dst_entry *dst = NULL; struct raw6_frag_vec rfv; struct flowi6 fl6; + struct sockcm_cookie sockc; int addr_len = msg->msg_namelen; int hlimit = -1; int tclass = -1; @@ -821,13 +822,15 @@ static int rawv6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) if (fl6.flowi6_oif == 0) fl6.flowi6_oif = sk->sk_bound_dev_if; + sockc.tsflags = sk->sk_tsflags; if (msg->msg_controllen) { opt = &opt_space; memset(opt, 0, sizeof(struct ipv6_txoptions)); opt->tot_len = sizeof(struct ipv6_txoptions); err = ip6_datagram_send_ctl(sock_net(sk), sk, msg, &fl6, opt, - &hlimit, &tclass, &dontfrag); + &hlimit, &tclass, &dontfrag, + &sockc); if (err < 0) { fl6_sock_release(flowlabel); return err; @@ -897,7 +900,7 @@ back_from_confirm: lock_sock(sk); err = ip6_append_data(sk, raw6_getfrag, &rfv, len, 0, hlimit, tclass, opt, &fl6, (struct rt6_info *)dst, - msg->msg_flags, dontfrag); + msg->msg_flags, dontfrag, &sockc); if (err) ip6_flush_pending_frames(sk); diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index 8125931106be..b772a7641fbd 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -1128,6 +1128,7 @@ int udpv6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) int connected = 0; int is_udplite = IS_UDPLITE(sk); int (*getfrag)(void *, char *, int, int, int, struct sk_buff *); + struct sockcm_cookie sockc; /* destination address check */ if (sin6) { @@ -1247,6 +1248,7 @@ do_udp_sendmsg: fl6.flowi6_oif = np->sticky_pktinfo.ipi6_ifindex; fl6.flowi6_mark = sk->sk_mark; + sockc.tsflags = sk->sk_tsflags; if (msg->msg_controllen) { opt = &opt_space; @@ -1254,7 +1256,8 @@ do_udp_sendmsg: opt->tot_len = sizeof(*opt); err = ip6_datagram_send_ctl(sock_net(sk), sk, msg, &fl6, opt, - &hlimit, &tclass, &dontfrag); + &hlimit, &tclass, &dontfrag, + &sockc); if (err < 0) { fl6_sock_release(flowlabel); return err; @@ -1321,7 +1324,7 @@ back_from_confirm: skb = ip6_make_skb(sk, getfrag, msg, ulen, sizeof(struct udphdr), hlimit, tclass, opt, &fl6, (struct rt6_info *)dst, - msg->msg_flags, dontfrag); + msg->msg_flags, dontfrag, &sockc); err = PTR_ERR(skb); if (!IS_ERR_OR_NULL(skb)) err = udp_v6_send_skb(skb, &fl6); @@ -1348,7 +1351,8 @@ do_append_data: err = ip6_append_data(sk, getfrag, msg, ulen, sizeof(struct udphdr), hlimit, tclass, opt, &fl6, (struct rt6_info *)dst, - corkreq ? msg->msg_flags|MSG_MORE : msg->msg_flags, dontfrag); + corkreq ? msg->msg_flags|MSG_MORE : msg->msg_flags, dontfrag, + &sockc); if (err) udp_v6_flush_pending_frames(sk); else if (!corkreq) diff --git a/net/l2tp/l2tp_ip6.c b/net/l2tp/l2tp_ip6.c index 6b54ff3ff4cb..1a38f20b1ca6 100644 --- a/net/l2tp/l2tp_ip6.c +++ b/net/l2tp/l2tp_ip6.c @@ -492,6 +492,7 @@ static int l2tp_ip6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) struct ip6_flowlabel *flowlabel = NULL; struct dst_entry *dst = NULL; struct flowi6 fl6; + struct sockcm_cookie sockc_unused = {0}; int addr_len = msg->msg_namelen; int hlimit = -1; int tclass = -1; @@ -562,9 +563,10 @@ static int l2tp_ip6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) memset(opt, 0, sizeof(struct ipv6_txoptions)); opt->tot_len = sizeof(struct ipv6_txoptions); - err = ip6_datagram_send_ctl(sock_net(sk), sk, msg, &fl6, opt, - &hlimit, &tclass, &dontfrag); - if (err < 0) { + err = ip6_datagram_send_ctl(sock_net(sk), sk, msg, &fl6, opt, + &hlimit, &tclass, &dontfrag, + &sockc_unused); + if (err < 0) { fl6_sock_release(flowlabel); return err; } @@ -625,7 +627,7 @@ back_from_confirm: err = ip6_append_data(sk, ip_generic_getfrag, msg, ulen, transhdrlen, hlimit, tclass, opt, &fl6, (struct rt6_info *)dst, - msg->msg_flags, dontfrag); + msg->msg_flags, dontfrag, &sockc_unused); if (err) ip6_flush_pending_frames(sk); else if (!(msg->msg_flags & MSG_MORE)) diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index 1ecfa710ca98..0007e23202e4 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -1837,6 +1837,7 @@ static int packet_sendmsg_spkt(struct socket *sock, struct msghdr *msg, DECLARE_SOCKADDR(struct sockaddr_pkt *, saddr, msg->msg_name); struct sk_buff *skb = NULL; struct net_device *dev; + struct sockcm_cookie sockc; __be16 proto = 0; int err; int extra_len = 0; @@ -1925,12 +1926,21 @@ retry: goto out_unlock; } + sockc.tsflags = 0; + if (msg->msg_controllen) { + err = sock_cmsg_send(sk, msg, &sockc); + if (unlikely(err)) { + err = -EINVAL; + goto out_unlock; + } + } + skb->protocol = proto; skb->dev = dev; skb->priority = sk->sk_priority; skb->mark = sk->sk_mark; - sock_tx_timestamp(sk, &skb_shinfo(skb)->tx_flags); + sock_tx_timestamp(sk, sockc.tsflags, &skb_shinfo(skb)->tx_flags); if (unlikely(extra_len == 4)) skb->no_fcs = 1; @@ -2486,7 +2496,8 @@ static int packet_snd_vnet_gso(struct sk_buff *skb, static int tpacket_fill_skb(struct packet_sock *po, struct sk_buff *skb, void *frame, struct net_device *dev, void *data, int tp_len, - __be16 proto, unsigned char *addr, int hlen, int copylen) + __be16 proto, unsigned char *addr, int hlen, int copylen, + const struct sockcm_cookie *sockc) { union tpacket_uhdr ph; int to_write, offset, len, nr_frags, len_max; @@ -2500,7 +2511,7 @@ static int tpacket_fill_skb(struct packet_sock *po, struct sk_buff *skb, skb->dev = dev; skb->priority = po->sk.sk_priority; skb->mark = po->sk.sk_mark; - sock_tx_timestamp(&po->sk, &skb_shinfo(skb)->tx_flags); + sock_tx_timestamp(&po->sk, sockc->tsflags, &skb_shinfo(skb)->tx_flags); skb_shinfo(skb)->destructor_arg = ph.raw; skb_reserve(skb, hlen); @@ -2624,6 +2635,7 @@ static int tpacket_snd(struct packet_sock *po, struct msghdr *msg) struct sk_buff *skb; struct net_device *dev; struct virtio_net_hdr *vnet_hdr = NULL; + struct sockcm_cookie sockc; __be16 proto; int err, reserve = 0; void *ph; @@ -2655,6 +2667,13 @@ static int tpacket_snd(struct packet_sock *po, struct msghdr *msg) dev = dev_get_by_index(sock_net(&po->sk), saddr->sll_ifindex); } + sockc.tsflags = 0; + if (msg->msg_controllen) { + err = sock_cmsg_send(&po->sk, msg, &sockc); + if (unlikely(err)) + goto out; + } + err = -ENXIO; if (unlikely(dev == NULL)) goto out; @@ -2712,7 +2731,7 @@ static int tpacket_snd(struct packet_sock *po, struct msghdr *msg) goto out_status; } tp_len = tpacket_fill_skb(po, skb, ph, dev, data, tp_len, proto, - addr, hlen, copylen); + addr, hlen, copylen, &sockc); if (likely(tp_len >= 0) && tp_len > dev->mtu + reserve && !po->has_vnet_hdr && @@ -2851,6 +2870,7 @@ static int packet_snd(struct socket *sock, struct msghdr *msg, size_t len) if (unlikely(!(dev->flags & IFF_UP))) goto out_unlock; + sockc.tsflags = 0; sockc.mark = sk->sk_mark; if (msg->msg_controllen) { err = sock_cmsg_send(sk, msg, &sockc); @@ -2908,7 +2928,7 @@ static int packet_snd(struct socket *sock, struct msghdr *msg, size_t len) goto out_free; } - sock_tx_timestamp(sk, &skb_shinfo(skb)->tx_flags); + sock_tx_timestamp(sk, sockc.tsflags, &skb_shinfo(skb)->tx_flags); if (!vnet_hdr.gso_type && (len > dev->mtu + reserve + extra_len) && !packet_extra_vlan_len_allowed(dev, skb)) { diff --git a/net/socket.c b/net/socket.c index 5f77a8e93830..979d3146b081 100644 --- a/net/socket.c +++ b/net/socket.c @@ -587,20 +587,20 @@ void sock_release(struct socket *sock) } EXPORT_SYMBOL(sock_release); -void __sock_tx_timestamp(const struct sock *sk, __u8 *tx_flags) +void __sock_tx_timestamp(__u16 tsflags, __u8 *tx_flags) { u8 flags = *tx_flags; - if (sk->sk_tsflags & SOF_TIMESTAMPING_TX_HARDWARE) + if (tsflags & SOF_TIMESTAMPING_TX_HARDWARE) flags |= SKBTX_HW_TSTAMP; - if (sk->sk_tsflags & SOF_TIMESTAMPING_TX_SOFTWARE) + if (tsflags & SOF_TIMESTAMPING_TX_SOFTWARE) flags |= SKBTX_SW_TSTAMP; - if (sk->sk_tsflags & SOF_TIMESTAMPING_TX_SCHED) + if (tsflags & SOF_TIMESTAMPING_TX_SCHED) flags |= SKBTX_SCHED_TSTAMP; - if (sk->sk_tsflags & SOF_TIMESTAMPING_TX_ACK) + if (tsflags & SOF_TIMESTAMPING_TX_ACK) flags |= SKBTX_ACK_TSTAMP; *tx_flags = flags;