mptcp: Handle MP_CAPABLE options for outgoing connections
Add hooks to tcp_output.c to add MP_CAPABLE to an outgoing SYN request, to capture the MP_CAPABLE in the received SYN-ACK, to add MP_CAPABLE to the final ACK of the three-way handshake. Use the .sk_rx_dst_set() handler in the subflow proto to capture when the responding SYN-ACK is received and notify the MPTCP connection layer. Co-developed-by: Paolo Abeni <pabeni@redhat.com> Signed-off-by: Paolo Abeni <pabeni@redhat.com> Co-developed-by: Florian Westphal <fw@strlen.de> Signed-off-by: Florian Westphal <fw@strlen.de> Signed-off-by: Peter Krystad <peter.krystad@linux.intel.com> Signed-off-by: Christoph Paasch <cpaasch@apple.com> Signed-off-by: David S. Miller <davem@davemloft.net>
This commit is contained in:
parent
2303f994b3
commit
cec37a6e41
@ -137,6 +137,9 @@ struct tcp_request_sock {
|
|||||||
const struct tcp_request_sock_ops *af_specific;
|
const struct tcp_request_sock_ops *af_specific;
|
||||||
u64 snt_synack; /* first SYNACK sent time */
|
u64 snt_synack; /* first SYNACK sent time */
|
||||||
bool tfo_listener;
|
bool tfo_listener;
|
||||||
|
#if IS_ENABLED(CONFIG_MPTCP)
|
||||||
|
bool is_mptcp;
|
||||||
|
#endif
|
||||||
u32 txhash;
|
u32 txhash;
|
||||||
u32 rcv_isn;
|
u32 rcv_isn;
|
||||||
u32 snt_isn;
|
u32 snt_isn;
|
||||||
|
@ -39,8 +39,27 @@ struct mptcp_out_options {
|
|||||||
|
|
||||||
void mptcp_init(void);
|
void mptcp_init(void);
|
||||||
|
|
||||||
|
static inline bool sk_is_mptcp(const struct sock *sk)
|
||||||
|
{
|
||||||
|
return tcp_sk(sk)->is_mptcp;
|
||||||
|
}
|
||||||
|
|
||||||
|
static inline bool rsk_is_mptcp(const struct request_sock *req)
|
||||||
|
{
|
||||||
|
return tcp_rsk(req)->is_mptcp;
|
||||||
|
}
|
||||||
|
|
||||||
void mptcp_parse_option(const unsigned char *ptr, int opsize,
|
void mptcp_parse_option(const unsigned char *ptr, int opsize,
|
||||||
struct tcp_options_received *opt_rx);
|
struct tcp_options_received *opt_rx);
|
||||||
|
/* MPTCP hooks for building TCP options.
 *
 * The three *_options() helpers return true when an MPTCP option should be
 * added; in that case *size is set to the option length and @opts is filled.
 */

/* options for an outgoing SYN */
bool mptcp_syn_options(struct sock *sk,
		       unsigned int *size,
		       struct mptcp_out_options *opts);

/* called from the SYN-SENT receive path for sockets flagged as MPTCP */
void mptcp_rcv_synsent(struct sock *sk);

/* options for a SYN-ACK answering @req */
bool mptcp_synack_options(const struct request_sock *req,
			  unsigned int *size,
			  struct mptcp_out_options *opts);

/* options for a packet sent on an established connection */
bool mptcp_established_options(struct sock *sk,
			       struct sk_buff *skb,
			       unsigned int *size,
			       unsigned int remaining,
			       struct mptcp_out_options *opts);
|
||||||
|
|
||||||
void mptcp_write_options(__be32 *ptr, struct mptcp_out_options *opts);
|
void mptcp_write_options(__be32 *ptr, struct mptcp_out_options *opts);
|
||||||
|
|
||||||
/* move the skb extension ownership, with the assumption that 'to' is
|
/* move the skb extension ownership, with the assumption that 'to' is
|
||||||
@ -89,11 +108,47 @@ static inline void mptcp_init(void)
|
|||||||
{
|
{
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* Fallback variant (presumably the !CONFIG_MPTCP branch - confirm against the
 * enclosing #if): no socket is ever reported as MPTCP.
 */
static inline bool sk_is_mptcp(const struct sock *sk)
{
	const bool is_mptcp = false;

	return is_mptcp;
}
|
||||||
|
|
||||||
|
/* Fallback variant (presumably the !CONFIG_MPTCP branch - confirm against the
 * enclosing #if): no request socket is ever reported as MPTCP.
 */
static inline bool rsk_is_mptcp(const struct request_sock *req)
{
	const bool is_mptcp = false;

	return is_mptcp;
}
|
||||||
|
|
||||||
static inline void mptcp_parse_option(const unsigned char *ptr, int opsize,
|
static inline void mptcp_parse_option(const unsigned char *ptr, int opsize,
|
||||||
struct tcp_options_received *opt_rx)
|
struct tcp_options_received *opt_rx)
|
||||||
{
|
{
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* Fallback variant: never adds an MPTCP option to a SYN. @size and @opts are
 * left untouched and false is returned.
 */
static inline bool mptcp_syn_options(struct sock *sk,
				     unsigned int *size,
				     struct mptcp_out_options *opts)
{
	const bool option_added = false;

	return option_added;
}
|
||||||
|
|
||||||
|
/* Fallback variant: nothing to do when a SYN-ACK is received. */
static inline void mptcp_rcv_synsent(struct sock *sk)
{
	return;
}
|
||||||
|
|
||||||
|
/* Fallback variant: never adds an MPTCP option to a SYN-ACK. @size and @opts
 * are left untouched and false is returned.
 */
static inline bool
mptcp_synack_options(const struct request_sock *req, unsigned int *size,
		     struct mptcp_out_options *opts)
{
	const bool option_added = false;

	return option_added;
}
|
||||||
|
|
||||||
|
/* Fallback variant: never adds an MPTCP option on an established connection.
 * @size and @opts are left untouched and false is returned.
 */
static inline bool
mptcp_established_options(struct sock *sk, struct sk_buff *skb,
			  unsigned int *size, unsigned int remaining,
			  struct mptcp_out_options *opts)
{
	const bool option_added = false;

	return option_added;
}
|
||||||
|
|
||||||
static inline void mptcp_skb_ext_move(struct sk_buff *to,
|
static inline void mptcp_skb_ext_move(struct sk_buff *to,
|
||||||
const struct sk_buff *from)
|
const struct sk_buff *from)
|
||||||
{
|
{
|
||||||
@ -107,6 +162,8 @@ static inline bool mptcp_skb_can_collapse(const struct sk_buff *to,
|
|||||||
|
|
||||||
#endif /* CONFIG_MPTCP */
|
#endif /* CONFIG_MPTCP */
|
||||||
|
|
||||||
|
void mptcp_handle_ipv6_mapped(struct sock *sk, bool mapped);
|
||||||
|
|
||||||
#if IS_ENABLED(CONFIG_MPTCP_IPV6)
|
#if IS_ENABLED(CONFIG_MPTCP_IPV6)
|
||||||
int mptcpv6_init(void);
|
int mptcpv6_init(void);
|
||||||
#elif IS_ENABLED(CONFIG_IPV6)
|
#elif IS_ENABLED(CONFIG_IPV6)
|
||||||
|
@ -5978,6 +5978,9 @@ static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb,
|
|||||||
tcp_sync_mss(sk, icsk->icsk_pmtu_cookie);
|
tcp_sync_mss(sk, icsk->icsk_pmtu_cookie);
|
||||||
tcp_initialize_rcv_mss(sk);
|
tcp_initialize_rcv_mss(sk);
|
||||||
|
|
||||||
|
if (sk_is_mptcp(sk))
|
||||||
|
mptcp_rcv_synsent(sk);
|
||||||
|
|
||||||
/* Remember, tcp_poll() does not lock socket!
|
/* Remember, tcp_poll() does not lock socket!
|
||||||
* Change state from SYN-SENT only after copied_seq
|
* Change state from SYN-SENT only after copied_seq
|
||||||
* is initialized. */
|
* is initialized. */
|
||||||
@ -6600,6 +6603,9 @@ int tcp_conn_request(struct request_sock_ops *rsk_ops,
|
|||||||
|
|
||||||
tcp_rsk(req)->af_specific = af_ops;
|
tcp_rsk(req)->af_specific = af_ops;
|
||||||
tcp_rsk(req)->ts_off = 0;
|
tcp_rsk(req)->ts_off = 0;
|
||||||
|
#if IS_ENABLED(CONFIG_MPTCP)
|
||||||
|
tcp_rsk(req)->is_mptcp = 0;
|
||||||
|
#endif
|
||||||
|
|
||||||
tcp_clear_options(&tmp_opt);
|
tcp_clear_options(&tmp_opt);
|
||||||
tmp_opt.mss_clamp = af_ops->mss_clamp;
|
tmp_opt.mss_clamp = af_ops->mss_clamp;
|
||||||
|
@ -597,6 +597,22 @@ static void smc_set_option_cond(const struct tcp_sock *tp,
|
|||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static void mptcp_set_option_cond(const struct request_sock *req,
|
||||||
|
struct tcp_out_options *opts,
|
||||||
|
unsigned int *remaining)
|
||||||
|
{
|
||||||
|
if (rsk_is_mptcp(req)) {
|
||||||
|
unsigned int size;
|
||||||
|
|
||||||
|
if (mptcp_synack_options(req, &size, &opts->mptcp)) {
|
||||||
|
if (*remaining >= size) {
|
||||||
|
opts->options |= OPTION_MPTCP;
|
||||||
|
*remaining -= size;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
/* Compute TCP options for SYN packets. This is not the final
|
/* Compute TCP options for SYN packets. This is not the final
|
||||||
* network wire format yet.
|
* network wire format yet.
|
||||||
*/
|
*/
|
||||||
@ -666,6 +682,15 @@ static unsigned int tcp_syn_options(struct sock *sk, struct sk_buff *skb,
|
|||||||
|
|
||||||
smc_set_option(tp, opts, &remaining);
|
smc_set_option(tp, opts, &remaining);
|
||||||
|
|
||||||
|
if (sk_is_mptcp(sk)) {
|
||||||
|
unsigned int size;
|
||||||
|
|
||||||
|
if (mptcp_syn_options(sk, &size, &opts->mptcp)) {
|
||||||
|
opts->options |= OPTION_MPTCP;
|
||||||
|
remaining -= size;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
return MAX_TCP_OPTION_SPACE - remaining;
|
return MAX_TCP_OPTION_SPACE - remaining;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -727,6 +752,8 @@ static unsigned int tcp_synack_options(const struct sock *sk,
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
mptcp_set_option_cond(req, opts, &remaining);
|
||||||
|
|
||||||
smc_set_option_cond(tcp_sk(sk), ireq, opts, &remaining);
|
smc_set_option_cond(tcp_sk(sk), ireq, opts, &remaining);
|
||||||
|
|
||||||
return MAX_TCP_OPTION_SPACE - remaining;
|
return MAX_TCP_OPTION_SPACE - remaining;
|
||||||
@ -764,6 +791,23 @@ static unsigned int tcp_established_options(struct sock *sk, struct sk_buff *skb
|
|||||||
size += TCPOLEN_TSTAMP_ALIGNED;
|
size += TCPOLEN_TSTAMP_ALIGNED;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* MPTCP options have precedence over SACK for the limited TCP
|
||||||
|
* option space because a MPTCP connection would be forced to
|
||||||
|
* fall back to regular TCP if a required multipath option is
|
||||||
|
* missing. SACK still gets a chance to use whatever space is
|
||||||
|
* left.
|
||||||
|
*/
|
||||||
|
if (sk_is_mptcp(sk)) {
|
||||||
|
unsigned int remaining = MAX_TCP_OPTION_SPACE - size;
|
||||||
|
unsigned int opt_size = 0;
|
||||||
|
|
||||||
|
if (mptcp_established_options(sk, skb, &opt_size, remaining,
|
||||||
|
&opts->mptcp)) {
|
||||||
|
opts->options |= OPTION_MPTCP;
|
||||||
|
size += opt_size;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
eff_sacks = tp->rx_opt.num_sacks + tp->rx_opt.dsack;
|
eff_sacks = tp->rx_opt.num_sacks + tp->rx_opt.dsack;
|
||||||
if (unlikely(eff_sacks)) {
|
if (unlikely(eff_sacks)) {
|
||||||
const unsigned int remaining = MAX_TCP_OPTION_SPACE - size;
|
const unsigned int remaining = MAX_TCP_OPTION_SPACE - size;
|
||||||
|
@ -238,6 +238,8 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
|
|||||||
sin.sin_addr.s_addr = usin->sin6_addr.s6_addr32[3];
|
sin.sin_addr.s_addr = usin->sin6_addr.s6_addr32[3];
|
||||||
|
|
||||||
icsk->icsk_af_ops = &ipv6_mapped;
|
icsk->icsk_af_ops = &ipv6_mapped;
|
||||||
|
if (sk_is_mptcp(sk))
|
||||||
|
mptcp_handle_ipv6_mapped(sk, true);
|
||||||
sk->sk_backlog_rcv = tcp_v4_do_rcv;
|
sk->sk_backlog_rcv = tcp_v4_do_rcv;
|
||||||
#ifdef CONFIG_TCP_MD5SIG
|
#ifdef CONFIG_TCP_MD5SIG
|
||||||
tp->af_specific = &tcp_sock_ipv6_mapped_specific;
|
tp->af_specific = &tcp_sock_ipv6_mapped_specific;
|
||||||
@ -248,6 +250,8 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
|
|||||||
if (err) {
|
if (err) {
|
||||||
icsk->icsk_ext_hdr_len = exthdrlen;
|
icsk->icsk_ext_hdr_len = exthdrlen;
|
||||||
icsk->icsk_af_ops = &ipv6_specific;
|
icsk->icsk_af_ops = &ipv6_specific;
|
||||||
|
if (sk_is_mptcp(sk))
|
||||||
|
mptcp_handle_ipv6_mapped(sk, false);
|
||||||
sk->sk_backlog_rcv = tcp_v6_do_rcv;
|
sk->sk_backlog_rcv = tcp_v6_do_rcv;
|
||||||
#ifdef CONFIG_TCP_MD5SIG
|
#ifdef CONFIG_TCP_MD5SIG
|
||||||
tp->af_specific = &tcp_sock_ipv6_specific;
|
tp->af_specific = &tcp_sock_ipv6_specific;
|
||||||
@ -1203,6 +1207,8 @@ static struct sock *tcp_v6_syn_recv_sock(const struct sock *sk, struct sk_buff *
|
|||||||
newnp->saddr = newsk->sk_v6_rcv_saddr;
|
newnp->saddr = newsk->sk_v6_rcv_saddr;
|
||||||
|
|
||||||
inet_csk(newsk)->icsk_af_ops = &ipv6_mapped;
|
inet_csk(newsk)->icsk_af_ops = &ipv6_mapped;
|
||||||
|
if (sk_is_mptcp(newsk))
|
||||||
|
mptcp_handle_ipv6_mapped(newsk, true);
|
||||||
newsk->sk_backlog_rcv = tcp_v4_do_rcv;
|
newsk->sk_backlog_rcv = tcp_v4_do_rcv;
|
||||||
#ifdef CONFIG_TCP_MD5SIG
|
#ifdef CONFIG_TCP_MD5SIG
|
||||||
newtp->af_specific = &tcp_sock_ipv6_mapped_specific;
|
newtp->af_specific = &tcp_sock_ipv6_mapped_specific;
|
||||||
|
@ -72,14 +72,114 @@ void mptcp_parse_option(const unsigned char *ptr, int opsize,
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void mptcp_get_options(const struct sk_buff *skb,
|
||||||
|
struct tcp_options_received *opt_rx)
|
||||||
|
{
|
||||||
|
const unsigned char *ptr;
|
||||||
|
const struct tcphdr *th = tcp_hdr(skb);
|
||||||
|
int length = (th->doff * 4) - sizeof(struct tcphdr);
|
||||||
|
|
||||||
|
ptr = (const unsigned char *)(th + 1);
|
||||||
|
|
||||||
|
while (length > 0) {
|
||||||
|
int opcode = *ptr++;
|
||||||
|
int opsize;
|
||||||
|
|
||||||
|
switch (opcode) {
|
||||||
|
case TCPOPT_EOL:
|
||||||
|
return;
|
||||||
|
case TCPOPT_NOP: /* Ref: RFC 793 section 3.1 */
|
||||||
|
length--;
|
||||||
|
continue;
|
||||||
|
default:
|
||||||
|
opsize = *ptr++;
|
||||||
|
if (opsize < 2) /* "silly options" */
|
||||||
|
return;
|
||||||
|
if (opsize > length)
|
||||||
|
return; /* don't parse partial options */
|
||||||
|
if (opcode == TCPOPT_MPTCP)
|
||||||
|
mptcp_parse_option(ptr, opsize, opt_rx);
|
||||||
|
ptr += opsize - 2;
|
||||||
|
length -= opsize;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
bool mptcp_syn_options(struct sock *sk, unsigned int *size,
|
||||||
|
struct mptcp_out_options *opts)
|
||||||
|
{
|
||||||
|
struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk);
|
||||||
|
|
||||||
|
if (subflow->request_mptcp) {
|
||||||
|
pr_debug("local_key=%llu", subflow->local_key);
|
||||||
|
opts->suboptions = OPTION_MPTCP_MPC_SYN;
|
||||||
|
opts->sndr_key = subflow->local_key;
|
||||||
|
*size = TCPOLEN_MPTCP_MPC_SYN;
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
void mptcp_rcv_synsent(struct sock *sk)
|
||||||
|
{
|
||||||
|
struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk);
|
||||||
|
struct tcp_sock *tp = tcp_sk(sk);
|
||||||
|
|
||||||
|
pr_debug("subflow=%p", subflow);
|
||||||
|
if (subflow->request_mptcp && tp->rx_opt.mptcp.mp_capable) {
|
||||||
|
subflow->mp_capable = 1;
|
||||||
|
subflow->remote_key = tp->rx_opt.mptcp.sndr_key;
|
||||||
|
} else {
|
||||||
|
tcp_sk(sk)->is_mptcp = 0;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
bool mptcp_established_options(struct sock *sk, struct sk_buff *skb,
|
||||||
|
unsigned int *size, unsigned int remaining,
|
||||||
|
struct mptcp_out_options *opts)
|
||||||
|
{
|
||||||
|
struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk);
|
||||||
|
|
||||||
|
if (subflow->mp_capable && !subflow->fourth_ack) {
|
||||||
|
opts->suboptions = OPTION_MPTCP_MPC_ACK;
|
||||||
|
opts->sndr_key = subflow->local_key;
|
||||||
|
opts->rcvr_key = subflow->remote_key;
|
||||||
|
*size = TCPOLEN_MPTCP_MPC_ACK;
|
||||||
|
subflow->fourth_ack = 1;
|
||||||
|
pr_debug("subflow=%p, local_key=%llu, remote_key=%llu",
|
||||||
|
subflow, subflow->local_key, subflow->remote_key);
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
bool mptcp_synack_options(const struct request_sock *req, unsigned int *size,
|
||||||
|
struct mptcp_out_options *opts)
|
||||||
|
{
|
||||||
|
struct mptcp_subflow_request_sock *subflow_req = mptcp_subflow_rsk(req);
|
||||||
|
|
||||||
|
if (subflow_req->mp_capable) {
|
||||||
|
opts->suboptions = OPTION_MPTCP_MPC_SYNACK;
|
||||||
|
opts->sndr_key = subflow_req->local_key;
|
||||||
|
*size = TCPOLEN_MPTCP_MPC_SYNACK;
|
||||||
|
pr_debug("subflow_req=%p, local_key=%llu",
|
||||||
|
subflow_req, subflow_req->local_key);
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
void mptcp_write_options(__be32 *ptr, struct mptcp_out_options *opts)
|
void mptcp_write_options(__be32 *ptr, struct mptcp_out_options *opts)
|
||||||
{
|
{
|
||||||
if ((OPTION_MPTCP_MPC_SYN |
|
if ((OPTION_MPTCP_MPC_SYN |
|
||||||
|
OPTION_MPTCP_MPC_SYNACK |
|
||||||
OPTION_MPTCP_MPC_ACK) & opts->suboptions) {
|
OPTION_MPTCP_MPC_ACK) & opts->suboptions) {
|
||||||
u8 len;
|
u8 len;
|
||||||
|
|
||||||
if (OPTION_MPTCP_MPC_SYN & opts->suboptions)
|
if (OPTION_MPTCP_MPC_SYN & opts->suboptions)
|
||||||
len = TCPOLEN_MPTCP_MPC_SYN;
|
len = TCPOLEN_MPTCP_MPC_SYN;
|
||||||
|
else if (OPTION_MPTCP_MPC_SYNACK & opts->suboptions)
|
||||||
|
len = TCPOLEN_MPTCP_MPC_SYNACK;
|
||||||
else
|
else
|
||||||
len = TCPOLEN_MPTCP_MPC_ACK;
|
len = TCPOLEN_MPTCP_MPC_ACK;
|
||||||
|
|
||||||
|
@ -25,12 +25,28 @@
|
|||||||
*/
|
*/
|
||||||
static struct socket *__mptcp_nmpc_socket(const struct mptcp_sock *msk)
|
static struct socket *__mptcp_nmpc_socket(const struct mptcp_sock *msk)
|
||||||
{
|
{
|
||||||
if (!msk->subflow)
|
if (!msk->subflow || mptcp_subflow_ctx(msk->subflow->sk)->fourth_ack)
|
||||||
return NULL;
|
return NULL;
|
||||||
|
|
||||||
return msk->subflow;
|
return msk->subflow;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* if msk has a single subflow, and the mp_capable handshake is failed,
|
||||||
|
* return it.
|
||||||
|
* Otherwise returns NULL
|
||||||
|
*/
|
||||||
|
static struct socket *__mptcp_tcp_fallback(const struct mptcp_sock *msk)
|
||||||
|
{
|
||||||
|
struct socket *ssock = __mptcp_nmpc_socket(msk);
|
||||||
|
|
||||||
|
sock_owned_by_me((const struct sock *)msk);
|
||||||
|
|
||||||
|
if (!ssock || sk_is_mptcp(ssock->sk))
|
||||||
|
return NULL;
|
||||||
|
|
||||||
|
return ssock;
|
||||||
|
}
|
||||||
|
|
||||||
static bool __mptcp_can_create_subflow(const struct mptcp_sock *msk)
|
static bool __mptcp_can_create_subflow(const struct mptcp_sock *msk)
|
||||||
{
|
{
|
||||||
return ((struct sock *)msk)->sk_state == TCP_CLOSE;
|
return ((struct sock *)msk)->sk_state == TCP_CLOSE;
|
||||||
@ -56,6 +72,7 @@ static struct socket *__mptcp_socket_create(struct mptcp_sock *msk, int state)
|
|||||||
|
|
||||||
msk->subflow = ssock;
|
msk->subflow = ssock;
|
||||||
subflow = mptcp_subflow_ctx(ssock->sk);
|
subflow = mptcp_subflow_ctx(ssock->sk);
|
||||||
|
list_add(&subflow->node, &msk->conn_list);
|
||||||
subflow->request_mptcp = 1;
|
subflow->request_mptcp = 1;
|
||||||
|
|
||||||
set_state:
|
set_state:
|
||||||
@ -64,66 +81,169 @@ static struct socket *__mptcp_socket_create(struct mptcp_sock *msk, int state)
|
|||||||
return ssock;
|
return ssock;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static struct sock *mptcp_subflow_get(const struct mptcp_sock *msk)
|
||||||
|
{
|
||||||
|
struct mptcp_subflow_context *subflow;
|
||||||
|
|
||||||
|
sock_owned_by_me((const struct sock *)msk);
|
||||||
|
|
||||||
|
mptcp_for_each_subflow(msk, subflow) {
|
||||||
|
return mptcp_subflow_tcp_sock(subflow);
|
||||||
|
}
|
||||||
|
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
|
||||||
static int mptcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
|
static int mptcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
|
||||||
{
|
{
|
||||||
struct mptcp_sock *msk = mptcp_sk(sk);
|
struct mptcp_sock *msk = mptcp_sk(sk);
|
||||||
struct socket *subflow = msk->subflow;
|
struct socket *ssock;
|
||||||
|
struct sock *ssk;
|
||||||
|
int ret;
|
||||||
|
|
||||||
if (msg->msg_flags & ~(MSG_MORE | MSG_DONTWAIT | MSG_NOSIGNAL))
|
if (msg->msg_flags & ~(MSG_MORE | MSG_DONTWAIT | MSG_NOSIGNAL))
|
||||||
return -EOPNOTSUPP;
|
return -EOPNOTSUPP;
|
||||||
|
|
||||||
return sock_sendmsg(subflow, msg);
|
lock_sock(sk);
|
||||||
|
ssock = __mptcp_tcp_fallback(msk);
|
||||||
|
if (ssock) {
|
||||||
|
pr_debug("fallback passthrough");
|
||||||
|
ret = sock_sendmsg(ssock, msg);
|
||||||
|
release_sock(sk);
|
||||||
|
return ret;
|
||||||
|
}
|
||||||
|
|
||||||
|
ssk = mptcp_subflow_get(msk);
|
||||||
|
if (!ssk) {
|
||||||
|
release_sock(sk);
|
||||||
|
return -ENOTCONN;
|
||||||
|
}
|
||||||
|
|
||||||
|
ret = sock_sendmsg(ssk->sk_socket, msg);
|
||||||
|
|
||||||
|
release_sock(sk);
|
||||||
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
static int mptcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
|
static int mptcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
|
||||||
int nonblock, int flags, int *addr_len)
|
int nonblock, int flags, int *addr_len)
|
||||||
{
|
{
|
||||||
struct mptcp_sock *msk = mptcp_sk(sk);
|
struct mptcp_sock *msk = mptcp_sk(sk);
|
||||||
struct socket *subflow = msk->subflow;
|
struct socket *ssock;
|
||||||
|
struct sock *ssk;
|
||||||
|
int copied = 0;
|
||||||
|
|
||||||
if (msg->msg_flags & ~(MSG_WAITALL | MSG_DONTWAIT))
|
if (msg->msg_flags & ~(MSG_WAITALL | MSG_DONTWAIT))
|
||||||
return -EOPNOTSUPP;
|
return -EOPNOTSUPP;
|
||||||
|
|
||||||
return sock_recvmsg(subflow, msg, flags);
|
lock_sock(sk);
|
||||||
|
ssock = __mptcp_tcp_fallback(msk);
|
||||||
|
if (ssock) {
|
||||||
|
pr_debug("fallback-read subflow=%p",
|
||||||
|
mptcp_subflow_ctx(ssock->sk));
|
||||||
|
copied = sock_recvmsg(ssock, msg, flags);
|
||||||
|
release_sock(sk);
|
||||||
|
return copied;
|
||||||
|
}
|
||||||
|
|
||||||
|
ssk = mptcp_subflow_get(msk);
|
||||||
|
if (!ssk) {
|
||||||
|
release_sock(sk);
|
||||||
|
return -ENOTCONN;
|
||||||
|
}
|
||||||
|
|
||||||
|
copied = sock_recvmsg(ssk->sk_socket, msg, flags);
|
||||||
|
|
||||||
|
release_sock(sk);
|
||||||
|
|
||||||
|
return copied;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* subflow sockets can be either outgoing (connect) or incoming
|
||||||
|
* (accept).
|
||||||
|
*
|
||||||
|
* Outgoing subflows use in-kernel sockets.
|
||||||
|
* Incoming subflows do not have their own 'struct socket' allocated,
|
||||||
|
* so we need to use tcp_close() after detaching them from the mptcp
|
||||||
|
* parent socket.
|
||||||
|
*/
|
||||||
|
static void __mptcp_close_ssk(struct sock *sk, struct sock *ssk,
|
||||||
|
struct mptcp_subflow_context *subflow,
|
||||||
|
long timeout)
|
||||||
|
{
|
||||||
|
struct socket *sock = READ_ONCE(ssk->sk_socket);
|
||||||
|
|
||||||
|
list_del(&subflow->node);
|
||||||
|
|
||||||
|
if (sock && sock != sk->sk_socket) {
|
||||||
|
/* outgoing subflow */
|
||||||
|
sock_release(sock);
|
||||||
|
} else {
|
||||||
|
/* incoming subflow */
|
||||||
|
tcp_close(ssk, timeout);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
static int mptcp_init_sock(struct sock *sk)
|
static int mptcp_init_sock(struct sock *sk)
|
||||||
{
|
{
|
||||||
|
struct mptcp_sock *msk = mptcp_sk(sk);
|
||||||
|
|
||||||
|
INIT_LIST_HEAD(&msk->conn_list);
|
||||||
|
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
static void mptcp_close(struct sock *sk, long timeout)
|
static void mptcp_close(struct sock *sk, long timeout)
|
||||||
{
|
{
|
||||||
|
struct mptcp_subflow_context *subflow, *tmp;
|
||||||
struct mptcp_sock *msk = mptcp_sk(sk);
|
struct mptcp_sock *msk = mptcp_sk(sk);
|
||||||
struct socket *ssock;
|
|
||||||
|
|
||||||
inet_sk_state_store(sk, TCP_CLOSE);
|
inet_sk_state_store(sk, TCP_CLOSE);
|
||||||
|
|
||||||
ssock = __mptcp_nmpc_socket(msk);
|
lock_sock(sk);
|
||||||
if (ssock) {
|
|
||||||
pr_debug("subflow=%p", mptcp_subflow_ctx(ssock->sk));
|
list_for_each_entry_safe(subflow, tmp, &msk->conn_list, node) {
|
||||||
sock_release(ssock);
|
struct sock *ssk = mptcp_subflow_tcp_sock(subflow);
|
||||||
|
|
||||||
|
__mptcp_close_ssk(sk, ssk, subflow, timeout);
|
||||||
}
|
}
|
||||||
|
|
||||||
sock_orphan(sk);
|
release_sock(sk);
|
||||||
sock_put(sk);
|
sk_common_release(sk);
|
||||||
}
|
}
|
||||||
|
|
||||||
static int mptcp_connect(struct sock *sk, struct sockaddr *saddr, int len)
|
static int mptcp_get_port(struct sock *sk, unsigned short snum)
|
||||||
{
|
{
|
||||||
struct mptcp_sock *msk = mptcp_sk(sk);
|
struct mptcp_sock *msk = mptcp_sk(sk);
|
||||||
int err;
|
struct socket *ssock;
|
||||||
|
|
||||||
saddr->sa_family = AF_INET;
|
ssock = __mptcp_nmpc_socket(msk);
|
||||||
|
pr_debug("msk=%p, subflow=%p", msk, ssock);
|
||||||
|
if (WARN_ON_ONCE(!ssock))
|
||||||
|
return -EINVAL;
|
||||||
|
|
||||||
pr_debug("msk=%p, subflow=%p", msk,
|
return inet_csk_get_port(ssock->sk, snum);
|
||||||
mptcp_subflow_ctx(msk->subflow->sk));
|
}
|
||||||
|
|
||||||
err = kernel_connect(msk->subflow, saddr, len, 0);
|
void mptcp_finish_connect(struct sock *ssk)
|
||||||
|
{
|
||||||
|
struct mptcp_subflow_context *subflow;
|
||||||
|
struct mptcp_sock *msk;
|
||||||
|
struct sock *sk;
|
||||||
|
|
||||||
sk->sk_state = TCP_ESTABLISHED;
|
subflow = mptcp_subflow_ctx(ssk);
|
||||||
|
|
||||||
return err;
|
if (!subflow->mp_capable)
|
||||||
|
return;
|
||||||
|
|
||||||
|
sk = subflow->conn;
|
||||||
|
msk = mptcp_sk(sk);
|
||||||
|
|
||||||
|
/* the socket is not connected yet, no msk/subflow ops can access/race
|
||||||
|
* accessing the field below
|
||||||
|
*/
|
||||||
|
WRITE_ONCE(msk->remote_key, subflow->remote_key);
|
||||||
|
WRITE_ONCE(msk->local_key, subflow->local_key);
|
||||||
}
|
}
|
||||||
|
|
||||||
static struct proto mptcp_prot = {
|
static struct proto mptcp_prot = {
|
||||||
@ -132,13 +252,12 @@ static struct proto mptcp_prot = {
|
|||||||
.init = mptcp_init_sock,
|
.init = mptcp_init_sock,
|
||||||
.close = mptcp_close,
|
.close = mptcp_close,
|
||||||
.accept = inet_csk_accept,
|
.accept = inet_csk_accept,
|
||||||
.connect = mptcp_connect,
|
|
||||||
.shutdown = tcp_shutdown,
|
.shutdown = tcp_shutdown,
|
||||||
.sendmsg = mptcp_sendmsg,
|
.sendmsg = mptcp_sendmsg,
|
||||||
.recvmsg = mptcp_recvmsg,
|
.recvmsg = mptcp_recvmsg,
|
||||||
.hash = inet_hash,
|
.hash = inet_hash,
|
||||||
.unhash = inet_unhash,
|
.unhash = inet_unhash,
|
||||||
.get_port = inet_csk_get_port,
|
.get_port = mptcp_get_port,
|
||||||
.obj_size = sizeof(struct mptcp_sock),
|
.obj_size = sizeof(struct mptcp_sock),
|
||||||
.no_autobind = true,
|
.no_autobind = true,
|
||||||
};
|
};
|
||||||
|
@ -40,19 +40,47 @@
|
|||||||
struct mptcp_sock {
|
struct mptcp_sock {
|
||||||
/* inet_connection_sock must be the first member */
|
/* inet_connection_sock must be the first member */
|
||||||
struct inet_connection_sock sk;
|
struct inet_connection_sock sk;
|
||||||
|
u64 local_key;
|
||||||
|
u64 remote_key;
|
||||||
|
struct list_head conn_list;
|
||||||
struct socket *subflow; /* outgoing connect/listener/!mp_capable */
|
struct socket *subflow; /* outgoing connect/listener/!mp_capable */
|
||||||
};
|
};
|
||||||
|
|
||||||
|
/* Iterate over the subflow contexts linked on (__msk)->conn_list through
 * their 'node' member; __subflow is the loop cursor.
 */
#define mptcp_for_each_subflow(__msk, __subflow)			\
	list_for_each_entry(__subflow, &((__msk)->conn_list), node)
|
||||||
|
|
||||||
static inline struct mptcp_sock *mptcp_sk(const struct sock *sk)
|
static inline struct mptcp_sock *mptcp_sk(const struct sock *sk)
|
||||||
{
|
{
|
||||||
return (struct mptcp_sock *)sk;
|
return (struct mptcp_sock *)sk;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
struct mptcp_subflow_request_sock {
|
||||||
|
struct tcp_request_sock sk;
|
||||||
|
u8 mp_capable : 1,
|
||||||
|
mp_join : 1,
|
||||||
|
backup : 1;
|
||||||
|
u64 local_key;
|
||||||
|
u64 remote_key;
|
||||||
|
};
|
||||||
|
|
||||||
|
/* View @rsk as the MPTCP subflow request that embeds it (plain pointer cast). */
static inline struct mptcp_subflow_request_sock *
mptcp_subflow_rsk(const struct request_sock *rsk)
{
	struct mptcp_subflow_request_sock *subflow_req;

	subflow_req = (struct mptcp_subflow_request_sock *)rsk;

	return subflow_req;
}
|
||||||
|
|
||||||
/* MPTCP subflow context */
|
/* MPTCP subflow context */
|
||||||
struct mptcp_subflow_context {
|
struct mptcp_subflow_context {
|
||||||
u32 request_mptcp : 1; /* send MP_CAPABLE */
|
struct list_head node;/* conn_list of subflows */
|
||||||
|
u64 local_key;
|
||||||
|
u64 remote_key;
|
||||||
|
u32 request_mptcp : 1, /* send MP_CAPABLE */
|
||||||
|
mp_capable : 1, /* remote is MPTCP capable */
|
||||||
|
fourth_ack : 1, /* send initial DSS */
|
||||||
|
conn_finished : 1;
|
||||||
struct sock *tcp_sock; /* tcp sk backpointer */
|
struct sock *tcp_sock; /* tcp sk backpointer */
|
||||||
struct sock *conn; /* parent mptcp_sock */
|
struct sock *conn; /* parent mptcp_sock */
|
||||||
|
const struct inet_connection_sock_af_ops *icsk_af_ops;
|
||||||
struct rcu_head rcu;
|
struct rcu_head rcu;
|
||||||
};
|
};
|
||||||
|
|
||||||
@ -74,4 +102,14 @@ mptcp_subflow_tcp_sock(const struct mptcp_subflow_context *subflow)
|
|||||||
void mptcp_subflow_init(void);
|
void mptcp_subflow_init(void);
|
||||||
int mptcp_subflow_create_socket(struct sock *sk, struct socket **new_sock);
|
int mptcp_subflow_create_socket(struct sock *sk, struct socket **new_sock);
|
||||||
|
|
||||||
|
/* address-family operation tables defined outside the MPTCP code */
extern const struct inet_connection_sock_af_ops ipv4_specific;
#if IS_ENABLED(CONFIG_MPTCP_IPV6)
extern const struct inet_connection_sock_af_ops ipv6_specific;
#endif

/* parse the MPTCP options found in the TCP header of @skb into @opt_rx */
void mptcp_get_options(const struct sk_buff *skb,
		       struct tcp_options_received *opt_rx);

/* invoked by the subflow code once the outgoing connection completes */
void mptcp_finish_connect(struct sock *sk);
|
||||||
|
|
||||||
#endif /* __MPTCP_PROTOCOL_H */
|
#endif /* __MPTCP_PROTOCOL_H */
|
||||||
|
@ -12,9 +12,188 @@
|
|||||||
#include <net/inet_hashtables.h>
|
#include <net/inet_hashtables.h>
|
||||||
#include <net/protocol.h>
|
#include <net/protocol.h>
|
||||||
#include <net/tcp.h>
|
#include <net/tcp.h>
|
||||||
|
#if IS_ENABLED(CONFIG_MPTCP_IPV6)
|
||||||
|
#include <net/ip6_route.h>
|
||||||
|
#endif
|
||||||
#include <net/mptcp.h>
|
#include <net/mptcp.h>
|
||||||
#include "protocol.h"
|
#include "protocol.h"
|
||||||
|
|
||||||
|
static void subflow_init_req(struct request_sock *req,
|
||||||
|
const struct sock *sk_listener,
|
||||||
|
struct sk_buff *skb)
|
||||||
|
{
|
||||||
|
struct mptcp_subflow_context *listener = mptcp_subflow_ctx(sk_listener);
|
||||||
|
struct mptcp_subflow_request_sock *subflow_req = mptcp_subflow_rsk(req);
|
||||||
|
struct tcp_options_received rx_opt;
|
||||||
|
|
||||||
|
pr_debug("subflow_req=%p, listener=%p", subflow_req, listener);
|
||||||
|
|
||||||
|
memset(&rx_opt.mptcp, 0, sizeof(rx_opt.mptcp));
|
||||||
|
mptcp_get_options(skb, &rx_opt);
|
||||||
|
|
||||||
|
subflow_req->mp_capable = 0;
|
||||||
|
|
||||||
|
#ifdef CONFIG_TCP_MD5SIG
|
||||||
|
/* no MPTCP if MD5SIG is enabled on this socket or we may run out of
|
||||||
|
* TCP option space.
|
||||||
|
*/
|
||||||
|
if (rcu_access_pointer(tcp_sk(sk_listener)->md5sig_info))
|
||||||
|
return;
|
||||||
|
#endif
|
||||||
|
|
||||||
|
if (rx_opt.mptcp.mp_capable && listener->request_mptcp) {
|
||||||
|
subflow_req->mp_capable = 1;
|
||||||
|
subflow_req->remote_key = rx_opt.mptcp.sndr_key;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
static void subflow_v4_init_req(struct request_sock *req,
|
||||||
|
const struct sock *sk_listener,
|
||||||
|
struct sk_buff *skb)
|
||||||
|
{
|
||||||
|
tcp_rsk(req)->is_mptcp = 1;
|
||||||
|
|
||||||
|
tcp_request_sock_ipv4_ops.init_req(req, sk_listener, skb);
|
||||||
|
|
||||||
|
subflow_init_req(req, sk_listener, skb);
|
||||||
|
}
|
||||||
|
|
||||||
|
#if IS_ENABLED(CONFIG_MPTCP_IPV6)
|
||||||
|
static void subflow_v6_init_req(struct request_sock *req,
|
||||||
|
const struct sock *sk_listener,
|
||||||
|
struct sk_buff *skb)
|
||||||
|
{
|
||||||
|
tcp_rsk(req)->is_mptcp = 1;
|
||||||
|
|
||||||
|
tcp_request_sock_ipv6_ops.init_req(req, sk_listener, skb);
|
||||||
|
|
||||||
|
subflow_init_req(req, sk_listener, skb);
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
|
static void subflow_finish_connect(struct sock *sk, const struct sk_buff *skb)
|
||||||
|
{
|
||||||
|
struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk);
|
||||||
|
|
||||||
|
subflow->icsk_af_ops->sk_rx_dst_set(sk, skb);
|
||||||
|
|
||||||
|
if (subflow->conn && !subflow->conn_finished) {
|
||||||
|
pr_debug("subflow=%p, remote_key=%llu", mptcp_subflow_ctx(sk),
|
||||||
|
subflow->remote_key);
|
||||||
|
mptcp_finish_connect(sk);
|
||||||
|
subflow->conn_finished = 1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
static struct request_sock_ops subflow_request_sock_ops;
|
||||||
|
static struct tcp_request_sock_ops subflow_request_sock_ipv4_ops;
|
||||||
|
|
||||||
|
static int subflow_v4_conn_request(struct sock *sk, struct sk_buff *skb)
|
||||||
|
{
|
||||||
|
struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk);
|
||||||
|
|
||||||
|
pr_debug("subflow=%p", subflow);
|
||||||
|
|
||||||
|
/* Never answer to SYNs sent to broadcast or multicast */
|
||||||
|
if (skb_rtable(skb)->rt_flags & (RTCF_BROADCAST | RTCF_MULTICAST))
|
||||||
|
goto drop;
|
||||||
|
|
||||||
|
return tcp_conn_request(&subflow_request_sock_ops,
|
||||||
|
&subflow_request_sock_ipv4_ops,
|
||||||
|
sk, skb);
|
||||||
|
drop:
|
||||||
|
tcp_listendrop(sk);
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
#if IS_ENABLED(CONFIG_MPTCP_IPV6)
|
||||||
|
static struct tcp_request_sock_ops subflow_request_sock_ipv6_ops;
|
||||||
|
static struct inet_connection_sock_af_ops subflow_v6_specific;
|
||||||
|
static struct inet_connection_sock_af_ops subflow_v6m_specific;
|
||||||
|
|
||||||
|
static int subflow_v6_conn_request(struct sock *sk, struct sk_buff *skb)
|
||||||
|
{
|
||||||
|
struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk);
|
||||||
|
|
||||||
|
pr_debug("subflow=%p", subflow);
|
||||||
|
|
||||||
|
if (skb->protocol == htons(ETH_P_IP))
|
||||||
|
return subflow_v4_conn_request(sk, skb);
|
||||||
|
|
||||||
|
if (!ipv6_unicast_destination(skb))
|
||||||
|
goto drop;
|
||||||
|
|
||||||
|
return tcp_conn_request(&subflow_request_sock_ops,
|
||||||
|
&subflow_request_sock_ipv6_ops, sk, skb);
|
||||||
|
|
||||||
|
drop:
|
||||||
|
tcp_listendrop(sk);
|
||||||
|
return 0; /* don't send reset */
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
|
static struct sock *subflow_syn_recv_sock(const struct sock *sk,
|
||||||
|
struct sk_buff *skb,
|
||||||
|
struct request_sock *req,
|
||||||
|
struct dst_entry *dst,
|
||||||
|
struct request_sock *req_unhash,
|
||||||
|
bool *own_req)
|
||||||
|
{
|
||||||
|
struct mptcp_subflow_context *listener = mptcp_subflow_ctx(sk);
|
||||||
|
struct sock *child;
|
||||||
|
|
||||||
|
pr_debug("listener=%p, req=%p, conn=%p", listener, req, listener->conn);
|
||||||
|
|
||||||
|
/* if the sk is MP_CAPABLE, we already received the client key */
|
||||||
|
|
||||||
|
child = listener->icsk_af_ops->syn_recv_sock(sk, skb, req, dst,
|
||||||
|
req_unhash, own_req);
|
||||||
|
|
||||||
|
if (child && *own_req) {
|
||||||
|
if (!mptcp_subflow_ctx(child)) {
|
||||||
|
pr_debug("Closing child socket");
|
||||||
|
inet_sk_set_state(child, TCP_CLOSE);
|
||||||
|
sock_set_flag(child, SOCK_DEAD);
|
||||||
|
inet_csk_destroy_sock(child);
|
||||||
|
child = NULL;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return child;
|
||||||
|
}
|
||||||
|
|
||||||
|
static struct inet_connection_sock_af_ops subflow_specific;
|
||||||
|
|
||||||
|
static struct inet_connection_sock_af_ops *
|
||||||
|
subflow_default_af_ops(struct sock *sk)
|
||||||
|
{
|
||||||
|
#if IS_ENABLED(CONFIG_MPTCP_IPV6)
|
||||||
|
if (sk->sk_family == AF_INET6)
|
||||||
|
return &subflow_v6_specific;
|
||||||
|
#endif
|
||||||
|
return &subflow_specific;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Switch a subflow socket between its default af_ops and the
 * v4-mapped-on-v6 table.
 *
 * @sk:     subflow socket
 * @mapped: true selects subflow_v6m_specific, false selects the table
 *          returned by subflow_default_af_ops()
 *
 * Nothing happens when the socket already uses the selected table.
 * Otherwise the table currently installed is stored in the subflow
 * context and the selected one takes its place. Without
 * CONFIG_MPTCP_IPV6 the function body is empty.
 */
void mptcp_handle_ipv6_mapped(struct sock *sk, bool mapped)
{
#if IS_ENABLED(CONFIG_MPTCP_IPV6)
	struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk);
	struct inet_connection_sock *icsk = inet_csk(sk);
	struct inet_connection_sock_af_ops *target;

	target = mapped ? &subflow_v6m_specific : subflow_default_af_ops(sk);

	/* '\n' terminated so the message is not held back by printk */
	pr_debug("subflow=%p family=%d ops=%p target=%p mapped=%d\n",
		 subflow, sk->sk_family, icsk->icsk_af_ops, target, mapped);

	if (likely(icsk->icsk_af_ops == target))
		return;

	subflow->icsk_af_ops = icsk->icsk_af_ops;
	icsk->icsk_af_ops = target;
#endif
}
|
||||||
|
|
||||||
int mptcp_subflow_create_socket(struct sock *sk, struct socket **new_sock)
|
int mptcp_subflow_create_socket(struct sock *sk, struct socket **new_sock)
|
||||||
{
|
{
|
||||||
struct mptcp_subflow_context *subflow;
|
struct mptcp_subflow_context *subflow;
|
||||||
@ -22,7 +201,8 @@ int mptcp_subflow_create_socket(struct sock *sk, struct socket **new_sock)
|
|||||||
struct socket *sf;
|
struct socket *sf;
|
||||||
int err;
|
int err;
|
||||||
|
|
||||||
err = sock_create_kern(net, PF_INET, SOCK_STREAM, IPPROTO_TCP, &sf);
|
err = sock_create_kern(net, sk->sk_family, SOCK_STREAM, IPPROTO_TCP,
|
||||||
|
&sf);
|
||||||
if (err)
|
if (err)
|
||||||
return err;
|
return err;
|
||||||
|
|
||||||
@ -60,6 +240,7 @@ static struct mptcp_subflow_context *subflow_create_ctx(struct sock *sk,
|
|||||||
return NULL;
|
return NULL;
|
||||||
|
|
||||||
rcu_assign_pointer(icsk->icsk_ulp_data, ctx);
|
rcu_assign_pointer(icsk->icsk_ulp_data, ctx);
|
||||||
|
INIT_LIST_HEAD(&ctx->node);
|
||||||
|
|
||||||
pr_debug("subflow=%p", ctx);
|
pr_debug("subflow=%p", ctx);
|
||||||
|
|
||||||
@ -70,6 +251,7 @@ static struct mptcp_subflow_context *subflow_create_ctx(struct sock *sk,
|
|||||||
|
|
||||||
static int subflow_ulp_init(struct sock *sk)
|
static int subflow_ulp_init(struct sock *sk)
|
||||||
{
|
{
|
||||||
|
struct inet_connection_sock *icsk = inet_csk(sk);
|
||||||
struct mptcp_subflow_context *ctx;
|
struct mptcp_subflow_context *ctx;
|
||||||
struct tcp_sock *tp = tcp_sk(sk);
|
struct tcp_sock *tp = tcp_sk(sk);
|
||||||
int err = 0;
|
int err = 0;
|
||||||
@ -91,6 +273,8 @@ static int subflow_ulp_init(struct sock *sk)
|
|||||||
pr_debug("subflow=%p, family=%d", ctx, sk->sk_family);
|
pr_debug("subflow=%p, family=%d", ctx, sk->sk_family);
|
||||||
|
|
||||||
tp->is_mptcp = 1;
|
tp->is_mptcp = 1;
|
||||||
|
ctx->icsk_af_ops = icsk->icsk_af_ops;
|
||||||
|
icsk->icsk_af_ops = subflow_default_af_ops(sk);
|
||||||
out:
|
out:
|
||||||
return err;
|
return err;
|
||||||
}
|
}
|
||||||
@ -105,15 +289,97 @@ static void subflow_ulp_release(struct sock *sk)
|
|||||||
kfree_rcu(ctx, rcu);
|
kfree_rcu(ctx, rcu);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static void subflow_ulp_fallback(struct sock *sk)
|
||||||
|
{
|
||||||
|
struct inet_connection_sock *icsk = inet_csk(sk);
|
||||||
|
|
||||||
|
icsk->icsk_ulp_ops = NULL;
|
||||||
|
rcu_assign_pointer(icsk->icsk_ulp_data, NULL);
|
||||||
|
tcp_sk(sk)->is_mptcp = 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
static void subflow_ulp_clone(const struct request_sock *req,
|
||||||
|
struct sock *newsk,
|
||||||
|
const gfp_t priority)
|
||||||
|
{
|
||||||
|
struct mptcp_subflow_request_sock *subflow_req = mptcp_subflow_rsk(req);
|
||||||
|
struct mptcp_subflow_context *old_ctx = mptcp_subflow_ctx(newsk);
|
||||||
|
struct mptcp_subflow_context *new_ctx;
|
||||||
|
|
||||||
|
if (!subflow_req->mp_capable) {
|
||||||
|
subflow_ulp_fallback(newsk);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
new_ctx = subflow_create_ctx(newsk, priority);
|
||||||
|
if (new_ctx == NULL) {
|
||||||
|
subflow_ulp_fallback(newsk);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
new_ctx->conn_finished = 1;
|
||||||
|
new_ctx->icsk_af_ops = old_ctx->icsk_af_ops;
|
||||||
|
new_ctx->mp_capable = 1;
|
||||||
|
new_ctx->fourth_ack = 1;
|
||||||
|
new_ctx->remote_key = subflow_req->remote_key;
|
||||||
|
new_ctx->local_key = subflow_req->local_key;
|
||||||
|
}
|
||||||
|
|
||||||
static struct tcp_ulp_ops subflow_ulp_ops __read_mostly = {
|
static struct tcp_ulp_ops subflow_ulp_ops __read_mostly = {
|
||||||
.name = "mptcp",
|
.name = "mptcp",
|
||||||
.owner = THIS_MODULE,
|
.owner = THIS_MODULE,
|
||||||
.init = subflow_ulp_init,
|
.init = subflow_ulp_init,
|
||||||
.release = subflow_ulp_release,
|
.release = subflow_ulp_release,
|
||||||
|
.clone = subflow_ulp_clone,
|
||||||
};
|
};
|
||||||
|
|
||||||
|
static int subflow_ops_init(struct request_sock_ops *subflow_ops)
|
||||||
|
{
|
||||||
|
subflow_ops->obj_size = sizeof(struct mptcp_subflow_request_sock);
|
||||||
|
subflow_ops->slab_name = "request_sock_subflow";
|
||||||
|
|
||||||
|
subflow_ops->slab = kmem_cache_create(subflow_ops->slab_name,
|
||||||
|
subflow_ops->obj_size, 0,
|
||||||
|
SLAB_ACCOUNT |
|
||||||
|
SLAB_TYPESAFE_BY_RCU,
|
||||||
|
NULL);
|
||||||
|
if (!subflow_ops->slab)
|
||||||
|
return -ENOMEM;
|
||||||
|
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
void mptcp_subflow_init(void)
|
void mptcp_subflow_init(void)
|
||||||
{
|
{
|
||||||
|
subflow_request_sock_ops = tcp_request_sock_ops;
|
||||||
|
if (subflow_ops_init(&subflow_request_sock_ops) != 0)
|
||||||
|
panic("MPTCP: failed to init subflow request sock ops\n");
|
||||||
|
|
||||||
|
subflow_request_sock_ipv4_ops = tcp_request_sock_ipv4_ops;
|
||||||
|
subflow_request_sock_ipv4_ops.init_req = subflow_v4_init_req;
|
||||||
|
|
||||||
|
subflow_specific = ipv4_specific;
|
||||||
|
subflow_specific.conn_request = subflow_v4_conn_request;
|
||||||
|
subflow_specific.syn_recv_sock = subflow_syn_recv_sock;
|
||||||
|
subflow_specific.sk_rx_dst_set = subflow_finish_connect;
|
||||||
|
|
||||||
|
#if IS_ENABLED(CONFIG_MPTCP_IPV6)
|
||||||
|
subflow_request_sock_ipv6_ops = tcp_request_sock_ipv6_ops;
|
||||||
|
subflow_request_sock_ipv6_ops.init_req = subflow_v6_init_req;
|
||||||
|
|
||||||
|
subflow_v6_specific = ipv6_specific;
|
||||||
|
subflow_v6_specific.conn_request = subflow_v6_conn_request;
|
||||||
|
subflow_v6_specific.syn_recv_sock = subflow_syn_recv_sock;
|
||||||
|
subflow_v6_specific.sk_rx_dst_set = subflow_finish_connect;
|
||||||
|
|
||||||
|
subflow_v6m_specific = subflow_v6_specific;
|
||||||
|
subflow_v6m_specific.queue_xmit = ipv4_specific.queue_xmit;
|
||||||
|
subflow_v6m_specific.send_check = ipv4_specific.send_check;
|
||||||
|
subflow_v6m_specific.net_header_len = ipv4_specific.net_header_len;
|
||||||
|
subflow_v6m_specific.mtu_reduced = ipv4_specific.mtu_reduced;
|
||||||
|
subflow_v6m_specific.net_frag_header_len = 0;
|
||||||
|
#endif
|
||||||
|
|
||||||
if (tcp_register_ulp(&subflow_ulp_ops) != 0)
|
if (tcp_register_ulp(&subflow_ulp_ops) != 0)
|
||||||
panic("MPTCP: failed to register subflows to ULP\n");
|
panic("MPTCP: failed to register subflows to ULP\n");
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user