diff --git a/net/mptcp/options.c b/net/mptcp/options.c
index 248e3930c0cbb1a9436eadd59f9674604b5b2267..8a59b3e445999d978c2cd7c87b4824c48be66572 100644
--- a/net/mptcp/options.c
+++ b/net/mptcp/options.c
@@ -530,6 +530,7 @@ static bool mptcp_established_options_dss(struct sock *sk, struct sk_buff *skb,
 		opts->ext_copy.ack64 = 0;
 	}
 	opts->ext_copy.use_ack = 1;
+	WRITE_ONCE(msk->old_wspace, __mptcp_space((struct sock *)msk));
 
 	/* Add kind/length/subtype/flag overhead if mapping is not populated */
 	if (dss_size == 0)
diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
index e1606e072218a22efa5aba995e6194fbe3ad21f2..4b7794835feacb23dd446cd775efffa2e462045f 100644
--- a/net/mptcp/protocol.c
+++ b/net/mptcp/protocol.c
@@ -407,16 +407,42 @@ static void mptcp_set_timeout(const struct sock *sk, const struct sock *ssk)
 	mptcp_sk(sk)->timer_ival = tout > 0 ? tout : TCP_RTO_MIN;
 }
 
-static void mptcp_send_ack(struct mptcp_sock *msk)
+static bool mptcp_subflow_active(struct mptcp_subflow_context *subflow)
+{
+	struct sock *ssk = mptcp_subflow_tcp_sock(subflow);
+
+	/* can't send if JOIN hasn't completed yet (i.e. is usable for mptcp) */
+	if (subflow->request_join && !subflow->fully_established)
+		return false;
+
+	/* only send if our side has not closed yet */
+	return ((1 << ssk->sk_state) & (TCPF_ESTABLISHED | TCPF_CLOSE_WAIT));
+}
+
+static void mptcp_send_ack(struct mptcp_sock *msk, bool force)
 {
 	struct mptcp_subflow_context *subflow;
+	struct sock *pick = NULL;
 
 	mptcp_for_each_subflow(msk, subflow) {
 		struct sock *ssk = mptcp_subflow_tcp_sock(subflow);
 
-		lock_sock(ssk);
-		tcp_send_ack(ssk);
-		release_sock(ssk);
+		if (force) {
+			lock_sock(ssk);
+			tcp_send_ack(ssk);
+			release_sock(ssk);
+			continue;
+		}
+
+		/* if the hinted ssk is still active, use it */
+		pick = ssk;
+		if (ssk == msk->ack_hint)
+			break;
+	}
+	if (!force && pick) {
+		lock_sock(pick);
+		tcp_cleanup_rbuf(pick, 1);
+		release_sock(pick);
 	}
 }
 
@@ -468,7 +494,7 @@ static bool mptcp_check_data_fin(struct sock *sk)
 
 		ret = true;
 		mptcp_set_timeout(sk, NULL);
-		mptcp_send_ack(msk);
+		mptcp_send_ack(msk, true);
 		mptcp_close_wake_up(sk);
 	}
 	return ret;
@@ -483,7 +509,6 @@ static bool __mptcp_move_skbs_from_subflow(struct mptcp_sock *msk,
 	unsigned int moved = 0;
 	bool more_data_avail;
 	struct tcp_sock *tp;
-	u32 old_copied_seq;
 	bool done = false;
 	int sk_rbuf;
 
@@ -500,7 +525,6 @@ static bool __mptcp_move_skbs_from_subflow(struct mptcp_sock *msk,
 
 	pr_debug("msk=%p ssk=%p", msk, ssk);
 	tp = tcp_sk(ssk);
-	old_copied_seq = tp->copied_seq;
 	do {
 		u32 map_remaining, offset;
 		u32 seq = tp->copied_seq;
@@ -564,11 +588,9 @@ static bool __mptcp_move_skbs_from_subflow(struct mptcp_sock *msk,
 			break;
 		}
 	} while (more_data_avail);
+	msk->ack_hint = ssk;
 
 	*bytes += moved;
-	if (tp->copied_seq != old_copied_seq)
-		tcp_cleanup_rbuf(ssk, 1);
-
 	return done;
 }
 
@@ -672,19 +694,8 @@ void mptcp_data_ready(struct sock *sk, struct sock *ssk)
 	if (atomic_read(&sk->sk_rmem_alloc) > sk_rbuf)
 		goto wake;
 
-	if (move_skbs_to_msk(msk, ssk))
-		goto wake;
+	move_skbs_to_msk(msk, ssk);
 
-	/* mptcp socket is owned, release_cb should retry */
-	if (!test_and_set_bit(TCP_DELACK_TIMER_DEFERRED,
-			      &sk->sk_tsq_flags)) {
-		sock_hold(sk);
-
-		/* need to try again, its possible release_cb() has already
-		 * been called after the test_and_set_bit() above.
-		 */
-		move_skbs_to_msk(msk, ssk);
-	}
 wake:
 	if (wake)
 		sk->sk_data_ready(sk);
@@ -1095,18 +1106,6 @@ static void mptcp_nospace(struct mptcp_sock *msk)
 	mptcp_clean_una((struct sock *)msk);
 }
 
-static bool mptcp_subflow_active(struct mptcp_subflow_context *subflow)
-{
-	struct sock *ssk = mptcp_subflow_tcp_sock(subflow);
-
-	/* can't send if JOIN hasn't completed yet (i.e. is usable for mptcp) */
-	if (subflow->request_join && !subflow->fully_established)
-		return false;
-
-	/* only send if our side has not closed yet */
-	return ((1 << ssk->sk_state) & (TCPF_ESTABLISHED | TCPF_CLOSE_WAIT));
-}
-
 #define MPTCP_SEND_BURST_SIZE		((1 << 16) - \
 					 sizeof(struct tcphdr) - \
 					 MAX_TCP_OPTION_SPACE - \
@@ -1534,7 +1533,7 @@ static void mptcp_rcv_space_adjust(struct mptcp_sock *msk, int copied)
 	msk->rcvq_space.time = mstamp;
 }
 
-static bool __mptcp_move_skbs(struct mptcp_sock *msk)
+static bool __mptcp_move_skbs(struct mptcp_sock *msk, unsigned int rcv)
 {
 	unsigned int moved = 0;
 	bool done;
@@ -1553,12 +1552,16 @@ static bool __mptcp_move_skbs(struct mptcp_sock *msk)
 
 		slowpath = lock_sock_fast(ssk);
 		done = __mptcp_move_skbs_from_subflow(msk, ssk, &moved);
+		if (moved && rcv) {
+			WRITE_ONCE(msk->rmem_pending, min(rcv, moved));
+			tcp_cleanup_rbuf(ssk, 1);
+			WRITE_ONCE(msk->rmem_pending, 0);
+		}
 		unlock_sock_fast(ssk, slowpath);
 	} while (!done);
 
 	if (mptcp_ofo_queue(msk) || moved > 0) {
-		if (!mptcp_check_data_fin((struct sock *)msk))
-			mptcp_send_ack(msk);
+		mptcp_check_data_fin((struct sock *)msk);
 		return true;
 	}
 	return false;
@@ -1582,8 +1585,8 @@ static int mptcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
 	target = sock_rcvlowat(sk, flags & MSG_WAITALL, len);
 	__mptcp_flush_join_list(msk);
 
-	while (len > (size_t)copied) {
-		int bytes_read;
+	for (;;) {
+		int bytes_read, old_space;
 
 		bytes_read = __mptcp_recvmsg_mskq(msk, msg, len - copied);
 		if (unlikely(bytes_read < 0)) {
@@ -1595,9 +1598,14 @@ static int mptcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
 		copied += bytes_read;
 
 		if (skb_queue_empty(&sk->sk_receive_queue) &&
-		    __mptcp_move_skbs(msk))
+		    __mptcp_move_skbs(msk, len - copied))
 			continue;
 
+		/* be sure to advertise window change */
+		old_space = READ_ONCE(msk->old_wspace);
+		if ((tcp_space(sk) - old_space) >= old_space)
+			mptcp_send_ack(msk, false);
+
 		/* only the master socket status is relevant here. The exit
 		 * conditions mirror closely tcp_recvmsg()
 		 */
@@ -1650,7 +1658,7 @@ static int mptcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
 		/* .. race-breaker: ssk might have gotten new data
 		 * after last __mptcp_move_skbs() returned false.
 		 */
-		if (unlikely(__mptcp_move_skbs(msk)))
+		if (unlikely(__mptcp_move_skbs(msk, 0)))
 			set_bit(MPTCP_DATA_READY, &msk->flags);
 	} else if (unlikely(!test_bit(MPTCP_DATA_READY, &msk->flags))) {
 		/* data to read but mptcp_wait_data() cleared DATA_READY */
@@ -1881,7 +1889,6 @@ static void mptcp_worker(struct work_struct *work)
 	if (test_and_clear_bit(MPTCP_WORK_CLOSE_SUBFLOW, &msk->flags))
 		__mptcp_close_subflow(msk);
 
-	__mptcp_move_skbs(msk);
 
 	if (mptcp_send_head(sk))
 		mptcp_push_pending(sk, 0);
@@ -1965,6 +1972,7 @@ static int __mptcp_init_sock(struct sock *sk)
 	msk->out_of_order_queue = RB_ROOT;
 	msk->first_pending = NULL;
 
+	msk->ack_hint = NULL;
 	msk->first = NULL;
 	inet_csk(sk)->icsk_sync_mss = mptcp_sync_mss;
 
@@ -2500,8 +2508,7 @@ static int mptcp_getsockopt(struct sock *sk, int level, int optname,
 	return -EOPNOTSUPP;
 }
 
-#define MPTCP_DEFERRED_ALL (TCPF_DELACK_TIMER_DEFERRED | \
-			    TCPF_WRITE_TIMER_DEFERRED)
+#define MPTCP_DEFERRED_ALL (TCPF_WRITE_TIMER_DEFERRED)
 
 /* this is very alike tcp_release_cb() but we must handle differently a
  * different set of events
@@ -2519,16 +2526,6 @@ static void mptcp_release_cb(struct sock *sk)
 
 	sock_release_ownership(sk);
 
-	if (flags & TCPF_DELACK_TIMER_DEFERRED) {
-		struct mptcp_sock *msk = mptcp_sk(sk);
-		struct sock *ssk;
-
-		ssk = mptcp_subflow_recv_lookup(msk);
-		if (!ssk || sk->sk_state == TCP_CLOSE ||
-		    !schedule_work(&msk->work))
-			__sock_put(sk);
-	}
-
 	if (flags & TCPF_WRITE_TIMER_DEFERRED) {
 		mptcp_retransmit_handler(sk);
 		__sock_put(sk);
diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h
index 67f86818203f30129c95679facea2dd873b39123..82d5626323b18665f83feafea43be70b0c1fcb92 100644
--- a/net/mptcp/protocol.h
+++ b/net/mptcp/protocol.h
@@ -220,10 +220,12 @@ struct mptcp_sock {
 	u64		rcv_data_fin_seq;
 	struct sock	*last_snd;
 	int		snd_burst;
+	int		old_wspace;
 	atomic64_t	snd_una;
 	atomic64_t	wnd_end;
 	unsigned long	timer_ival;
 	u32		token;
+	int		rmem_pending;
 	unsigned long	flags;
 	bool		can_ack;
 	bool		fully_established;
@@ -231,6 +233,7 @@ struct mptcp_sock {
 	bool		snd_data_fin_enable;
 	bool		use_64bit_ack; /* Set when we received a 64-bit DSN */
 	spinlock_t	join_list_lock;
+	struct sock	*ack_hint;
 	struct work_struct work;
 	struct sk_buff	*ooo_last_skb;
 	struct rb_root	out_of_order_queue;
@@ -258,6 +261,11 @@ static inline struct mptcp_sock *mptcp_sk(const struct sock *sk)
 	return (struct mptcp_sock *)sk;
 }
 
+static inline int __mptcp_space(const struct sock *sk)
+{
+	return tcp_space(sk) + READ_ONCE(mptcp_sk(sk)->rmem_pending);
+}
+
 static inline struct mptcp_data_frag *mptcp_send_head(const struct sock *sk)
 {
 	const struct mptcp_sock *msk = mptcp_sk(sk);
diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c
index d3c6b3a5ad555fd8123644b1aa0990106a367927..4d8abff1be1830adef97534de701829d49b3939b 100644
--- a/net/mptcp/subflow.c
+++ b/net/mptcp/subflow.c
@@ -850,8 +850,6 @@ static void mptcp_subflow_discard_data(struct sock *ssk, struct sk_buff *skb,
 	sk_eat_skb(ssk, skb);
 	if (mptcp_subflow_get_map_offset(subflow) >= subflow->map_data_len)
 		subflow->map_valid = 0;
-	if (incr)
-		tcp_cleanup_rbuf(ssk, incr);
 }
 
 static bool subflow_check_data_avail(struct sock *ssk)
@@ -973,7 +971,7 @@ void mptcp_space(const struct sock *ssk, int *space, int *full_space)
 	const struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk);
 	const struct sock *sk = subflow->conn;
 
-	*space = tcp_space(sk);
+	*space = __mptcp_space(sk);
 	*full_space = tcp_full_space(sk);
 }
 