81 lines
3.1 KiB
Diff
81 lines
3.1 KiB
Diff
|
From 98e09386c0ef4dfd48af7ba60ff908f0d525cdee Mon Sep 17 00:00:00 2001
|
||
|
From: Eric Dumazet <edumazet@google.com>
|
||
|
Date: Wed, 13 Nov 2013 14:32:54 +0000
|
||
|
Subject: tcp: tsq: restore minimal amount of queueing
|
||
|
|
||
|
After commit c9eeec26e32e ("tcp: TSQ can use a dynamic limit"), several
|
||
|
users reported throughput regressions, notably on mvneta and wifi
|
||
|
adapters.
|
||
|
|
||
|
802.11 AMPDU requires a fair amount of queueing to be effective.
|
||
|
|
||
|
This patch partially reverts the change done in tcp_write_xmit()
|
||
|
so that the minimal amount is sysctl_tcp_limit_output_bytes.
|
||
|
|
||
|
It also removes the use of this sysctl while building skb stored
|
||
|
in write queue, as TSO autosizing does the right thing anyway.
|
||
|
|
||
|
Users with well-behaved NICs and a correct qdisc (like sch_fq)
|
||
|
can then lower the default sysctl_tcp_limit_output_bytes value from
|
||
|
128KB to 8KB.
|
||
|
|
||
|
This new usage of sysctl_tcp_limit_output_bytes permits each driver
|
||
|
author to check how their driver performs when/if the value is set
|
||
|
to a minimum of 4KB.
|
||
|
|
||
|
Normally, line rate for a single TCP flow should be possible,
|
||
|
but some drivers rely on timers to perform TX completion and
|
||
|
too long TX completion delays prevent reaching full throughput.
|
||
|
|
||
|
Fixes: c9eeec26e32e ("tcp: TSQ can use a dynamic limit")
|
||
|
Signed-off-by: Eric Dumazet <edumazet@google.com>
|
||
|
Reported-by: Sujith Manoharan <sujith@msujith.org>
|
||
|
Reported-by: Arnaud Ebalard <arno@natisbad.org>
|
||
|
Tested-by: Sujith Manoharan <sujith@msujith.org>
|
||
|
Cc: Felix Fietkau <nbd@openwrt.org>
|
||
|
Signed-off-by: David S. Miller <davem@davemloft.net>
|
||
|
---
|
||
|
--- a/Documentation/networking/ip-sysctl.txt
|
||
|
+++ b/Documentation/networking/ip-sysctl.txt
|
||
|
@@ -571,9 +571,6 @@ tcp_limit_output_bytes - INTEGER
|
||
|
typical pfifo_fast qdiscs.
|
||
|
tcp_limit_output_bytes limits the number of bytes on qdisc
|
||
|
or device to reduce artificial RTT/cwnd and reduce bufferbloat.
|
||
|
- Note: For GSO/TSO enabled flows, we try to have at least two
|
||
|
- packets in flight. Reducing tcp_limit_output_bytes might also
|
||
|
- reduce the size of individual GSO packet (64KB being the max)
|
||
|
Default: 131072
|
||
|
|
||
|
tcp_challenge_ack_limit - INTEGER
|
||
|
--- a/net/ipv4/tcp.c
|
||
|
+++ b/net/ipv4/tcp.c
|
||
|
@@ -807,12 +807,6 @@ static unsigned int tcp_xmit_size_goal(s
|
||
|
xmit_size_goal = min_t(u32, gso_size,
|
||
|
sk->sk_gso_max_size - 1 - hlen);
|
||
|
|
||
|
- /* TSQ : try to have at least two segments in flight
|
||
|
- * (one in NIC TX ring, another in Qdisc)
|
||
|
- */
|
||
|
- xmit_size_goal = min_t(u32, xmit_size_goal,
|
||
|
- sysctl_tcp_limit_output_bytes >> 1);
|
||
|
-
|
||
|
xmit_size_goal = tcp_bound_to_half_wnd(tp, xmit_size_goal);
|
||
|
|
||
|
/* We try hard to avoid divides here */
|
||
|
--- a/net/ipv4/tcp_output.c
|
||
|
+++ b/net/ipv4/tcp_output.c
|
||
|
@@ -1866,8 +1866,12 @@ static bool tcp_write_xmit(struct sock *
|
||
|
* - better RTT estimation and ACK scheduling
|
||
|
* - faster recovery
|
||
|
* - high rates
|
||
|
+ * Alas, some drivers / subsystems require a fair amount
|
||
|
+ * of queued bytes to ensure line rate.
|
||
|
+ * One example is wifi aggregation (802.11 AMPDU)
|
||
|
*/
|
||
|
- limit = max(skb->truesize, sk->sk_pacing_rate >> 10);
|
||
|
+ limit = max_t(unsigned int, sysctl_tcp_limit_output_bytes,
|
||
|
+ sk->sk_pacing_rate >> 10);
|
||
|
|
||
|
if (atomic_read(&sk->sk_wmem_alloc) > limit) {
|
||
|
set_bit(TSQ_THROTTLED, &tp->tsq_flags);
|