<div dir="ltr"><div>Greg,</div>
<div> </div>
<div>Do we know if that has made it into any Linux kernel? </div>
<div> </div>
<div>kind regards</div>
<div> </div>
<div>Walid<br><br></div>
<div class="gmail_quote">On 17 December 2009 05:05, Greg Lindahl <span dir="ltr">&lt;<a href="mailto:lindahl@pbm.com">lindahl@pbm.com</a>&gt;</span> wrote:<br>
<blockquote class="gmail_quote" style="PADDING-LEFT: 1ex; MARGIN: 0px 0px 0px 0.8ex; BORDER-LEFT: #ccc 1px solid">The following patch, not yet accepted into the kernel, should allow<br>local TCP connections to start up faster, while remote ones keep the<br>
same behavior of slow start.<br><br>----- Forwarded message from <a href="mailto:chavey@google.com">chavey@google.com</a> -----<br><br>From: <a href="mailto:chavey@google.com">chavey@google.com</a><br>Date: Tue, 15 Dec 2009 13:15:28 -0800<br>
To: <a href="mailto:davem@davemloft.net">davem@davemloft.net</a><br>CC: <a href="mailto:netdev@vger.kernel.org">netdev@vger.kernel.org</a>, <a href="mailto:therbert@google.com">therbert@google.com</a>, <a href="mailto:chavey@google.com">chavey@google.com</a>,<br>
       <a href="mailto:eric.dumazet@gmail.com">eric.dumazet@gmail.com</a><br>Subject: [PATCH] Add rtnetlink init_rcvwnd to set the TCP initial receive window<br>X-Mailing-List: <a href="mailto:netdev@vger.kernel.org">netdev@vger.kernel.org</a><br>
<br>Add rtnetlink init_rcvwnd to set the TCP initial receive window size<br>advertised by passive and active TCP connections.<br>The current Linux TCP implementation limits the advertised TCP initial<br>receive window to the one prescribed by slow start. For short lived<br>
TCP connections used for transaction type of traffic (i.e. http<br>requests), bounding the advertised TCP initial receive window results<br>in increased latency to complete the transaction.<br>Support for setting initial congestion window is already supported<br>
using rtnetlink init_cwnd, but the feature is useless without the<br>ability to set a larger TCP initial receive window.<br>The rtnetlink init_rcvwnd allows increasing the TCP initial receive<br>window, allowing TCP connection to advertise larger TCP receive window<br>
than the ones bounded by slow start.<br><br>Signed-off-by: Laurent Chavey &lt;<a href="mailto:chavey@google.com">chavey@google.com</a>&gt;<br>---<br> include/linux/rtnetlink.h |    2 ++<br> include/net/dst.h         |    2 --<br>
 include/net/tcp.h         |    3 ++-<br> net/ipv4/syncookies.c     |    3 ++-<br> net/ipv4/tcp_output.c     |   17 +++++++++++++----<br> net/ipv6/syncookies.c     |    3 ++-<br> 6 files changed, 21 insertions(+), 9 deletions(-)<br>
<br>diff --git a/include/linux/rtnetlink.h b/include/linux/rtnetlink.h<br>index adf2068..db6f614 100644<br>--- a/include/linux/rtnetlink.h<br>+++ b/include/linux/rtnetlink.h<br>@@ -371,6 +371,8 @@ enum<br> #define RTAX_FEATURES RTAX_FEATURES<br>
       RTAX_RTO_MIN,<br> #define RTAX_RTO_MIN RTAX_RTO_MIN<br>+       RTAX_INITRWND,<br>+#define RTAX_INITRWND RTAX_INITRWND<br>       __RTAX_MAX<br> };<br><br>diff --git a/include/net/dst.h b/include/net/dst.h<br>index 5a900dd..6ef812a 100644<br>
--- a/include/net/dst.h<br>+++ b/include/net/dst.h<br>@@ -84,8 +84,6 @@ struct dst_entry<br>        * (L1_CACHE_SIZE would be too much)<br>        */<br> #ifdef CONFIG_64BIT<br>-       long                    __pad_to_align_refcnt[2];<br>
-#else<br>       long                    __pad_to_align_refcnt[1];<br> #endif<br>       /*<br>diff --git a/include/net/tcp.h b/include/net/tcp.h<br>index 03a49c7..6f95d32 100644<br>--- a/include/net/tcp.h<br>+++ b/include/net/tcp.h<br>
@@ -972,7 +972,8 @@ static inline void tcp_sack_reset(struct tcp_options_received *rx_opt)<br> /* Determine a window scaling and initial window to offer. */<br> extern void tcp_select_initial_window(int __space, __u32 mss,<br>
                                     __u32 *rcv_wnd, __u32 *window_clamp,<br>-                                     int wscale_ok, __u8 *rcv_wscale);<br>+                                     int wscale_ok, __u8 *rcv_wscale,<br>
+                                     __u32 init_rcv_wnd);<br><br> static inline int tcp_win_from_space(int space)<br> {<br>diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c<br>index a6e0e07..d43173c 100644<br>--- a/net/ipv4/syncookies.c<br>
+++ b/net/ipv4/syncookies.c<br>@@ -356,7 +356,8 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb,<br><br>       tcp_select_initial_window(tcp_full_space(sk), req->mss,<br>                                 &req->rcv_wnd, &req->window_clamp,<br>
-                                 ireq->wscale_ok, &rcv_wscale);<br>+                                 ireq->wscale_ok, &rcv_wscale,<br>+                                 dst_metric(&rt->u.dst, RTAX_INITRWND));<br>
<br>       ireq->rcv_wscale  = rcv_wscale;<br><br>diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c<br>index fcd278a..ee42c75 100644<br>--- a/net/ipv4/tcp_output.c<br>+++ b/net/ipv4/tcp_output.c<br>@@ -179,7 +179,8 @@ static inline void tcp_event_ack_sent(struct sock *sk, unsigned int pkts)<br>
 */<br> void tcp_select_initial_window(int __space, __u32 mss,<br>                              __u32 *rcv_wnd, __u32 *window_clamp,<br>-                              int wscale_ok, __u8 *rcv_wscale)<br>+                              int wscale_ok, __u8 *rcv_wscale,<br>
+                              __u32 init_rcv_wnd)<br> {<br>       unsigned int space = (__space < 0 ? 0 : __space);<br><br>@@ -228,7 +229,13 @@ void tcp_select_initial_window(int __space, __u32 mss,<br>                       init_cwnd = 2;<br>
               else if (mss > 1460)<br>                       init_cwnd = 3;<br>-               if (*rcv_wnd > init_cwnd * mss)<br>+               /* when initializing use the value from init_rcv_wnd<br>+                * rather than the default from above<br>
+                */<br>+               if (init_rcv_wnd &&<br>+                   (*rcv_wnd > init_rcv_wnd * mss))<br>+                       *rcv_wnd = init_rcv_wnd * mss;<br>+               else if (*rcv_wnd > init_cwnd * mss)<br>
                       *rcv_wnd = init_cwnd * mss;<br>       }<br><br>@@ -2254,7 +2261,8 @@ struct sk_buff *tcp_make_synack(struct sock *sk, struct dst_entry *dst,<br>                       &req->rcv_wnd,<br>                       &req->window_clamp,<br>
                       ireq->wscale_ok,<br>-                       &rcv_wscale);<br>+                       &rcv_wscale,<br>+                       dst_metric(dst, RTAX_INITRWND));<br>               ireq->rcv_wscale = rcv_wscale;<br>
       }<br><br>@@ -2342,7 +2350,8 @@ static void tcp_connect_init(struct sock *sk)<br>                                 &tp->rcv_wnd,<br>                                 &tp->window_clamp,<br>                                 sysctl_tcp_window_scaling,<br>
-                                 &rcv_wscale);<br>+                                 &rcv_wscale,<br>+                                 dst_metric(dst, RTAX_INITRWND));<br><br>       tp->rx_opt.rcv_wscale = rcv_wscale;<br>
       tp->rcv_ssthresh = tp->rcv_wnd;<br>diff --git a/net/ipv6/syncookies.c b/net/ipv6/syncookies.c<br>index 6b6ae91..c8982aa 100644<br>--- a/net/ipv6/syncookies.c<br>+++ b/net/ipv6/syncookies.c<br>@@ -267,7 +267,8 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb)<br>
       req->window_clamp = tp->window_clamp ? :dst_metric(dst, RTAX_WINDOW);<br>       tcp_select_initial_window(tcp_full_space(sk), req->mss,<br>                                 &req->rcv_wnd, &req->window_clamp,<br>
-                                 ireq->wscale_ok, &rcv_wscale);<br>+                                 ireq->wscale_ok, &rcv_wscale,<br>+                                 dst_metric(dst, RTAX_INITRWND));<br><br>
       ireq->rcv_wscale = rcv_wscale;<br><br>--<br>To unsubscribe from this list: send the line "unsubscribe netdev" in<br>the body of a message to <a href="mailto:majordomo@vger.kernel.org">majordomo@vger.kernel.org</a><br>
More majordomo info at  <a href="http://vger.kernel.org/majordomo-info.html" target="_blank">http://vger.kernel.org/majordomo-info.html</a><br><br>----- End forwarded message -----<br>_______________________________________________<br>
Beowulf mailing list, <a href="mailto:Beowulf@beowulf.org">Beowulf@beowulf.org</a> sponsored by Penguin Computing<br>To change your subscription (digest mode or unsubscribe) visit <a href="http://www.beowulf.org/mailman/listinfo/beowulf" target="_blank">http://www.beowulf.org/mailman/listinfo/beowulf</a><br>
</blockquote></div><br></div>