RFC3522 for FreeBSD.  Implemented by Jeffrey Hsu and ported by delphij

$Phantasm: delphijweb/research/freebsd/rfc3522.diff,v 1.17 2007/05/17 02:25:18 delphij Exp $

Summary of what this patch does:
  * Adds the net.inet.tcp.eifel sysctl (default on).  When the first
    acceptable ACK after a retransmission echoes a timestamp older than
    the retransmission time, the retransmission is treated as spurious
    and the saved congestion state is restored.
  * When the Eifel check is not applicable, the pre-existing
    t_badrxtwin heuristic is used as the fallback.
  * Factors the save/restore of congestion state into
    tcp_save_congestion_state() and tcp_revert_congestion_state().
  * Replaces the tcps_sndrexmitbad counter with four finer-grained
    counters and teaches netstat(1) to print them.

Review notes:
  * tcp_timer_rexmt() keeps arming t_badrxtwin (that line is left in
    place as context instead of being removed).  Nothing else in this
    patch sets t_badrxtwin to a non-zero value, so without it the
    "ticks < tp->t_badrxtwin" fallback in tcp_input.c could never
    trigger and tcps_rttdetected would always stay at zero.
  * Leading whitespace in this copy was reconstructed following
    style(9); if a hunk fails on whitespace, apply with "patch -l".

Index: sys/netinet/tcp_input.c
===================================================================
RCS file: /home/ncvs/src/sys/netinet/tcp_input.c,v
retrieving revision 1.351
diff -u -p -u -r1.351 tcp_input.c
--- sys/netinet/tcp_input.c	16 May 2007 17:14:25 -0000	1.351
+++ sys/netinet/tcp_input.c	17 May 2007 02:17:03 -0000
@@ -1,4 +1,36 @@
 /*-
+ * Copyright (c) 2002, 2003, 2004 Jeffrey M. Hsu.  All rights reserved.
+ * Copyright (c) 2002, 2003, 2004 The DragonFly Project.  All rights reserved.
+ *
+ * This code is derived from software contributed to The DragonFly Project
+ * by Jeffrey M. Hsu.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of The DragonFly Project nor the names of its
+ *    contributors may be used to endorse or promote products derived
+ *    from this software without specific, prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+ * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
+ * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
+ * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
+ * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+/*-
  * Copyright (c) 1982, 1986, 1988, 1990, 1993, 1994, 1995
  *	The Regents of the University of California.  All rights reserved.
  *
@@ -129,6 +161,11 @@ SYSCTL_INT(_net_inet_tcp, OID_AUTO, rfc3
     &tcp_do_rfc3390, 0,
     "Enable RFC 3390 (Increasing TCP's Initial Congestion Window)");
 
+static int tcp_do_eifel_detect = 1;
+SYSCTL_INT(_net_inet_tcp, OID_AUTO, eifel, CTLFLAG_RW,
+    &tcp_do_eifel_detect, 0,
+    "Eifel detection algorithm (RFC 3522)");
+
 static int tcp_insecure_rst = 0;
 SYSCTL_INT(_net_inet_tcp, OID_AUTO, insecure_rst, CTLFLAG_RW,
     &tcp_insecure_rst, 0,
@@ -946,19 +983,26 @@ tcp_do_segment(struct mbuf *m, struct tc
 				++tcpstat.tcps_predack;
 				/*
 				 * "bad retransmit" recovery
-				 */
-				if (tp->t_rxtshift == 1 &&
-				    ticks < tp->t_badrxtwin) {
-					++tcpstat.tcps_sndrexmitbad;
-					tp->snd_cwnd = tp->snd_cwnd_prev;
-					tp->snd_ssthresh =
-					    tp->snd_ssthresh_prev;
-					tp->snd_recover = tp->snd_recover_prev;
-					if (tp->t_flags & TF_WASFRECOVERY)
-					    ENTER_FASTRECOVERY(tp);
-					tp->snd_nxt = tp->snd_max;
-					tp->t_badrxtwin = 0;
+				 *
+				 * If Eifel detection applies, then
+				 * it is deterministic, so use it
+				 * unconditionally over the old heuristic.
+				 * Otherwise, fall back to the old heuristic.
+				 */
+				if (tcp_do_eifel_detect &&
+				    (to.to_flags & TOF_TS) && to.to_tsecr &&
+				    (tp->t_flags & TF_FIRSTACCACK)) {
+					/* Eifel detection applicable. */
+					if (to.to_tsecr < tp->t_rexmtTS) {
+						tcp_revert_congestion_state(tp);
+						++tcpstat.tcps_eifeldetected;
+					}
+				} else if (tp->t_rxtshift == 1 &&
+				    ticks < tp->t_badrxtwin) {
+					tcp_revert_congestion_state(tp);
+					++tcpstat.tcps_rttdetected;
 				}
+				tp->t_flags &= ~(TF_FIRSTACCACK | TF_FASTREXMT);
 
 				/*
 				 * Recalculate the transmit timer / rtt.
@@ -1730,6 +1774,11 @@ tcp_do_segment(struct mbuf *m, struct tc
 							break;
 						}
 					}
+					if (tcp_do_eifel_detect &&
+					    (tp->t_flags & TF_RCVD_TSTMP)) {
+						tcp_save_congestion_state(tp);
+						tp->t_flags |= TF_FASTREXMT;
+					}
 					win = min(tp->snd_wnd, tp->snd_cwnd) /
 					    2 / tp->t_maxseg;
 					if (win < 2)
@@ -1871,15 +1920,17 @@ process_ACK:
 		 * original cwnd and ssthresh, and proceed to transmit where
 		 * we left off.
 		 */
-		if (tp->t_rxtshift == 1 && ticks < tp->t_badrxtwin) {
-			++tcpstat.tcps_sndrexmitbad;
-			tp->snd_cwnd = tp->snd_cwnd_prev;
-			tp->snd_ssthresh = tp->snd_ssthresh_prev;
-			tp->snd_recover = tp->snd_recover_prev;
-			if (tp->t_flags & TF_WASFRECOVERY)
-				ENTER_FASTRECOVERY(tp);
-			tp->snd_nxt = tp->snd_max;
-			tp->t_badrxtwin = 0;	/* XXX probably not required */
+		if (tcp_do_eifel_detect && acked &&
+		    (to.to_flags & TOF_TS) && to.to_tsecr &&
+		    (tp->t_flags & TF_FIRSTACCACK)) {
+			/* Eifel detection applicable. */
+			if (to.to_tsecr < tp->t_rexmtTS) {
+				tcp_revert_congestion_state(tp);
+				++tcpstat.tcps_eifeldetected;
+			}
+		} else if (tp->t_rxtshift == 1 && ticks < tp->t_badrxtwin) {
+			tcp_revert_congestion_state(tp);
+			++tcpstat.tcps_rttdetected;
 		}
 
 		/*
@@ -1927,6 +1978,9 @@ process_ACK:
 		if (acked == 0)
 			goto step6;
 
+		/* Stop looking for an acceptable ACK since one was received. */
+		tp->t_flags &= ~(TF_FIRSTACCACK | TF_FASTREXMT);
+
 		/*
 		 * When new data is acked, open the congestion window.
 		 * If the window gives us less than ssthresh packets
Index: sys/netinet/tcp_timer.c
===================================================================
RCS file: /home/ncvs/src/sys/netinet/tcp_timer.c,v
retrieving revision 1.93
diff -u -p -u -r1.93 tcp_timer.c
--- sys/netinet/tcp_timer.c	16 May 2007 17:14:25 -0000	1.93
+++ sys/netinet/tcp_timer.c	17 May 2007 02:17:03 -0000
@@ -1,4 +1,36 @@
 /*-
+ * Copyright (c) 2002, 2003, 2004 Jeffrey M. Hsu.  All rights reserved.
+ * Copyright (c) 2002, 2003, 2004 The DragonFly Project.  All rights reserved.
+ *
+ * This code is derived from software contributed to The DragonFly Project
+ * by Jeffrey M. Hsu.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of The DragonFly Project nor the names of its
+ *    contributors may be used to endorse or promote products derived
+ *    from this software without specific, prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+ * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
+ * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
+ * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
+ * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+/*-
  * Copyright (c) 1982, 1986, 1988, 1990, 1993, 1995
  *	The Regents of the University of California.  All rights reserved.
  *
@@ -562,6 +594,52 @@ tcp_timer_persist(struct tcpcb *tp, stru
 	return (0);
 }
 
+/*
+ * Save the congestion state (cwnd, ssthresh, snd_recover and whether
+ * fast recovery was in progress) before a retransmission, so that
+ * tcp_revert_congestion_state() can restore it if the retransmission
+ * turns out to be spurious.  When TF_RCVD_TSTMP is set, also record
+ * the time of the retransmission and start looking for the first
+ * acceptable ACK, as used by the Eifel detection checks (RFC 3522).
+ */
+void
+tcp_save_congestion_state(struct tcpcb *tp)
+{
+	tp->snd_cwnd_prev = tp->snd_cwnd;
+	tp->snd_ssthresh_prev = tp->snd_ssthresh;
+	tp->snd_recover_prev = tp->snd_recover;
+	if (IN_FASTRECOVERY(tp))
+		tp->t_flags |= TF_WASFRECOVERY;
+	else
+		tp->t_flags &= ~TF_WASFRECOVERY;
+	if (tp->t_flags & TF_RCVD_TSTMP) {
+		tp->t_rexmtTS = ticks;
+		tp->t_flags |= TF_FIRSTACCACK;
+	}
+}
+
+/*
+ * Undo a retransmission found to be spurious: restore the saved
+ * congestion state, count the event as a spurious Fast Retransmit or
+ * RTO, reset t_badrxtwin and t_rxtshift, and resume sending at snd_max.
+ */
+void
+tcp_revert_congestion_state(struct tcpcb *tp)
+{
+	tp->snd_cwnd = tp->snd_cwnd_prev;
+	tp->snd_ssthresh = tp->snd_ssthresh_prev;
+	tp->snd_recover = tp->snd_recover_prev;
+	if (tp->t_flags & TF_WASFRECOVERY)
+		ENTER_FASTRECOVERY(tp);
+	if (tp->t_flags & TF_FASTREXMT)
+		++tcpstat.tcps_sndfastrexmitbad;
+	else
+		++tcpstat.tcps_sndrtobad;
+	tp->t_badrxtwin = 0;
+	tp->t_rxtshift = 0;
+	tp->snd_nxt = tp->snd_max;
+}
+
 static int
 tcp_timer_rexmt(struct tcpcb *tp, struct inpcb *inp)
 {
@@ -592,14 +670,9 @@ tcp_timer_rexmt(struct tcpcb *tp, struct
 		 * "On Estimating End-to-End Network Path Properties" by
 		 * Allman and Paxson for more details.
 		 */
-		tp->snd_cwnd_prev = tp->snd_cwnd;
-		tp->snd_ssthresh_prev = tp->snd_ssthresh;
-		tp->snd_recover_prev = tp->snd_recover;
-		if (IN_FASTRECOVERY(tp))
-			tp->t_flags |= TF_WASFRECOVERY;
-		else
-			tp->t_flags &= ~TF_WASFRECOVERY;
 		tp->t_badrxtwin = ticks + (tp->t_srtt >> (TCP_RTT_SHIFT + 1));
+		tcp_save_congestion_state(tp);
+		tp->t_flags &= ~TF_FASTREXMT;
 	}
 	tcpstat.tcps_rexmttimeo++;
 	if (tp->t_state == TCPS_SYN_SENT)
Index: sys/netinet/tcp_var.h
===================================================================
RCS file: /home/ncvs/src/sys/netinet/tcp_var.h,v
retrieving revision 1.150
diff -u -p -u -r1.150 tcp_var.h
--- sys/netinet/tcp_var.h	16 May 2007 17:14:25 -0000	1.150
+++ sys/netinet/tcp_var.h	17 May 2007 02:17:03 -0000
@@ -123,6 +123,8 @@ struct tcpcb {
 #define	TF_SIGNATURE	0x400000	/* require MD5 digests (RFC2385) */
 #define	TF_FORCEDATA	0x800000	/* force out a byte */
 #define	TF_TSO		0x1000000	/* TSO enabled on this connection */
+#define	TF_FIRSTACCACK	0x2000000	/* Look for 1st acceptable ACK. */
+#define	TF_FASTREXMT	0x4000000	/* Did Fast Retransmit. */
 
 	tcp_seq	snd_una;		/* send unacknowledged */
 	tcp_seq	snd_max;		/* highest sequence number sent;
@@ -192,6 +194,7 @@ struct tcpcb {
 	u_long	snd_ssthresh_prev;	/* ssthresh prior to retransmit */
 	tcp_seq	snd_recover_prev;	/* snd_recover prior to retransmit */
 	u_long	t_badrxtwin;		/* window for retransmit recovery */
+	u_long	t_rexmtTS;		/* timestamp of last retransmit */
 	u_char	snd_limited;		/* segments limited transmitted */
 /* SACK related state */
 	int	snd_numholes;		/* number of holes seen by sender */
@@ -356,7 +359,10 @@ struct tcpstat {
 	u_long	tcps_sndbyte;		/* data bytes sent */
 	u_long	tcps_sndrexmitpack;	/* data packets retransmitted */
 	u_long	tcps_sndrexmitbyte;	/* data bytes retransmitted */
-	u_long	tcps_sndrexmitbad;	/* unnecessary packet retransmissions */
+	u_long	tcps_sndrtobad;		/* spurious RTO retransmissions */
+	u_long	tcps_sndfastrexmitbad;	/* spurious Fast Retransmissions */
+	u_long	tcps_eifeldetected;	/* Eifel-detected spurious rexmits */
+	u_long	tcps_rttdetected;	/* RTT-detected spurious RTO rexmits */
 	u_long	tcps_sndacks;		/* ack-only packets sent */
 	u_long	tcps_sndprobe;		/* window probes sent */
 	u_long	tcps_sndurg;		/* packets sent with URG only */
@@ -541,6 +547,8 @@ void	 tcp_tw_zone_change(void);
 int	 tcp_twcheck(struct inpcb *, struct tcpopt *, struct tcphdr *,
 	    struct mbuf *, int);
 int	 tcp_twrespond(struct tcptw *, int);
+void	 tcp_save_congestion_state(struct tcpcb *tp);
+void	 tcp_revert_congestion_state(struct tcpcb *tp);
 void	 tcp_setpersist(struct tcpcb *);
 #ifdef TCP_SIGNATURE
 int	 tcp_signature_compute(struct mbuf *, int, int, int, u_char *, u_int);
Index: usr.bin/netstat/inet.c
===================================================================
RCS file: /home/ncvs/src/usr.bin/netstat/inet.c,v
retrieving revision 1.76
diff -u -p -u -r1.76 inet.c
--- usr.bin/netstat/inet.c	13 May 2007 22:32:32 -0000	1.76
+++ usr.bin/netstat/inet.c	16 May 2007 03:27:38 -0000
@@ -385,8 +385,10 @@ tcp_stats(u_long off __unused, const cha
 		"\t\t%lu data packet%s (%lu byte%s)\n");
 	p2(tcps_sndrexmitpack, tcps_sndrexmitbyte,
 		"\t\t%lu data packet%s (%lu byte%s) retransmitted\n");
-	p(tcps_sndrexmitbad,
-		"\t\t%lu data packet%s unnecessarily retransmitted\n");
+	p(tcps_sndrtobad, "\t\t%lu spurious RTO retransmit%s\n");
+	p(tcps_sndfastrexmitbad, "\t\t%lu spurious Fast Retransmit%s\n");
+	p(tcps_eifeldetected, "\t\t%lu Eifel-detected spurious retransmit%s\n");
+	p(tcps_rttdetected, "\t\t%lu RTT-detected spurious retransmit%s\n");
 	p(tcps_mturesent, "\t\t%lu resend%s initiated by MTU discovery\n");
 	p2a(tcps_sndacks, tcps_delack,
 		"\t\t%lu ack-only packet%s (%lu delayed)\n");