--- Copyright (c) 2007-2008, Centre for Advanced Internet Architectures
--- Swinburne University of Technology, Melbourne, Australia
--- (CRICOS number 00111D).
--- Copyright (c) 2008-2009, Lawrence Stewart <lastewart@swin.edu.au>
---
--- All rights reserved.
---
--- CAIA Modular Congestion Control Patch v0.9.2
---
--- This patch was created against revision 190777 of the FreeBSD 8-CURRENT
--- Subversion source tree by running the following command and cleaning up the
--- output to remove irrelevant parts:
---
--- svn diff http://svn.freebsd.org/base/head/sys@190777 \
--- http://svn.freebsd.org/base/projects/tcp_cc_8.x/sys@190777
---
--- To obtain the correct revision of the FreeBSD source tree that this patch
--- applies to, and store it in the local directory "/path/to/src", run:
---
--- svn co -r 190777 http://svn.freebsd.org/base/head </path/to/src>
---
--- Make sure the base system you are installing onto is already running
--- FreeBSD 8.x before continuing.
---
--- Issuing the following commands will result in a running modular congestion
--- control capable system:
---
--- cd /path/to/src/sys
--- patch -p0 < /path/to/caia_modularcc_v0.9.2_8.x.r190777.patch
--- cd /path/to/src/
--- make buildworld buildkernel installkernel installworld
--- mergemaster -iF -m /path/to/src
--- reboot
---
--- The modular congestion control patch was first released in 2007 by
--- James Healy and Lawrence Stewart whilst working on the NewTCP research
--- project at Swinburne University's Centre for Advanced Internet
--- Architectures, Melbourne, Australia, which was made possible in part by a
--- grant from the Cisco University Research Program Fund at
--- Community Foundation Silicon Valley. More details are available at:
---     http://caia.swin.edu.au/urp/newtcp/
---
--- Lawrence Stewart has continued development of this work since 2008 in his
--- spare time, and is currently the sole maintainer. All contact regarding
--- this patch should be directed to him via email: lastewart@swin.edu.au
---
--- Redistribution and use in source and binary forms, with or without
--- modification, are permitted provided that the following conditions
--- are met:
--- 1. Redistributions of source code must retain the above copyright
---    notice, this list of conditions and the following disclaimer.
--- 2. Redistributions in binary form must reproduce the above copyright
---    notice, this list of conditions and the following disclaimer in the
---    documentation and/or other materials provided with the distribution.
--- 3. The names of the authors, "Swinburne University of Technology" and the
---    "Centre for Advanced Internet Architectures" may not be used to endorse
---    or promote products derived from this software without specific
---    prior written permission.
---
--- THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
--- ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
--- IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
--- ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
--- FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
--- DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
--- OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
--- HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
--- LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
--- OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
--- SUCH DAMAGE.
---
Index: conf/files
===================================================================
--- conf/files	(.../head/sys)	(revision 190777)
+++ conf/files	(.../projects/tcp_cc_8.x/sys)	(revision 190777)
@@ -2337,6 +2337,7 @@
 netinet/ip_options.c		optional inet
 netinet/ip_output.c		optional inet
 netinet/raw_ip.c		optional inet
+netinet/cc.c			optional inet
 netinet/sctp_asconf.c		optional inet sctp
 netinet/sctp_auth.c		optional inet sctp
 netinet/sctp_bsd_addr.c		optional inet sctp
Index: netinet/tcp_input.c
===================================================================
--- netinet/tcp_input.c	(.../head/sys)	(revision 190777)
+++ netinet/tcp_input.c	(.../projects/tcp_cc_8.x/sys)	(revision 190777)
@@ -62,6 +62,7 @@
 
 #define TCPSTATES		/* for logging */
 
+#include <netinet/cc.h>
 #include <netinet/in.h>
 #include <netinet/in_pcb.h>
 #include <netinet/in_systm.h>
@@ -76,7 +77,6 @@
 #include <netinet6/in6_pcb.h>
 #include <netinet6/ip6_var.h>
 #include <netinet6/nd6.h>
-#include <netinet/tcp.h>
 #include <netinet/tcp_fsm.h>
 #include <netinet/tcp_seq.h>
 #include <netinet/tcp_timer.h>
@@ -102,7 +102,7 @@
 
 #include <security/mac/mac_framework.h>
 
-static const int tcprexmtthresh = 3;
+const int tcprexmtthresh = 3;
 
 #ifdef VIMAGE_GLOBALS
 struct	tcpstat tcpstat;
@@ -1264,14 +1264,9 @@
 			if (SEQ_GT(th->th_ack, tp->snd_una) &&
 			    SEQ_LEQ(th->th_ack, tp->snd_max) &&
 			    tp->snd_cwnd >= tp->snd_wnd &&
-			    ((!V_tcp_do_newreno &&
-			      !(tp->t_flags & TF_SACK_PERMIT) &&
-			      tp->t_dupacks < tcprexmtthresh) ||
-			     ((V_tcp_do_newreno ||
-			       (tp->t_flags & TF_SACK_PERMIT)) &&
-			      !IN_FASTRECOVERY(tp) &&
-			      (to.to_flags & TOF_SACK) == 0 &&
-			      TAILQ_EMPTY(&tp->snd_holes)))) {
+			    !IN_FASTRECOVERY(tp) &&
+			    (to.to_flags & TOF_SACK) == 0 &&
+			    TAILQ_EMPTY(&tp->snd_holes)) {
 				/*
 				 * This is a pure ack for outstanding data.
 				 */
@@ -2061,9 +2056,7 @@
 				    th->th_ack != tp->snd_una)
 					tp->t_dupacks = 0;
 				else if (++tp->t_dupacks > tcprexmtthresh ||
-				    ((V_tcp_do_newreno ||
-				      (tp->t_flags & TF_SACK_PERMIT)) &&
-				     IN_FASTRECOVERY(tp))) {
+				     IN_FASTRECOVERY(tp)) {
 					if ((tp->t_flags & TF_SACK_PERMIT) &&
 					    IN_FASTRECOVERY(tp)) {
 						int awnd;
@@ -2100,14 +2093,24 @@
 							tp->t_dupacks = 0;
 							break;
 						}
-					} else if (V_tcp_do_newreno ||
-					    V_tcp_do_ecn) {
+					} else {
 						if (SEQ_LEQ(th->th_ack,
 						    tp->snd_recover)) {
 							tp->t_dupacks = 0;
 							break;
 						}
 					}
+
+					/*
+					 * If the current tcp cc module has
+					 * defined a hook for tasks to run
+					 * before entering FR, call it
+					 */
+					if (CC_ALGO(tp)->pre_fr != NULL)
+						CC_ALGO(tp)->pre_fr(tp, th);
+
+					ENTER_FASTRECOVERY(tp);
+					tp->snd_recover = tp->snd_max;
 					tcp_congestion_exp(tp);
 					tcp_timer_activate(tp, TT_REXMT, 0);
 					tp->t_rtttime = 0;
@@ -2172,37 +2175,16 @@
 		 * If the congestion window was inflated to account
 		 * for the other side's cached packets, retract it.
 		 */
-		if (V_tcp_do_newreno || (tp->t_flags & TF_SACK_PERMIT)) {
-			if (IN_FASTRECOVERY(tp)) {
-				if (SEQ_LT(th->th_ack, tp->snd_recover)) {
-					if (tp->t_flags & TF_SACK_PERMIT)
-						tcp_sack_partialack(tp, th);
-					else
-						tcp_newreno_partial_ack(tp, th);
-				} else {
-					/*
-					 * Out of fast recovery.
-					 * Window inflation should have left us
-					 * with approximately snd_ssthresh
-					 * outstanding data.
-					 * But in case we would be inclined to
-					 * send a burst, better to do it via
-					 * the slow start mechanism.
-					 */
-					if (SEQ_GT(th->th_ack +
-							tp->snd_ssthresh,
-						   tp->snd_max))
-						tp->snd_cwnd = tp->snd_max -
-								th->th_ack +
-								tp->t_maxseg;
-					else
-						tp->snd_cwnd = tp->snd_ssthresh;
-				}
+		if (IN_FASTRECOVERY(tp)) {
+			if (SEQ_LT(th->th_ack, tp->snd_recover)) {
+				if (tp->t_flags & TF_SACK_PERMIT)
+					tcp_sack_partialack(tp, th);
+				else
+					tcp_newreno_partial_ack(tp, th);
+			} else {
+				if (CC_ALGO(tp)->post_fr != NULL)
+					CC_ALGO(tp)->post_fr(tp, th);
 			}
-		} else {
-			if (tp->t_dupacks >= tcprexmtthresh &&
-			    tp->snd_cwnd > tp->snd_ssthresh)
-				tp->snd_cwnd = tp->snd_ssthresh;
 		}
 		tp->t_dupacks = 0;
 		/*
@@ -2302,59 +2284,12 @@
 
 		/*
 		 * When new data is acked, open the congestion window.
-		 * Method depends on which congestion control state we're
-		 * in (slow start or cong avoid) and if ABC (RFC 3465) is
-		 * enabled.
-		 *
-		 * slow start: cwnd <= ssthresh
-		 * cong avoid: cwnd > ssthresh
-		 *
-		 * slow start and ABC (RFC 3465):
-		 *   Grow cwnd exponentially by the amount of data
-		 *   ACKed capping the max increment per ACK to
-		 *   (abc_l_var * maxseg) bytes.
-		 *
-		 * slow start without ABC (RFC 2581):
-		 *   Grow cwnd exponentially by maxseg per ACK.
-		 *
-		 * cong avoid and ABC (RFC 3465):
-		 *   Grow cwnd linearly by maxseg per RTT for each
-		 *   cwnd worth of ACKed data.
-		 *
-		 * cong avoid without ABC (RFC 2581):
-		 *   Grow cwnd linearly by approximately maxseg per RTT using
-		 *   maxseg^2 / cwnd per ACK as the increment.
-		 *   If cwnd > maxseg^2, fix the cwnd increment at 1 byte to
-		 *   avoid capping cwnd.
+		 * The specifics of how this is achieved are up to the
+		 * congestion control algorithm in use for this connection.
 		 */
-		if ((!V_tcp_do_newreno && !(tp->t_flags & TF_SACK_PERMIT)) ||
-		    !IN_FASTRECOVERY(tp)) {
-			u_int cw = tp->snd_cwnd;
-			u_int incr = tp->t_maxseg;
-			/* In congestion avoidance? */
-			if (cw > tp->snd_ssthresh) {
-				if (V_tcp_do_rfc3465) {
-					tp->t_bytes_acked += acked;
-					if (tp->t_bytes_acked >= tp->snd_cwnd)
-						tp->t_bytes_acked -= cw;
-					else
-						incr = 0;
-				}
-				else
-					incr = max((incr * incr / cw), 1);
-			/*
-			 * In slow-start with ABC enabled and no RTO in sight?
-			 * (Must not use abc_l_var > 1 if slow starting after an
-			 * RTO. On RTO, snd_nxt = snd_una, so the snd_nxt ==
-			 * snd_max check is sufficient to handle this).
-			 */
-			} else if (V_tcp_do_rfc3465 &&
-			    tp->snd_nxt == tp->snd_max)
-				incr = min(acked,
-				    V_tcp_abc_l_var * tp->t_maxseg);
-			/* ABC is on by default, so (incr == 0) frequently. */
-			if (incr > 0)
-				tp->snd_cwnd = min(cw+incr, TCP_MAXWIN<<tp->snd_scale);
+		if (!IN_FASTRECOVERY(tp)) {
+			if (CC_ALGO(tp)->ack_received != NULL)
+				CC_ALGO(tp)->ack_received(tp, th);
 		}
 		SOCKBUF_LOCK(&so->so_snd);
 		if (acked > so->so_snd.sb_cc) {
@@ -2369,13 +2304,11 @@
 		/* NB: sowwakeup_locked() does an implicit unlock. */
 		sowwakeup_locked(so);
 		/* Detect una wraparound. */
-		if ((V_tcp_do_newreno || (tp->t_flags & TF_SACK_PERMIT)) &&
-		    !IN_FASTRECOVERY(tp) &&
+		if (!IN_FASTRECOVERY(tp) &&
 		    SEQ_GT(tp->snd_una, tp->snd_recover) &&
 		    SEQ_LEQ(th->th_ack, tp->snd_recover))
 			tp->snd_recover = th->th_ack - 1;
-		if ((V_tcp_do_newreno || (tp->t_flags & TF_SACK_PERMIT)) &&
-		    IN_FASTRECOVERY(tp) &&
+		if (IN_FASTRECOVERY(tp) &&
 		    SEQ_GEQ(th->th_ack, tp->snd_recover)) {
 			EXIT_FASTRECOVERY(tp);
 			tp->t_bytes_acked = 0;
@@ -3336,41 +3269,9 @@
 	if (metrics.rmx_bandwidth)
 		tp->snd_bandwidth = metrics.rmx_bandwidth;
 
-	/*
-	 * Set the slow-start flight size depending on whether this
-	 * is a local network or not.
-	 *
-	 * Extend this so we cache the cwnd too and retrieve it here.
-	 * Make cwnd even bigger than RFC3390 suggests but only if we
-	 * have previous experience with the remote host. Be careful
-	 * not make cwnd bigger than remote receive window or our own
-	 * send socket buffer. Maybe put some additional upper bound
-	 * on the retrieved cwnd. Should do incremental updates to
-	 * hostcache when cwnd collapses so next connection doesn't
-	 * overloads the path again.
-	 *
-	 * RFC3390 says only do this if SYN or SYN/ACK didn't got lost.
-	 * We currently check only in syncache_socket for that.
-	 */
-#define TCP_METRICS_CWND
-#ifdef TCP_METRICS_CWND
-	if (metrics.rmx_cwnd)
-		tp->snd_cwnd = max(mss,
-				min(metrics.rmx_cwnd / 2,
-				 min(tp->snd_wnd, so->so_snd.sb_hiwat)));
-	else
-#endif
-	if (V_tcp_do_rfc3390)
-		tp->snd_cwnd = min(4 * mss, max(2 * mss, 4380));
-#ifdef INET6
-	else if ((isipv6 && in6_localaddr(&inp->in6p_faddr)) ||
-		 (!isipv6 && in_localaddr(inp->inp_faddr)))
-#else
-	else if (in_localaddr(inp->inp_faddr))
-#endif
-		tp->snd_cwnd = mss * V_ss_fltsz_local;
-	else
-		tp->snd_cwnd = mss * V_ss_fltsz;
+	/* set the initial cwnd value */
+	if (CC_ALGO(tp)->cwnd_init != NULL)
+		CC_ALGO(tp)->cwnd_init(tp);
 
 	/* Check the interface for TSO capabilities. */
 	if (mtuflags & CSUM_TSO)
Index: netinet/tcp_subr.c
===================================================================
--- netinet/tcp_subr.c	(.../head/sys)	(revision 190777)
+++ netinet/tcp_subr.c	(.../projects/tcp_cc_8.x/sys)	(revision 190777)
@@ -49,6 +49,8 @@
 #ifdef INET6
 #include <sys/domain.h>
 #endif
+#include <sys/lock.h>
+#include <sys/rwlock.h>
 #include <sys/priv.h>
 #include <sys/proc.h>
 #include <sys/socket.h>
@@ -62,6 +64,7 @@
 #include <net/route.h>
 #include <net/if.h>
 
+#include <netinet/cc.h>
 #include <netinet/in.h>
 #include <netinet/in_systm.h>
 #include <netinet/ip.h>
@@ -80,7 +83,6 @@
 #include <netinet6/nd6.h>
 #endif
 #include <netinet/ip_icmp.h>
-#include <netinet/tcp.h>
 #include <netinet/tcp_fsm.h>
 #include <netinet/tcp_seq.h>
 #include <netinet/tcp_timer.h>
@@ -361,6 +363,8 @@
 	V_tcp_inflight_rttthresh = TCPTV_INFLIGHT_RTTTHRESH;
 	tcp_finwait2_timeout = TCPTV_FINWAIT2_TIMEOUT;
 
+	cc_init();
+
 	TUNABLE_INT_FETCH("net.inet.tcp.sack.enable", &V_tcp_do_sack);
 
 	INP_INFO_LOCK_INIT(&V_tcbinfo, "tcp");
@@ -690,6 +694,21 @@
 	if (tm == NULL)
 		return (NULL);
 	tp = &tm->tcb;
+
+	/*
+	 * use the current system default cc algorithm, which is always
+	 * the first algorithm in cc_list
+	 */
+	CC_LIST_RLOCK();
+	CC_ALGO(tp) = STAILQ_FIRST(&cc_list);
+	CC_LIST_RUNLOCK();
+
+	/* if the cc module fails to init, stop building the control block */
+	if (CC_ALGO(tp)->init(tp) > 0) {
+		uma_zfree(tcpcb_zone, tp);
+		return NULL;
+	}
+
 	tp->t_timers = &tm->tt;
 	/*	LIST_INIT(&tp->t_segq); */	/* XXX covered by M_ZERO */
 	tp->t_maxseg = tp->t_maxopd =
@@ -852,8 +871,13 @@
 	}
 	/* Disconnect offload device, if any. */
 	tcp_offload_detach(tp);
-		
 	tcp_free_sackholes(tp);
+
+	/* Allow the cc algorithm in use for this cb to clean up after itself */
+	if (CC_ALGO(tp)->deinit != NULL)
+		CC_ALGO(tp)->deinit(tp);
+
+	CC_ALGO(tp) = NULL;
 	inp->inp_ppcb = NULL;
 	tp->t_inpcb = NULL;
 	uma_zfree(tcpcb_zone, tp);
Index: netinet/tcp_timer.c
===================================================================
--- netinet/tcp_timer.c	(.../head/sys)	(revision 190777)
+++ netinet/tcp_timer.c	(.../projects/tcp_cc_8.x/sys)	(revision 190777)
@@ -50,6 +50,7 @@
 #include <net/if.h>
 #include <net/route.h>
 
+#include <netinet/cc.h>
 #include <netinet/in.h>
 #include <netinet/in_pcb.h>
 #include <netinet/in_systm.h>
@@ -57,7 +58,6 @@
 #include <netinet6/in6_pcb.h>
 #endif
 #include <netinet/ip_var.h>
-#include <netinet/tcp.h>
 #include <netinet/tcp_fsm.h>
 #include <netinet/tcp_timer.h>
 #include <netinet/tcp_var.h>
@@ -554,38 +554,11 @@
 	 * If timing a segment in this window, stop the timer.
 	 */
 	tp->t_rtttime = 0;
-	/*
-	 * Close the congestion window down to one segment
-	 * (we'll open it by one segment for each ack we get).
-	 * Since we probably have a window's worth of unacked
-	 * data accumulated, this "slow start" keeps us from
-	 * dumping all that data as back-to-back packets (which
-	 * might overwhelm an intermediate gateway).
-	 *
-	 * There are two phases to the opening: Initially we
-	 * open by one mss on each ack.  This makes the window
-	 * size increase exponentially with time.  If the
-	 * window is larger than the path can handle, this
-	 * exponential growth results in dropped packet(s)
-	 * almost immediately.  To get more time between
-	 * drops but still "push" the network to take advantage
-	 * of improving conditions, we switch from exponential
-	 * to linear window opening at some threshhold size.
-	 * For a threshhold, we use half the current window
-	 * size, truncated to a multiple of the mss.
-	 *
-	 * (the minimum cwnd that will give us exponential
-	 * growth is 2 mss.  We don't allow the threshhold
-	 * to go below this.)
-	 */
-	{
-		u_int win = min(tp->snd_wnd, tp->snd_cwnd) / 2 / tp->t_maxseg;
-		if (win < 2)
-			win = 2;
-		tp->snd_cwnd = tp->t_maxseg;
-		tp->snd_ssthresh = win * tp->t_maxseg;
-		tp->t_dupacks = 0;
-	}
+
+	if (CC_ALGO(tp)->after_timeout != NULL)
+		CC_ALGO(tp)->after_timeout(tp);
+
+	tp->t_dupacks = 0;
 	EXIT_FASTRECOVERY(tp);
 	tp->t_bytes_acked = 0;
 	(void) tcp_output(tp);
Index: netinet/tcp_var.h
===================================================================
--- netinet/tcp_var.h	(.../head/sys)	(revision 190777)
+++ netinet/tcp_var.h	(.../projects/tcp_cc_8.x/sys)	(revision 190777)
@@ -190,6 +190,8 @@
 	struct toe_usrreqs *t_tu;       /* offload operations vector */
 	void	*t_toe;			/* TOE pcb pointer */
 	int	t_bytes_acked;		/* # bytes acked during current RTT */
+	struct cc_algo	*cc_algo;	/* the algorithm that will manage congestion control*/
+	void	*cc_data;		/* pointer to a struct containing data required for the cc algorithm in use */
 };
 
 /*
@@ -527,7 +529,7 @@
 extern	int tcp_mssdflt;	/* XXX */
 extern	int tcp_minmss;
 extern	int tcp_delack_enabled;
-extern	int tcp_do_newreno;
+extern	int tcp_do_rfc3390;
 extern	int path_mtu_discovery;
 extern	int ss_fltsz;
 extern	int ss_fltsz_local;
Index: netinet/tcp_output.c
===================================================================
--- netinet/tcp_output.c	(.../head/sys)	(revision 190777)
+++ netinet/tcp_output.c	(.../projects/tcp_cc_8.x/sys)	(revision 190777)
@@ -54,6 +54,7 @@
 #include <net/if.h>
 #include <net/route.h>
 
+#include <netinet/cc.h>
 #include <netinet/in.h>
 #include <netinet/in_systm.h>
 #include <netinet/ip.h>
@@ -65,7 +66,6 @@
 #include <netinet/ip6.h>
 #include <netinet6/ip6_var.h>
 #endif
-#include <netinet/tcp.h>
 #define	TCPOUTFLAGS
 #include <netinet/tcp_fsm.h>
 #include <netinet/tcp_seq.h>
@@ -111,9 +111,6 @@
 	local_slowstart_flightsize, CTLFLAG_RW,
 	ss_fltsz_local, 1, "Slow start flight size for local networks");
 
-SYSCTL_V_INT(V_NET, vnet_inet, _net_inet_tcp, OID_AUTO, newreno, CTLFLAG_RW,
-	tcp_do_newreno, 0, "Enable NewReno Algorithms");
-
 SYSCTL_V_INT(V_NET, vnet_inet, _net_inet_tcp, OID_AUTO, tso, CTLFLAG_RW,
 	tcp_do_tso, 0, "Enable TCP Segmentation Offload");
 
@@ -174,24 +171,9 @@
 	 */
 	idle = (tp->t_flags & TF_LASTIDLE) || (tp->snd_max == tp->snd_una);
 	if (idle && (ticks - tp->t_rcvtime) >= tp->t_rxtcur) {
-		/*
-		 * We have been idle for "a while" and no acks are
-		 * expected to clock out any data we send --
-		 * slow start to get ack "clock" running again.
-		 *
-		 * Set the slow-start flight size depending on whether
-		 * this is a local network or not.
-		 */
-		int ss = V_ss_fltsz;
-#ifdef INET6
-		if (isipv6) {
-			if (in6_localaddr(&tp->t_inpcb->in6p_faddr))
-				ss = V_ss_fltsz_local;
-		} else
-#endif /* INET6 */
-		if (in_localaddr(tp->t_inpcb->inp_faddr))
-			ss = V_ss_fltsz_local;
-		tp->snd_cwnd = tp->t_maxseg * ss;
+		/* reset cwnd after a period of idleness */
+		if (CC_ALGO(tp)->after_idle != NULL)
+			CC_ALGO(tp)->after_idle(tp);
 	}
 	tp->t_flags &= ~TF_LASTIDLE;
 	if (idle) {
Index: netinet/tcp_usrreq.c
===================================================================
--- netinet/tcp_usrreq.c	(.../head/sys)	(revision 190777)
+++ netinet/tcp_usrreq.c	(.../projects/tcp_cc_8.x/sys)	(revision 190777)
@@ -62,6 +62,7 @@
 #include <net/if.h>
 #include <net/route.h>
 
+#include <netinet/cc.h>
 #include <netinet/in.h>
 #include <netinet/in_systm.h>
 #ifdef INET6
@@ -77,7 +78,6 @@
 #include <netinet6/ip6_var.h>
 #include <netinet6/scope6_var.h>
 #endif
-#include <netinet/tcp.h>
 #include <netinet/tcp_fsm.h>
 #include <netinet/tcp_seq.h>
 #include <netinet/tcp_timer.h>
@@ -1256,6 +1256,8 @@
 	struct	inpcb *inp;
 	struct	tcpcb *tp;
 	struct	tcp_info ti;
+	char buf[TCP_CA_NAME_MAX];
+	struct cc_algo *cc_algo;
 
 	error = 0;
 	inp = sotoinpcb(so);
@@ -1365,6 +1367,58 @@
 			error = EINVAL;
 			break;
 
+		case TCP_CONGESTION:
+			INP_WUNLOCK(inp);
+			bzero(buf, sizeof(buf));
+			error = sooptcopyin(sopt, &buf, sizeof(buf), 1);
+			if (error)
+				break;
+			INP_WLOCK_RECHECK(inp);
+			/*
+			 * We return EINVAL if we can't find the requested cc
+			 * algo. We set error here and reset to 0 if found to
+			 * simplify the error checking.
+			 */
+			error = EINVAL;
+			CC_LIST_RLOCK();
+			STAILQ_FOREACH(cc_algo, &cc_list, entries) {
+				if (	strncmp(buf,
+					cc_algo->name,
+					TCP_CA_NAME_MAX) == 0) {
+					/*
+					 * we've found the requested algo,
+					 * so revert the EINVAL error condition.
+					 */
+					error = 0;
+					/*
+					 * we hold a write lock over the tcb
+					 * so it's safe to do these things
+					 * without ordering concerns
+					 */
+					if (CC_ALGO(tp)->deinit != NULL)
+						CC_ALGO(tp)->deinit(tp);
+					CC_ALGO(tp) = cc_algo;
+					/*
+					 * if something goes pear shaped
+					 * initialising the new algo,
+					 * fall back to newreno (which
+					 * does not require initialisation)
+					 */
+					if (cc_algo->init(tp) > 0) {
+						CC_ALGO(tp) = &newreno_cc_algo;
+						/*
+						 * the only reason init() should
+						 * fail is because of malloc
+						 */
+						error = ENOMEM;
+					}
+					break; /* break the STAILQ_FOREACH */
+				}
+			}
+			CC_LIST_RUNLOCK();
+			INP_WUNLOCK(inp);
+			break;
+
 		default:
 			INP_WUNLOCK(inp);
 			error = ENOPROTOOPT;
@@ -1408,6 +1462,12 @@
 			INP_WUNLOCK(inp);
 			error = sooptcopyout(sopt, &ti, sizeof ti);
 			break;
+		case TCP_CONGESTION:
+			bzero(buf, sizeof(buf));
+			strlcpy(buf, CC_ALGO(tp)->name, TCP_CA_NAME_MAX);
+			INP_WUNLOCK(inp);
+			error = sooptcopyout(sopt, buf, TCP_CA_NAME_MAX);
+			break;
 		default:
 			INP_WUNLOCK(inp);
 			error = ENOPROTOOPT;
Index: netinet/cc.c
===================================================================
--- netinet/cc.c	(.../head/sys)	(revision 0)
+++ netinet/cc.c	(.../projects/tcp_cc_8.x/sys)	(revision 190777)
@@ -0,0 +1,454 @@
+/*-
+ * Copyright (c) 1982, 1986, 1988, 1990, 1993, 1994, 1995
+ *	The Regents of the University of California.
+ * Copyright (c) 2008 Swinburne University of Technology, Melbourne, Australia
+ * All rights reserved.
+ *
+ * The majority of this software was developed at the Centre for
+ * Advanced Internet Architectures, Swinburne University, by Lawrence Stewart
+ * and James Healy, made possible in part by a grant from the Cisco University
+ * Research Program Fund at Community Foundation Silicon Valley.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <sys/param.h>
+#include <sys/kernel.h>
+#include <sys/libkern.h>
+#include <sys/lock.h>
+#include <sys/malloc.h>
+#include <sys/mutex.h>
+#include <sys/queue.h>
+#include <sys/rwlock.h>
+#include <sys/sbuf.h>
+#include <sys/socket.h>
+#include <sys/socketvar.h>
+#include <sys/sysctl.h>
+
+#include <net/if.h>
+#include <net/if_var.h>
+
+#include <netinet/cc.h>
+#include <netinet/tcp_seq.h>
+#include <netinet/vinet.h>
+
+/* list of available cc algorithms; the first entry is the system default */
+struct cc_head cc_list = STAILQ_HEAD_INITIALIZER(cc_list); 
+
+struct rwlock cc_list_lock;	/* protects read/write access to cc_list */
+
+/* hook table pointing at our newreno functions (defined later in this file) */
+struct cc_algo newreno_cc_algo = {
+	.name = "newreno",
+	.init = newreno_init,
+	.deinit = NULL,		/* newreno supplies no deinit hook */
+	.cwnd_init = newreno_cwnd_init,
+	.ack_received = newreno_ack_received,
+	.pre_fr = newreno_pre_fr,
+	.post_fr = newreno_post_fr,
+	.after_idle = newreno_after_idle,
+	.after_timeout = newreno_after_timeout
+};
+
+/* name of the system wide default cc algorithm, exported through sysctl */
+char cc_algorithm[TCP_CA_NAME_MAX];
+
+/*
+ * sysctl handler that allows the default cc algorithm for the system to be
+ * viewed and changed. On a write, the named algorithm becomes the head of
+ * cc_list. Returns 0 on success, EINVAL if no algorithm has that name, or
+ * the error from copying the string to/from the caller.
+ */
+static int
+cc_default_algorithm(SYSCTL_HANDLER_ARGS)
+{
+	struct cc_algo *funcs;
+	char name[TCP_CA_NAME_MAX];
+	int error;
+
+	/* req->newptr may be a userland address; copy the name in first */
+	strlcpy(name, (char *)arg1, sizeof(name));
+	error = sysctl_handle_string(oidp, name, sizeof(name), req);
+	if (error != 0 || req->newptr == NULL)
+		return (error);
+
+	/* find and reorder under one write lock so funcs cannot go stale */
+	error = EINVAL;
+	CC_LIST_WLOCK();
+	STAILQ_FOREACH(funcs, &cc_list, entries) {
+		if (strncmp(name, funcs->name, TCP_CA_NAME_MAX) != 0)
+			continue;
+		if (funcs != STAILQ_FIRST(&cc_list)) {
+			STAILQ_REMOVE(&cc_list, funcs, cc_algo, entries);
+			STAILQ_INSERT_HEAD(&cc_list, funcs, entries);
+		}
+		strlcpy((char *)arg1, name, arg2);
+		error = 0;
+		break;
+	}
+	CC_LIST_WUNLOCK();
+
+	return (error);
+}
+
+/*
+ * sysctl handler that displays the available cc algorithms as a read only,
+ * comma separated string. sbuf failures are reported to sysctl as ENOMEM.
+ */
+static int
+cc_list_available(SYSCTL_HANDLER_ARGS)
+{
+	struct cc_algo *algo;
+	int error = 0, first = 1;
+	struct sbuf *s;
+
+	if ((s = sbuf_new(NULL, NULL, TCP_CA_NAME_MAX, SBUF_AUTOEXTEND)) == NULL)
+		return (ENOMEM);
+
+	CC_LIST_RLOCK();
+	STAILQ_FOREACH(algo, &cc_list, entries) {
+		if (sbuf_printf(s, (first) ? "%s" : ", %s", algo->name) != 0) {
+			error = ENOMEM;
+			break;
+		}
+		first = 0;
+	}
+	CC_LIST_RUNLOCK();
+
+	if (error == 0) {
+		sbuf_finish(s);
+		error = sysctl_handle_string(oidp, sbuf_data(s), 1, req);
+	}
+	sbuf_delete(s);
+	return (error);
+}
+
+/*
+ * Initialise cc on system boot
+ */
+void
+cc_init(void)
+{
+	/* initialise the lock that will protect read/write access to our linked list */
+	CC_LIST_LOCK_INIT();
+
+	/* initialise the list of cc algorithms */
+	STAILQ_INIT(&cc_list);
+
+	/* add newreno to the list of available algorithms */
+	cc_register_algorithm(&newreno_cc_algo);
+
+	/* newreno is the only entry, so it is also the system default */
+	strlcpy(cc_algorithm, newreno_cc_algo.name, TCP_CA_NAME_MAX);
+}
+
+/*
+ * Remove remove_cc from cc_list and switch every tcb that was using it back
+ * to newreno. Returns 1 on success, 0 on failure (remove_cc is not registered,
+ * or it is newreno itself, which must remain available as the fallback).
+ */
+int
+cc_deregister_algorithm(struct cc_algo *remove_cc)
+{
+	struct cc_algo *funcs;
+	struct tcpcb *tp;
+	struct inpcb *inp;
+
+	/* every tcb falls back to newreno, so newreno itself must stay */
+	if (remove_cc == &newreno_cc_algo)
+		return (0);
+
+	/* find and unlink under one write lock so the list cannot change */
+	CC_LIST_WLOCK();
+	STAILQ_FOREACH(funcs, &cc_list, entries) {
+		if (funcs == remove_cc)
+			break;
+	}
+	if (funcs == NULL) {
+		CC_LIST_WUNLOCK();
+		return (0);
+	}
+	STAILQ_REMOVE(&cc_list, funcs, cc_algo, entries);
+	/*
+	 * New tcbs use the head of cc_list, so if the system default is going
+	 * away newreno takes over both the head of the list and the name.
+	 */
+	if (strncmp(cc_algorithm, remove_cc->name, TCP_CA_NAME_MAX) == 0) {
+		STAILQ_REMOVE(&cc_list, &newreno_cc_algo, cc_algo, entries);
+		STAILQ_INSERT_HEAD(&cc_list, &newreno_cc_algo, entries);
+		strlcpy(cc_algorithm, newreno_cc_algo.name, TCP_CA_NAME_MAX);
+	}
+	CC_LIST_WUNLOCK();
+
+	/*
+	 * Check all active control blocks and change any that are using this
+	 * algorithm back to newreno. If the algorithm that was in use requires
+	 * deinit code to be run, call it before the switch.
+	 *
+	 * XXX: We should stall here until we're sure the tcb has stopped using
+	 * the deregistered algo's functions... Not sure how to do that yet!
+	 */
+	INP_INFO_RLOCK(&V_tcbinfo);
+	LIST_FOREACH(inp, &V_tcb, inp_list) {
+		/* skip tcptw structs */
+		if (inp->inp_vflag & INP_TIMEWAIT)
+			continue;
+		INP_WLOCK(inp);
+		if ((tp = intotcpcb(inp)) != NULL && CC_ALGO(tp) == remove_cc) {
+			if (remove_cc->deinit != NULL)
+				remove_cc->deinit(tp);
+			CC_ALGO(tp) = &newreno_cc_algo;
+			(void)newreno_init(tp);
+		}
+		INP_WUNLOCK(inp);
+	}
+	INP_INFO_RUNLOCK(&V_tcbinfo);
+
+	return (1);
+}
+
+int
+cc_register_algorithm(struct cc_algo *add_cc)
+{
+	CC_LIST_WLOCK();	/* write-lock cc_list while it is modified */
+	STAILQ_INSERT_TAIL(&cc_list, add_cc, entries);	/* head stays as is */
+	CC_LIST_WUNLOCK();
+	return 1;		/* unconditionally reports success */
+}
+
+/*
+ * NEW RENO
+ */
+
+int
+newreno_init(struct tcpcb *tp)
+{
+	return 0;	/* nothing is set up here; callers treat > 0 as failure */
+}
+
+/*
+ * Set ssthresh to half of the smaller of snd_wnd and snd_cwnd, rounded
+ * down to whole segments, with a floor of two segments.
+ */
+void
+newreno_ssthresh_update(struct tcpcb *tp)
+{
+	const u_int maxseg = tp->t_maxseg;
+	u_int flight, segs;
+
+	/* the window in effect is the smaller of snd_wnd and snd_cwnd */
+	flight = min(tp->snd_wnd, tp->snd_cwnd);
+	/* halve it, count whole segments, never go below 2 */
+	segs = max(flight / 2 / maxseg, 2);
+	tp->snd_ssthresh = segs * maxseg;
+}
+
+/*
+ * initial cwnd at the start of a connection
+ * if there is a hostcache entry for the foreign host, base cwnd on that
+ * if rfc3390 is enabled, set cwnd to approx 4 MSS as recommended
+ * otherwise use the local or non-local slow start flight size sysctls
+ */
+void
+newreno_cwnd_init(struct tcpcb *tp)
+{
+	struct hc_metrics_lite metrics;
+	struct inpcb *inp = tp->t_inpcb;
+	struct socket *so = inp->inp_socket;
+
+	/*
+	 * Set the slow-start flight size depending on whether this
+	 * is a local network or not.
+	 *
+	 * Extend this so we cache the cwnd too and retrieve it here.
+	 * Make cwnd even bigger than RFC3390 suggests but only if we
+	 * have previous experience with the remote host. Be careful
+	 * not to make cwnd bigger than the remote receive window or our
+	 * own send socket buffer. Maybe put some additional upper bound
+	 * on the retrieved cwnd. Should do incremental updates to
+	 * hostcache when cwnd collapses so the next connection doesn't
+	 * overload the path again.
+	 *
+	 * RFC3390 says only do this if the SYN or SYN/ACK didn't get lost.
+	 * We currently check only in syncache_socket for that.
+	 */
+
+	tcp_hc_get(&inp->inp_inc, &metrics);
+	/* NB: the local-address test below keys off inp_vflag, not isipv6 */
+#define TCP_METRICS_CWND
+#ifdef TCP_METRICS_CWND
+	if (metrics.rmx_cwnd)
+		tp->snd_cwnd = max(tp->t_maxseg,
+				min(metrics.rmx_cwnd / 2,
+				 min(tp->snd_wnd, so->so_snd.sb_hiwat)));
+	else
+#endif
+	if (V_tcp_do_rfc3390)
+		tp->snd_cwnd = min(4 * tp->t_maxseg, max(2 * tp->t_maxseg, 4380));
+#ifdef INET6
+	else if (((inp->inp_vflag & INP_IPV6) != 0) ?
+	    in6_localaddr(&inp->in6p_faddr) : in_localaddr(inp->inp_faddr))
+#else
+	else if (in_localaddr(inp->inp_faddr))
+#endif
+		tp->snd_cwnd = tp->t_maxseg * V_ss_fltsz_local;
+	else
+		tp->snd_cwnd = tp->t_maxseg * V_ss_fltsz;
+}
+
+/*
+ * Grow cwnd in response to an ACK for new data.
+ * Slow start (cwnd <= ssthresh): add one MSS per ACK.
+ * Congestion avoidance (cwnd > ssthresh): add roughly one MSS per RTT.
+ */
+void
+newreno_ack_received(struct tcpcb *tp, struct tcphdr *th)
+{
+	const u_int cwnd = tp->snd_cwnd;
+	const u_int mss = tp->t_maxseg;
+	u_int step;
+
+	/*
+	 * Slow start opens the window exponentially (maxseg per ACK).
+	 * Congestion avoidance opens it linearly: maxseg^2 / cwnd per ACK
+	 * adds up to approx. maxseg per RTT. Once cwnd > maxseg^2 that
+	 * quotient is 0, so the step is fixed at 1 byte to avoid capping
+	 * cwnd (as suggested in RFC2581).
+	 */
+	step = (cwnd > tp->snd_ssthresh) ? max(mss * mss / cwnd, 1) : mss;
+
+	tp->snd_cwnd = min(cwnd + step, TCP_MAXWIN << tp->snd_scale);
+}
+
+/*
+ * pre_fr hook: recompute ssthresh before entering FR (th is not used)
+ */
+void
+newreno_pre_fr(struct tcpcb *tp, struct tcphdr *th)
+{
+	newreno_ssthresh_update(tp);
+}
+
+/*
+ * post_fr hook: set cwnd on the way out of fast recovery.
+ * th is the ACK being processed; it may be NULL, in which case cwnd is
+ * simply set to snd_ssthresh.
+ */
+void
+newreno_post_fr(struct tcpcb *tp, struct tcphdr *th)
+{
+	/* without an ACK to measure against, fall straight back to ssthresh */
+	if (th == NULL || !SEQ_GT(th->th_ack + tp->snd_ssthresh, tp->snd_max)) {
+		tp->snd_cwnd = tp->snd_ssthresh;
+		return;
+	}
+
+	/*
+	 * Window inflation should have left us with approximately
+	 * snd_ssthresh outstanding data, but less than that is actually
+	 * outstanding here. Rather than send a burst, limit cwnd to the
+	 * outstanding data plus one segment and let slow start do the rest.
+	 */
+	tp->snd_cwnd = tp->snd_max - th->th_ack + tp->t_maxseg;
+}
+
+/*
+ * if a connection has been idle for a while and more data is ready to be sent,
+ * reset cwnd
+ */
+void
+newreno_after_idle(struct tcpcb *tp)
+{
+	/*
+	 * We have been idle for "a while" and no acks are
+	 * expected to clock out any data we send --
+	 * slow start to get ack "clock" running again.
+	 *
+	 * Set the slow-start flight size depending on whether
+	 * this is a local network or not.
+	 *
+	 * The address family is read from the inpcb because
+	 * isipv6 is not declared anywhere in this file.
+	 */
+	int ss = V_ss_fltsz;
+
+#ifdef INET6
+	if ((tp->t_inpcb->inp_vflag & INP_IPV6) != 0) {
+		if (in6_localaddr(&tp->t_inpcb->in6p_faddr))
+			ss = V_ss_fltsz_local;
+	} else
+#endif /* INET6 */
+
+	if (in_localaddr(tp->t_inpcb->inp_faddr))
+		ss = V_ss_fltsz_local;
+
+	tp->snd_cwnd = tp->t_maxseg * ss;
+}
+
+/*
+ * after_timeout hook: update ssthresh, then collapse cwnd to one segment.
+ */
+void
+newreno_after_timeout(struct tcpcb *tp)
+{
+	newreno_ssthresh_update(tp);	/* reads snd_cwnd: keep before the reset */
+
+	/*
+	 * Close the congestion window down to one segment
+	 * (we'll open it by one segment for each ack we get).
+	 * Since we probably have a window's worth of unacked
+	 * data accumulated, this "slow start" keeps us from
+	 * dumping all that data as back-to-back packets (which
+	 * might overwhelm an intermediate gateway).
+	 *
+	 * There are two phases to the opening: Initially we
+	 * open by one mss on each ack.  This makes the window
+	 * size increase exponentially with time.  If the
+	 * window is larger than the path can handle, this
+	 * exponential growth results in dropped packet(s)
+	 * almost immediately.  To get more time between
+	 * drops but still "push" the network to take advantage
+	 * of improving conditions, we switch from exponential
+	 * to linear window opening at some threshold size.
+	 * For a threshold, we use half the current window
+	 * size, truncated to a multiple of the mss.
+	 *
+	 * (the minimum cwnd that will give us exponential
+	 * growth is 2 mss.  We don't allow the threshold
+	 * to go below this.)
+	 */
+	tp->snd_cwnd = tp->t_maxseg;
+}
+
+SYSCTL_NODE(_net_inet_tcp, OID_AUTO, cc, CTLFLAG_RW, NULL,
+	"congestion control related settings");
+/* net.inet.tcp.cc.algorithm: view or change the system default algorithm */
+SYSCTL_PROC(_net_inet_tcp_cc, OID_AUTO, algorithm, CTLTYPE_STRING|CTLFLAG_RW,
+	&cc_algorithm, sizeof(cc_algorithm), cc_default_algorithm, "A",
+	"default congestion control algorithm");
+/* net.inet.tcp.cc.available: read only list of the registered algorithms */
+SYSCTL_PROC(_net_inet_tcp_cc, OID_AUTO, available, CTLTYPE_STRING|CTLFLAG_RD,
+	NULL, 0, cc_list_available, "A",
+	"list available congestion control algorithms");
Index: netinet/cc.h
===================================================================
--- netinet/cc.h	(.../head/sys)	(revision 0)
+++ netinet/cc.h	(.../projects/tcp_cc_8.x/sys)	(revision 190777)
@@ -0,0 +1,119 @@
+/*-
+ * Copyright (c) 2008 Swinburne University of Technology, Melbourne, Australia
+ * All rights reserved.
+ *
+ * This software was developed at the Centre for Advanced Internet
+ * Architectures, Swinburne University, by Lawrence Stewart and James Healy,
+ * made possible in part by a grant from the Cisco University Research Program
+ * Fund at Community Foundation Silicon Valley.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+#ifndef _NETINET_CC_H_
+#define _NETINET_CC_H_
+
+/* Needed for TCP_CA_NAME_MAX define which lives in tcp.h for compat reasons */
+#include <netinet/tcp.h>
+
+/*
+ * Global CC variables (extern declarations only; storage lives elsewhere)
+ */
+extern	STAILQ_HEAD(cc_head, cc_algo) cc_list;
+extern	char cc_algorithm[];
+extern	const int tcprexmtthresh;
+extern	struct cc_algo newreno_cc_algo;
+
+/*
+ * Declare the new net.inet.tcp.cc sysctl tree
+ */
+SYSCTL_DECL(_net_inet_tcp_cc);
+
+/*
+ * CC housekeeping functions
+ */
+void	cc_init(void);
+int	cc_register_algorithm(struct cc_algo *add_cc);
+int	cc_deregister_algorithm(struct cc_algo *remove_cc);
+
+/*
+ * NewReno CC functions
+ */
+int	newreno_init(struct tcpcb *tp);
+void	newreno_cwnd_init(struct tcpcb *tp);
+void	newreno_ack_received(struct tcpcb *tp, struct tcphdr *th);
+void	newreno_pre_fr(struct tcpcb *tp, struct tcphdr *th);
+void	newreno_post_fr(struct tcpcb *tp, struct tcphdr *th);
+void	newreno_after_idle(struct tcpcb *tp);
+void	newreno_after_timeout(struct tcpcb *tp);
+void	newreno_ssthresh_update(struct tcpcb *tp); /* no cc_algo hook */
+
+/*
+ * Structure to hold function pointers to the functions responsible
+ * for congestion control. Based on similar structure in the SCTP stack.
+ */
+struct cc_algo {
+	char name[TCP_CA_NAME_MAX];
+
+	/* init the congestion algorithm for the specified control block */
+	int (*init) (struct tcpcb *tp);
+
+	/* deinit the congestion algorithm for the specified control block */
+	void (*deinit) (struct tcpcb *tp);
+
+	/* initialise cwnd at the start of a connection */
+	void (*cwnd_init) (struct tcpcb *tp);
+
+	/* called on the receipt of a valid ack */
+	void (*ack_received) (struct tcpcb *tp, struct tcphdr *th);
+
+	/* called before entering FR */
+	void (*pre_fr) (struct tcpcb *tp, struct tcphdr *th);
+
+	/* called after exiting FR */
+	void (*post_fr) (struct tcpcb *tp, struct tcphdr *th);
+
+	/* perform tasks when data transfer resumes after an idle period */
+	void (*after_idle) (struct tcpcb *tp);
+
+	/* perform tasks when the connection's retransmit timer expires */
+	void (*after_timeout) (struct tcpcb *tp);
+
+	STAILQ_ENTRY(cc_algo) entries;
+};
+
+#define CC_ALGO(tp) ((tp)->cc_algo)
+#define CC_DATA(tp) ((tp)->cc_data)
+
+extern struct rwlock cc_list_lock;
+#define CC_LIST_LOCK_INIT() rw_init(&cc_list_lock, "cc_list")
+#define CC_LIST_LOCK_DESTROY() rw_destroy(&cc_list_lock)
+#define CC_LIST_RLOCK() rw_rlock(&cc_list_lock)
+#define CC_LIST_RUNLOCK() rw_runlock(&cc_list_lock)
+#define CC_LIST_WLOCK() rw_wlock(&cc_list_lock)
+#define CC_LIST_WUNLOCK() rw_wunlock(&cc_list_lock)
+#define CC_LIST_TRY_WLOCK() rw_try_upgrade(&cc_list_lock) /* upgrades a held rlock */
+#define CC_LIST_W2RLOCK() rw_downgrade(&cc_list_lock)
+
+#endif /* _NETINET_CC_H_ */