RELENG_5 to delphijfork patch $Phantasm: delphijweb/research/freebsd/delphijfork.diff,v 1.12 2004/10/04 17:38:52 delphij Exp $ Index: src/lib/libc/stdlib/malloc.c diff -u src/lib/libc/stdlib/malloc.c:1.89.2.1 src/lib/libc/stdlib/malloc.c:1.87.1000.3 --- src/lib/libc/stdlib/malloc.c:1.89.2.1 Tue Sep 7 23:38:34 2004 +++ src/lib/libc/stdlib/malloc.c Mon Jul 5 22:08:17 2004 @@ -221,7 +221,7 @@ static struct pgfree free_list; /* Abort(), user doesn't handle problems. */ -static int malloc_abort; +static int malloc_abort = 0; /* Are we trying to die ? */ static int suicide; @@ -244,7 +244,7 @@ static int malloc_zero; /* junk fill ? */ -static int malloc_junk; +static int malloc_junk = 0; #ifdef HAS_UTRACE Index: src/sys/conf/newvers.sh diff -u src/sys/conf/newvers.sh:1.62.2.15 src/sys/conf/newvers.sh:1.61.1000.5 --- src/sys/conf/newvers.sh:1.62.2.15 Sat Oct 2 22:17:44 2004 +++ src/sys/conf/newvers.sh Thu Aug 19 20:12:17 2004 @@ -32,7 +32,7 @@ TYPE="FreeBSD" REVISION="5.3" -BRANCH="BETA7" +BRANCH="delphij" RELEASE="${REVISION}-${BRANCH}" VERSION="${TYPE} ${RELEASE}" @@ -82,7 +82,7 @@ touch version v=`cat version` u=${USER-root} d=`pwd` h=${HOSTNAME-`hostname`} t=`date` -i=`${MAKE:-make} -V KERN_IDENT` +i=`make -V KERN_IDENT` cat << EOF > vers.c $COPYRIGHT char sccspad[32 - 4 /* sizeof(sccs) */] = { '\\0' }; Index: src/sys/dev/ata/atapi-cd.c diff -u src/sys/dev/ata/atapi-cd.c:1.170.2.1 src/sys/dev/ata/atapi-cd.c:1.170.1000.2 --- src/sys/dev/ata/atapi-cd.c:1.170.2.1 Tue Sep 7 03:36:26 2004 +++ src/sys/dev/ata/atapi-cd.c Tue Sep 7 15:42:33 2004 @@ -117,6 +117,7 @@ } ata_set_name(atadev, "acd", cdp->lun); + ata_controlcmd(atadev, ATA_ATAPI_RESET, 0, 0, 0); acd_get_cap(cdp); /* if this is a changer device, allocate the neeeded lun's */ Index: src/sys/i386/conf/GENERIC diff -u src/sys/i386/conf/GENERIC:1.413.2.6 src/sys/i386/conf/GENERIC:1.405.1000.10 --- src/sys/i386/conf/GENERIC:1.413.2.6 Thu Sep 23 03:23:37 2004 +++ src/sys/i386/conf/GENERIC Sun Sep 26 15:56:37 2004 @@ -19,8 +19,8 @@ # $FreeBSD$ machine i386 -cpu I486_CPU -cpu I586_CPU +#cpu I486_CPU +#cpu I586_CPU cpu I686_CPU ident GENERIC @@ -75,8 +75,8 @@ device atadisk # ATA disk drives device ataraid # ATA RAID drives device atapicd # ATAPI CDROM drives -device atapifd # ATAPI floppy drives -device atapist # ATAPI tape drives +#device atapifd # ATAPI floppy drives +#device atapist # ATAPI tape drives options ATA_STATIC_ID # Static device numbering # SCSI Controllers @@ -271,3 +271,12 @@ device firewire # FireWire bus code device sbp # SCSI over FireWire (Requires scbus and da) device fwe # Ethernet over FireWire (non-standard!) + +# delphij's usual hacks +makeoptions CONF_CFLAGS=-fno-builtin +options INCLUDE_CONFIG_FILE +options ACCEPT_FILTER_DATA +options ACCEPT_FILTER_HTTP +options ZERO_COPY_SOCKETS +options AUTO_EOI_1 + Index: src/sys/kern/init_main.c diff -u src/sys/kern/init_main.c:1.246.2.2 src/sys/kern/init_main.c:1.242.1000.6 --- src/sys/kern/init_main.c:1.246.2.2 Thu Sep 9 18:03:19 2004 +++ src/sys/kern/init_main.c Mon Sep 13 10:40:03 2004 @@ -87,7 +87,7 @@ /* Components of the first process -- never freed. */ static struct session session0; -static struct pgrp pgrp0; +struct pgrp pgrp0; struct proc proc0; struct thread thread0; struct ksegrp ksegrp0; @@ -355,10 +355,8 @@ * Create process 0 (the swapper). 
*/ LIST_INSERT_HEAD(&allproc, p, p_list); - LIST_INSERT_HEAD(PIDHASH(0), p, p_hash); mtx_init(&pgrp0.pg_mtx, "process group", NULL, MTX_DEF | MTX_DUPOK); p->p_pgrp = &pgrp0; - LIST_INSERT_HEAD(PGRPHASH(0), &pgrp0, pg_hash); LIST_INIT(&pgrp0.pg_members); LIST_INSERT_HEAD(&pgrp0.pg_members, p, p_pglist); Index: src/sys/kern/kern_exit.c diff -u src/sys/kern/kern_exit.c:1.245.2.1 src/sys/kern/kern_exit.c:1.229.1000.18 --- src/sys/kern/kern_exit.c:1.245.2.1 Thu Sep 9 18:03:19 2004 +++ src/sys/kern/kern_exit.c Mon Sep 13 10:40:03 2004 @@ -75,7 +75,6 @@ #include #include #include -#include #include /* Required to be non-static for SysVR4 emulator */ @@ -386,7 +385,6 @@ sx_xlock(&allproc_lock); LIST_REMOVE(p, p_list); LIST_INSERT_HEAD(&zombproc, p, p_list); - LIST_REMOVE(p, p_hash); sx_xunlock(&allproc_lock); sx_xlock(&proctree_lock); @@ -686,10 +684,7 @@ #endif KASSERT(FIRST_THREAD_IN_PROC(p), ("kern_wait: no residual thread!")); - uma_zfree(proc_zone, p); - sx_xlock(&allproc_lock); - nprocs--; - sx_xunlock(&allproc_lock); + proc_free(p); return (0); } mtx_lock_spin(&sched_lock); Index: src/sys/kern/kern_fork.c diff -u src/sys/kern/kern_fork.c:1.234.2.4 src/sys/kern/kern_fork.c:1.226.1000.13 --- src/sys/kern/kern_fork.c:1.234.2.4 Sat Sep 18 12:11:35 2004 +++ src/sys/kern/kern_fork.c Mon Sep 20 10:13:32 2004 @@ -69,7 +69,6 @@ #include #include #include -#include #include #include @@ -145,48 +144,6 @@ return (error); } -int nprocs = 1; /* process 0 */ -int lastpid = 0; -SYSCTL_INT(_kern, OID_AUTO, lastpid, CTLFLAG_RD, &lastpid, 0, - "Last used PID"); - -/* - * Random component to lastpid generation. We mix in a random factor to make - * it a little harder to predict. We sanity check the modulus value to avoid - * doing it in critical paths. Don't let it be too small or we pointlessly - * waste randomness entropy, and don't let it be impossibly large. Using a - * modulus that is too big causes a LOT more process table scans and slows - * down fork processing as the pidchecked caching is defeated. - */ -static int randompid = 0; - -static int -sysctl_kern_randompid(SYSCTL_HANDLER_ARGS) -{ - int error, pid; - - error = sysctl_wire_old_buffer(req, sizeof(int)); - if (error != 0) - return(error); - sx_xlock(&allproc_lock); - pid = randompid; - error = sysctl_handle_int(oidp, &pid, 0, req); - if (error == 0 && req->newptr != NULL) { - if (pid < 0 || pid > PID_MAX - 100) /* out of range */ - pid = PID_MAX - 100; - else if (pid < 2) /* NOP */ - pid = 0; - else if (pid < 100) /* Make it reasonable */ - pid = 100; - randompid = pid; - } - sx_xunlock(&allproc_lock); - return (error); -} - -SYSCTL_PROC(_kern, OID_AUTO, randompid, CTLTYPE_INT|CTLFLAG_RW, - 0, 0, sysctl_kern_randompid, "I", "Random PID modulus"); - int fork1(td, flags, pages, procp) struct thread *td; @@ -196,9 +153,7 @@ { struct proc *p1, *p2, *pptr; uid_t uid; - struct proc *newproc; - int ok, trypid; - static int curfail, pidchecked = 0; + static int curfail; static struct timeval lastfail; struct filedesc *fd; struct filedesc_to_leader *fdtol; @@ -282,138 +237,20 @@ } /* Allocate new proc. */ - newproc = uma_zalloc(proc_zone, M_WAITOK); -#ifdef MAC - mac_init_proc(newproc); -#endif - knlist_init(&newproc->p_klist, &newproc->p_mtx); - - /* We have to lock the process tree while we look for a pid. */ sx_slock(&proctree_lock); - /* - * Although process entries are dynamically created, we still keep - * a global limit on the maximum number we will create. 
Don't allow - * a nonprivileged user to use the last ten processes; don't let root - * exceed the limit. The variable nprocs is the current number of - * processes, maxproc is the limit. - */ - sx_xlock(&allproc_lock); - uid = td->td_ucred->cr_ruid; - if ((nprocs >= maxproc - 10 && - suser_cred(td->td_ucred, SUSER_RUID) != 0) || - nprocs >= maxproc) { + p2 = proc_alloc(td, flags); + if (!p2) { error = EAGAIN; goto fail; } - /* - * Increment the count of procs running with this uid. Don't allow - * a nonprivileged user to exceed their current limit. - */ - PROC_LOCK(p1); - ok = chgproccnt(td->td_ucred->cr_ruidinfo, 1, - (uid != 0) ? lim_cur(p1, RLIMIT_NPROC) : 0); - PROC_UNLOCK(p1); - if (!ok) { - error = EAGAIN; - goto fail; - } - - /* - * Increment the nprocs resource before blocking can occur. There - * are hard-limits as to the number of processes that can run. - */ - nprocs++; - - /* - * Find an unused process ID. We remember a range of unused IDs - * ready to use (from lastpid+1 through pidchecked-1). - * - * If RFHIGHPID is set (used during system boot), do not allocate - * low-numbered pids. - */ - trypid = lastpid + 1; - if (flags & RFHIGHPID) { - if (trypid < 10) - trypid = 10; - } else { - if (randompid) - trypid += arc4random() % randompid; - } -retry: - /* - * If the process ID prototype has wrapped around, - * restart somewhat above 0, as the low-numbered procs - * tend to include daemons that don't exit. - */ - if (trypid >= PID_MAX) { - trypid = trypid % PID_MAX; - if (trypid < 100) - trypid += 100; - pidchecked = 0; - } - if (trypid >= pidchecked) { - int doingzomb = 0; - - pidchecked = PID_MAX; - /* - * Scan the active and zombie procs to check whether this pid - * is in use. Remember the lowest pid that's greater - * than trypid, so we can avoid checking for a while. - */ - p2 = LIST_FIRST(&allproc); -again: - for (; p2 != NULL; p2 = LIST_NEXT(p2, p_list)) { - PROC_LOCK(p2); - while (p2->p_pid == trypid || - (p2->p_pgrp != NULL && - (p2->p_pgrp->pg_id == trypid || - (p2->p_session != NULL && - p2->p_session->s_sid == trypid)))) { - trypid++; - if (trypid >= pidchecked) { - PROC_UNLOCK(p2); - goto retry; - } - } - if (p2->p_pid > trypid && pidchecked > p2->p_pid) - pidchecked = p2->p_pid; - if (p2->p_pgrp != NULL) { - if (p2->p_pgrp->pg_id > trypid && - pidchecked > p2->p_pgrp->pg_id) - pidchecked = p2->p_pgrp->pg_id; - if (p2->p_session != NULL && - p2->p_session->s_sid > trypid && - pidchecked > p2->p_session->s_sid) - pidchecked = p2->p_session->s_sid; - } - PROC_UNLOCK(p2); - } - if (!doingzomb) { - doingzomb = 1; - p2 = LIST_FIRST(&zombproc); - goto again; - } - } +#ifdef MAC + mac_init_proc(p2); +#endif sx_sunlock(&proctree_lock); /* - * RFHIGHPID does not mess with the lastpid counter during boot. - */ - if (flags & RFHIGHPID) - pidchecked = 0; - else - lastpid = trypid; - - p2 = newproc; - p2->p_state = PRS_NEW; /* protect against others */ - p2->p_pid = trypid; - LIST_INSERT_HEAD(&allproc, p2, p_list); - LIST_INSERT_HEAD(PIDHASH(p2->p_pid), p2, p_hash); - sx_xunlock(&allproc_lock); - - /* * Malloc things while we don't hold any locks. 
 */
 	if (flags & RFSIGSHARE)
@@ -619,6 +456,7 @@
 	}
 	mtx_unlock(&ktrace_mtx);
 #endif
+	knlist_init(&p2->p_klist, &p2->p_mtx);
 
 	/*
 	 * If PF_FORK is set, the child process inherits the
@@ -743,15 +581,11 @@
 	*procp = p2;
 	return (0);
 fail:
+	uid = td->td_ucred->cr_ruid;
 	sx_sunlock(&proctree_lock);
 	if (ppsratecheck(&lastfail, &curfail, 1))
 		printf("maxproc limit exceeded by uid %i, please see tuning(7) and login.conf(5).\n",
 		    uid);
-	sx_xunlock(&allproc_lock);
-#ifdef MAC
-	mac_destroy_proc(newproc);
-#endif
-	uma_zfree(proc_zone, newproc);
 	if (p1->p_flag & P_HADTHREADS) {
 		PROC_LOCK(p1);
 		thread_single_end();
Index: src/sys/kern/kern_proc.c
diff -u src/sys/kern/kern_proc.c:1.215.2.1 src/sys/kern/kern_proc.c:1.202.1000.21
--- src/sys/kern/kern_proc.c:1.215.2.1	Thu Sep  9 18:03:19 2004
+++ src/sys/kern/kern_proc.c	Mon Sep 13 10:40:03 2004
@@ -37,6 +37,7 @@
 #include "opt_kstack_pages.h"
 
 #include
+#include
 #include
 #include
 #include
@@ -80,12 +81,51 @@
 static void proc_fini(void *mem, int size);
 
 /*
+ * pid to proc lookup is done by indexing the pid_table array.
+ * Since pid numbers are only allocated when an empty slot
+ * has been found, there is no need to search any lists ever.
+ * (an orphaned pgrp will lock the slot, a session will lock
+ * the pgrp with the same number).
+ *
+ * If the table is too small it is reallocated with twice the
+ * previous size and the entries 'unzipped' into the two halves.
+ * A linked list of free entries is passed through the pt_proc
+ * field of 'free' items - set odd to be an invalid ptr.
+ */
+
+struct pid_table {
+	struct proc*	pt_proc;
+	struct pgrp*	pt_pgrp;
+};
+
+#if 1	/* strongly typed cast - should be a noop */
+static __inline intptr_t p2u(struct proc *p) { return (intptr_t)p; }
+#else
+#define	p2u(p) ((intptr_t)p)
+#endif
+
+#define	P_VALID(p)	(!(p2u(p) & 1))
+#define	P_NEXT(p)	(p2u(p) >> 1)
+#define	P_FREE(pid)	((struct proc *)((pid) << 1 | 1))
+
+#define	INITIAL_PID_TABLE_SIZE	(1 << 7)
+#define	RESERVED_PROCSLOT	10
+
+static struct pid_table *pid_table;
+
+static u_int pid_tbl_mask = (INITIAL_PID_TABLE_SIZE) - 1; /* table size 2^n */
+static u_int pid_alloc_lim;	/* max we allocate before growing table */
+static u_int pid_alloc_cnt = 0;
+
+/* links through free slots - never empty! */
+static u_int next_free_pt, last_free_pt, next_free_pt_highid;
+static pid_t pid_max = 1 << 12;	/* largest value we allocate */
+
+static int randompid = 0;
+
+/*
  * Other process lists
  */
-struct pidhashhead *pidhashtbl;
-u_long pidhash;
-struct pgrphashhead *pgrphashtbl;
-u_long pgrphash;
 struct proclist allproc;
 struct proclist zombproc;
 struct sx allproc_lock;
@@ -95,6 +135,9 @@
 uma_zone_t proc_zone;
 uma_zone_t ithread_zone;
 
+int nprocs = 1;		/* process 0 */
+int lastpid = 0;
+
 int kstack_pages = KSTACK_PAGES;
 int uarea_pages = UAREA_PAGES;
 SYSCTL_INT(_kern, OID_AUTO, kstack_pages, CTLFLAG_RD, &kstack_pages, 0, "");
@@ -103,26 +146,287 @@
 CTASSERT(sizeof(struct kinfo_proc) == KINFO_PROC_SIZE);
 
 /*
- * Initialize global process hashing structures.
+ * Initialize global process mapping structures.
  */
 void
 procinit()
 {
+	int i;
 
 	sx_init(&allproc_lock, "allproc");
 	sx_init(&proctree_lock, "proctree");
 	mtx_init(&pargs_ref_lock, "struct pargs.ref", NULL, MTX_DEF);
 	mtx_init(&ppeers_lock, "p_peers", NULL, MTX_DEF);
+
+	MALLOC(pid_table, struct pid_table *,
+	    INITIAL_PID_TABLE_SIZE * sizeof *pid_table, M_PROC, M_WAITOK);
+
+#define LINK_EMPTY ((PID_MAX + INITIAL_PID_TABLE_SIZE) & ~(INITIAL_PID_TABLE_SIZE - 1))
+	/*
+	 * Set free list running through table...
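+	 * Each free slot stores P_FREE(use count | next free slot): the
+	 * value is shifted left one bit with bit 0 set, so it can never
+	 * be mistaken for a real, pointer-aligned struct proc address.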
+	 * Preset 'use count' to -1 so we allocate pid 1 next.
+	 */
+	for (i = 0; i <= pid_tbl_mask; i++) {
+		pid_table[i].pt_proc = P_FREE(LINK_EMPTY + i + 1);
+		pid_table[i].pt_pgrp = 0;
+	}
+
+	/* slot 0 is just grabbed */
+	next_free_pt = 1;
+	next_free_pt_highid = RESERVED_PROCSLOT;
+	pid_table[0].pt_proc = &proc0;
+	pid_table[0].pt_pgrp = &pgrp0;
+
+	/* Need to fix up the last entry. */
+	last_free_pt = pid_tbl_mask;
+	pid_table[last_free_pt].pt_proc = P_FREE(LINK_EMPTY);
+
+	/* point at which we grow table - to avoid reusing pids too often */
+	pid_alloc_lim = pid_tbl_mask - 1;
+#undef LINK_EMPTY
+
 	LIST_INIT(&allproc);
 	LIST_INIT(&zombproc);
-	pidhashtbl = hashinit(maxproc / 4, M_PROC, &pidhash);
-	pgrphashtbl = hashinit(maxproc / 4, M_PROC, &pgrphash);
 	proc_zone = uma_zcreate("PROC", sched_sizeof_proc(),
 	    proc_ctor, proc_dtor, proc_init, proc_fini,
 	    UMA_ALIGN_PTR, UMA_ZONE_NOFREE);
 	uihashinit();
 }
 
+static void
+expand_pid_table(void)
+{
+	u_int pt_size = pid_tbl_mask + 1;
+	struct pid_table *n_pt, *new_pt;
+	struct proc *proc;
+	struct pgrp *pgrp;
+	int i;
+	pid_t pid;
+
+	new_pt = malloc(pt_size * 2 * sizeof *new_pt, M_PROC, M_WAITOK);
+
+	sx_xlock(&allproc_lock);
+	if (pt_size != pid_tbl_mask + 1) {
+		/* Another process beat us to it... */
+		sx_xunlock(&allproc_lock);
+		FREE(new_pt, M_PROC);
+		return;
+	}
+
+	/*
+	 * Copy entries from old table into new one.
+	 * If 'pid' is 'odd' we need to place in the upper half,
+	 * even pid's to the lower half.
+	 *
+	 * Free items stay in the low half so we don't have to
+	 * fixup the reference to them.
+	 *
+	 * We stuff free items on the front of the freelist
+	 * because we can't write to unmodified entries.
+	 *
+	 * Processing the table backwards maintains a semblance
+	 * of issuing pid numbers that increase with time.
+	 */
+	i = pt_size - 1;
+	n_pt = new_pt + i;
+	for (; ; i--, n_pt--) {
+		proc = pid_table[i].pt_proc;
+		pgrp = pid_table[i].pt_pgrp;
+		if (!P_VALID(proc)) {
+			/* Up 'use count' so that link is valid */
+			pid = (P_NEXT(proc) + pt_size) & ~pt_size;
+			proc = P_FREE(pid);
+			if (pgrp)
+				pid = pgrp->pg_id;
+		} else
+			pid = proc->p_pid;
+
+		/* Save entry in appropriate half of table */
+		n_pt[pid & pt_size].pt_proc = proc;
+		n_pt[pid & pt_size].pt_pgrp = pgrp;
+
+		/* Put other piece on start of free list */
+		pid = (pid ^ pt_size) & ~pid_tbl_mask;
+		n_pt[pid & pt_size].pt_proc =
+		    P_FREE((pid & ~pt_size) | next_free_pt);
+		n_pt[pid & pt_size].pt_pgrp = 0;
+		next_free_pt = i | (pid & pt_size);
+		if (i == 0)
+			break;
+	}
+
+	/* Switch tables */
+	n_pt = pid_table;
+	pid_table = new_pt;
+	pid_tbl_mask = pt_size * 2 - 1;
+
+	/*
+	 * pid_max starts as 1 << 12 (4096), once we have 2048
+	 * allocated pids we need it to be larger!
+	 */
+	if (pid_tbl_mask > pid_max) {
+		pid_max = pid_tbl_mask * 2 + 1;
+		pid_alloc_lim |= pid_alloc_lim << 1;
+		if (pid_max > PID_MAX)
+			pid_max = PID_MAX;
+	} else
+		pid_alloc_lim <<= 1;	/* doubles number of free slots... */
+
+	sx_xunlock(&allproc_lock);
+	FREE(n_pt, M_PROC);
+}
+
+/*
+ * Allocate a free proc structure.  This method is called from fork1.
+ *
+ * Expand the mapping table when needed.
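+ *
+ * The pid returned encodes the slot's saved use count plus the table
+ * size plus the slot index, so each reuse of a slot yields a pid that
+ * is larger by at least the table size (before the optional randompid
+ * stride is applied).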
+ */ +struct proc * +proc_alloc(struct thread *td, int flags) +{ + struct proc *p, *p1; + int nxt; + pid_t pid; + struct pid_table *pt; + int ok; + uid_t uid; + int highpid; + + p = uma_zalloc(proc_zone, M_WAITOK); + p->p_state = PRS_NEW; /* protect against others */ + + /* allocate next free pid */ + for (;;expand_pid_table()) { + highpid = 0; + if (pid_alloc_cnt >= pid_alloc_lim) + /* ensure pids cycle through 2000+ values */ + continue; + sx_xlock(&allproc_lock); + if ((flags & RFHIGHPID) && next_free_pt < RESERVED_PROCSLOT) { + highpid = 1; + pt = &pid_table[next_free_pt_highid]; + nxt = P_NEXT(pt->pt_proc); + if (nxt & pid_tbl_mask) { + /* Fix the free list link */ + if (!P_VALID(pid_table[RESERVED_PROCSLOT - 1].pt_proc)) + pid_table[RESERVED_PROCSLOT - 1].pt_proc = + P_FREE(P_NEXT(pid_table[RESERVED_PROCSLOT - 1].pt_proc) + | (nxt & pid_tbl_mask)); + /* XXX: Shouldn't we unlock allproc_lock? */ + break; + } + } else { + pt = &pid_table[next_free_pt]; + nxt = P_NEXT(pt->pt_proc); + /* XXX: Shouldn't we unlock allproc_lock at the break? */ + if (nxt & pid_tbl_mask) + break; + } + /* XXX: Shouldn't we unlock allproc_lock before break? */ + if (nxt & pid_tbl_mask) + break; + /* Table full - expand (NB last entry not used....) */ + sx_xunlock(&allproc_lock); + } + + KASSERT((!P_VALID(pt->pt_proc)), ("Proc slot is not free")); + /* pid is 'saved use count' + 'size' + entry */ + pid = (nxt & ~pid_tbl_mask) + pid_tbl_mask + 1 + + (highpid ? next_free_pt_highid : next_free_pt); + + /* + * Handle this now, so that we don't have to grab the allproc lock + * again later in fork1(). + * + * Although process entries are dynamically created, we still keep + * a global limit on the maximum number we will create. Don't allow + * a nonprivileged user to use the last ten processes; don't let root + * exceed the limit. The variable nprocs is the current number of + * processes, maxproc is the limit. + */ + uid = td->td_ucred->cr_ruid; + if ((nprocs >= maxproc - 10 && + suser_cred(td->td_ucred, SUSER_RUID) != 0) || + nprocs >= maxproc) + goto bad; + + /* + * Increment the nprocs resource before blocking can occur. There + * are hard-limits as to the number of processes that can run. + */ + p1 = td->td_proc; + PROC_LOCK(p1); + ok = chgproccnt(td->td_ucred->cr_ruidinfo, 1, + (uid != 0) ? lim_cur(p1, RLIMIT_NPROC) : 0); + PROC_UNLOCK(p1); + if (!ok) + goto bad; + + if (randompid) { + pid += (arc4random() % randompid) * (pid_tbl_mask + 1); + } + + if ((u_int)pid > (u_int)pid_max) + pid &= pid_tbl_mask; + + p->p_pid = pid; + if (highpid) + next_free_pt_highid = nxt & pid_tbl_mask; + else + next_free_pt = nxt & pid_tbl_mask; + + /* Grab table slot */ + pt->pt_proc = p; + pid_alloc_cnt++; + + /* + * RFHIGHPID does not mess with the lastpid counter during boot. 
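+	 * Such allocations are served from next_free_pt_highid, i.e. from
+	 * slots at or above RESERVED_PROCSLOT, whenever the normal free
+	 * list head would hand out a low-numbered slot.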
+ */ + if (!(flags & RFHIGHPID)) + lastpid = pid; + + LIST_INSERT_HEAD(&allproc, p, p_list); + nprocs++; + + sx_xunlock(&allproc_lock); + + return p; + +bad: + uma_zfree(proc_zone, p); + sx_xunlock(&allproc_lock); + return (NULL); +} + +/* + * Free last resources of a process - called from kern_wait (in kern_exit.c) + */ +void +proc_free(struct proc *p) +{ + pid_t pid = p->p_pid; + struct pid_table *pt; + + sx_xlock(&allproc_lock); + + pt = &pid_table[pid & pid_tbl_mask]; + /* save pid use count in slot */ + pt->pt_proc = P_FREE(pid & ~pid_tbl_mask); + + if (pt->pt_pgrp == NULL) { + /* link last freed entry onto ours */ + pid &= pid_tbl_mask; + pt = &pid_table[last_free_pt]; + pt->pt_proc = P_FREE(P_NEXT(pt->pt_proc) | pid); + last_free_pt = pid; + pid_alloc_cnt--; + } + + nprocs--; + sx_xunlock(&allproc_lock); + uma_zfree(proc_zone, p); +} + /* * Prepare a proc for use. */ @@ -241,15 +545,33 @@ register struct proc *p; sx_slock(&allproc_lock); - LIST_FOREACH(p, PIDHASH(pid), p_hash) - if (p->p_pid == pid) { - if (p->p_state == PRS_NEW) { - p = NULL; - break; - } - PROC_LOCK(p); - break; - } + p = pid_table[pid & pid_tbl_mask].pt_proc; + /* Only allow live processes to be found by pid. */ + if (!P_VALID(p) || p->p_pid != pid || p->p_state == PRS_NEW) + p = NULL; + else + PROC_LOCK(p); + /* XXX MP - need to have a reference count... */ + sx_sunlock(&allproc_lock); + return (p); +} + +/* + * Locate a zombie process by number + */ +struct proc * +zpfind(register pid_t pid) +{ + register struct proc *p; + + sx_slock(&allproc_lock); + p = pid_table[pid & pid_tbl_mask].pt_proc; + /* Only allow zombie processes to be found by pid. */ + if (!P_VALID(p) || p->p_pid != pid || p->p_state != PRS_ZOMBIE) + p = NULL; + else + PROC_LOCK(p); + /* XXX MP - need to have a reference count... */ sx_sunlock(&allproc_lock); return (p); } @@ -264,15 +586,21 @@ { register struct pgrp *pgrp; - sx_assert(&proctree_lock, SX_LOCKED); + sx_slock(&allproc_lock); + pgrp = pid_table[pgid & pid_tbl_mask].pt_pgrp; - LIST_FOREACH(pgrp, PGRPHASH(pgid), pg_hash) { - if (pgrp->pg_id == pgid) { - PGRP_LOCK(pgrp); - return (pgrp); - } - } - return (NULL); + /* + * Can't look up a pgrp that only exists because the session + * hasn't died yet (traditional) + */ + if (pgrp == NULL || pgrp->pg_id != pgid + || LIST_EMPTY(&pgrp->pg_members)) + pgrp = NULL; + else + PGRP_LOCK(pgrp); + /* XXX MP - need to have a reference count... */ + sx_sunlock(&allproc_lock); + return pgrp; } /* @@ -337,7 +665,7 @@ * As we have an exclusive lock of proctree_lock, * this should not deadlock. 
*/ - LIST_INSERT_HEAD(PGRPHASH(pgid), pgrp, pg_hash); + pid_table[pgid & pid_tbl_mask].pt_pgrp = pgrp; pgrp->pg_jobc = 0; SLIST_INIT(&pgrp->pg_sigiolst); PGRP_UNLOCK(pgrp); @@ -436,6 +764,31 @@ } /* + * remove the pg from the PIDTable + */ +static void +pgunlink(pid_t pg_id) +{ + struct pgrp *pgrp; + struct pid_table *pt; + + sx_assert(&proctree_lock, SX_XLOCKED); + pt = &pid_table[pg_id & pid_tbl_mask]; + pgrp = pt->pt_pgrp; + pt->pt_pgrp = 0; + + if (!P_VALID(pt->pt_proc)) { + /* orphaned pgrp, put slot onto free list */ + pg_id &= pid_tbl_mask; + pt = &pid_table[last_free_pt]; + pt->pt_proc = P_FREE(P_NEXT(pt->pt_proc) | pg_id); + last_free_pt = pg_id; + pid_alloc_cnt--; + } + +} + +/* * delete a process group */ static void @@ -459,7 +812,6 @@ if (pgrp->pg_session->s_ttyp != NULL && pgrp->pg_session->s_ttyp->t_pgrp == pgrp) pgrp->pg_session->s_ttyp->t_pgrp = NULL; - LIST_REMOVE(pgrp, pg_hash); savesess = pgrp->pg_session; SESS_LOCK(savesess); i = --savesess->s_count; @@ -470,6 +822,11 @@ ttyrel(savesess->s_ttyp); mtx_destroy(&savesess->s_mtx); FREE(savesess, M_SESSION); + pgunlink(pgrp->pg_id); + } + else { + if (savesess->s_sid != pgrp->pg_id) + pgunlink(pgrp->pg_id); } mtx_destroy(&pgrp->pg_mtx); FREE(pgrp, M_PGRP); @@ -493,6 +850,22 @@ } /* + * Delete session - called from SESSRELE when s_count becomes zero. + */ +void +sessdelete(struct session *ss) +{ + /* + * We keep the pgrp with the same id as the session in + * order to stop a process being given the same pid. + * Since the pgrp holds a reference to the session, it + * must be a 'zombie' pgrp by now. + */ + pgunlink(ss->s_sid); + FREE(ss, M_SESSION); +} + +/* * Adjust pgrp jobc counters when specified process changes process group. * We count the number of processes in each process group that "qualify" * the group for terminal job control (those with a parent in a different @@ -581,24 +954,39 @@ DB_SHOW_COMMAND(pgrpdump, pgrpdump) { register struct pgrp *pgrp; + register struct pid_table *pt; register struct proc *p; - register int i; + int id; + int quit = 0; - for (i = 0; i <= pgrphash; i++) { - if (!LIST_EMPTY(&pgrphashtbl[i])) { - printf("\tindx %d\n", i); - LIST_FOREACH(pgrp, &pgrphashtbl[i], pg_hash) { - printf( - "\tpgrp %p, pgid %ld, sess %p, sesscnt %d, mem %p\n", - (void *)pgrp, (long)pgrp->pg_id, - (void *)pgrp->pg_session, - pgrp->pg_session->s_count, - (void *)LIST_FIRST(&pgrp->pg_members)); - LIST_FOREACH(p, &pgrp->pg_members, p_pglist) { - printf("\t\tpid %ld addr %p pgrp %p\n", - (long)p->p_pid, (void *)p, - (void *)p->p_pgrp); - } + db_setup_paging(db_simple_pager, &quit, DB_LINES_PER_PAGE); + printf("pid table %p size %x, next %x, last %x\n", + pid_table, pid_tbl_mask+1, + next_free_pt, last_free_pt); + for (pt = pid_table, id = 0; id <= pid_tbl_mask && !quit; id++, pt++) { + p = pt->pt_proc; + if (!P_VALID(p) && !pt->pt_pgrp) + continue; + db_printf(" id %x: ", id); + if (P_VALID(p)) + db_printf("proc %p id %d (0x%x) %s\n", + p, p->p_pid, p->p_pid, p->p_comm); + else + db_printf("next %x use %x\n", + P_NEXT(p) & pid_tbl_mask, + P_NEXT(p) & ~pid_tbl_mask); + if ((pgrp = pt->pt_pgrp)) { + db_printf("\tsession %p, sid %d, count %d, login %s\n", + pgrp->pg_session, pgrp->pg_session->s_sid, + pgrp->pg_session->s_count, + pgrp->pg_session->s_login); + db_printf("\tpgrp %p, pg_id %d, pg_jobc %d, members %p\n", + pgrp, pgrp->pg_id, pgrp->pg_jobc, + pgrp->pg_members.lh_first); + for (p = pgrp->pg_members.lh_first; p != 0 && !quit; + p = p->p_pglist.le_next) { + db_printf("\t\tpid %d addr %p pgrp %p %s\n", + p->p_pid, 
p, p->p_pgrp, p->p_comm);
+		}
+	}
 }
@@ -817,24 +1205,6 @@
 		kp->ki_ppid = p->p_pptr->p_pid;
 }
 
-/*
- * Locate a zombie process by number
- */
-struct proc *
-zpfind(pid_t pid)
-{
-	struct proc *p;
-
-	sx_slock(&allproc_lock);
-	LIST_FOREACH(p, &zombproc, p_list)
-		if (p->p_pid == pid) {
-			PROC_LOCK(p);
-			break;
-		}
-	sx_sunlock(&allproc_lock);
-	return (p);
-}
-
 #define KERN_PROC_ZOMBMASK	0x3
 #define KERN_PROC_NOTHREADS	0x4
@@ -1187,6 +1557,7 @@
 	return (sysctl_handle_string(oidp, sv_name, 0, req));
 }
 
+SYSCTL_INT(_kern, OID_AUTO, lastpid, CTLFLAG_RD, &lastpid, 0, "Last used PID");
 SYSCTL_NODE(_kern, KERN_PROC, proc, CTLFLAG_RD, 0, "Process table");
 
@@ -1252,3 +1623,54 @@
 SYSCTL_NODE(_kern_proc, (KERN_PROC_PROC | KERN_PROC_INC_THREAD), proc_td,
 	CTLFLAG_RD, sysctl_kern_proc, "Return process table, no threads");
+
+/*
+ * Random component to lastpid generation.  We mix in a random factor to make
+ * it a little harder to predict.  We sanity check the modulus value to avoid
+ * doing it in critical paths.  Don't let it be too small or we pointlessly
+ * waste randomness entropy, and don't let it be impossibly large.  (With the
+ * pid table there is no longer a pid scan to slow down; the modulus now only
+ * scales the random stride added on each allocation.)
+ */
+
+static int
+sysctl_kern_randompid(SYSCTL_HANDLER_ARGS)
+{
+	int error, pid;
+
+	error = sysctl_wire_old_buffer(req, sizeof(int));
+	if (error != 0)
+		return (error);
+	pid = randompid;
+	error = sysctl_handle_int(oidp, &pid, 0, req);
+	if (error == 0 && req->newptr != NULL) {
+		if (pid < 0 || pid > 100)	/* out of range */
+			pid = 100;
+		else if (pid < 2)		/* NOP */
+			pid = 0;
+		randompid = pid;
+	}
+	return (error);
+}
+
+SYSCTL_PROC(_kern, OID_AUTO, randompid, CTLTYPE_INT|CTLFLAG_RW,
+    0, 0, sysctl_kern_randompid, "I", "Random PID modulus");
+
+static int
+sysctl_kern_currentmaxpid(SYSCTL_HANDLER_ARGS)
+{
+	int error, maxpid;
+
+	maxpid = pid_max;
+	error = sysctl_wire_old_buffer(req, sizeof(int));
+	if (error != 0)
+		return (error);
+	error = sysctl_handle_int(oidp, &maxpid, 0, req);
+	if (error == 0 && req->newptr != NULL) {
+		if (maxpid > pid_tbl_mask * 2 + 1 && maxpid <= PID_MAX)
+			pid_max = maxpid;
+		else
+			error = EINVAL;
+	}
+	return (error);
+}
+
+SYSCTL_PROC(_kern, OID_AUTO, maxpid, CTLTYPE_INT|CTLFLAG_RW,
+    0, 0, sysctl_kern_currentmaxpid, "I", "Current Maximum PID");
+
Index: src/sys/netinet/tcp_input.c
diff -u src/sys/netinet/tcp_input.c:1.252 src/sys/netinet/tcp_input.c:1.241.1000.19
--- src/sys/netinet/tcp_input.c:1.252	Wed Aug 18 06:05:54 2004
+++ src/sys/netinet/tcp_input.c	Thu Aug 19 19:29:15 2004
@@ -1,4 +1,54 @@
 /*
+ * Copyright (c) 2002, 2003, 2004 Jeffrey M. Hsu.  All rights reserved.
+ * Copyright (c) 2002, 2003, 2004 The DragonFly Project.  All rights reserved.
+ *
+ * This code is derived from software contributed to The DragonFly Project
+ * by Jeffrey M. Hsu.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of The DragonFly Project nor the names of its
+ *    contributors may be used to endorse or promote products derived
+ *    from this software without specific, prior written permission.
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE + * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED + * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, + * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT + * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +/* + * Copyright (c) 2002, 2003, 2004 Jeffrey M. Hsu. All rights reserved. + * + * License terms: all terms for the DragonFly license above plus the following: + * + * 4. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * + * This product includes software developed by Jeffrey M. Hsu + * for the DragonFly Project. + * + * This requirement may be waived with permission from Jeffrey Hsu. + * This requirement will sunset and may be removed on July 8 2005, + * after which the standard DragonFly license (as shown above) will + * apply. + */ + +/* * Copyright (c) 1982, 1986, 1988, 1990, 1993, 1994, 1995 * The Regents of the University of California. All rights reserved. * @@ -127,11 +177,20 @@ SYSCTL_INT(_net_inet_tcp, OID_AUTO, rfc3042, CTLFLAG_RW, &tcp_do_rfc3042, 0, "Enable RFC 3042 (Limited Transmit)"); +static int tcp_do_early_retransmit = 0; +SYSCTL_INT(_net_inet_tcp, OID_AUTO, earlyretransmit, CTLFLAG_RW, + &tcp_do_early_retransmit, 0, "Early retransmit"); + static int tcp_do_rfc3390 = 1; SYSCTL_INT(_net_inet_tcp, OID_AUTO, rfc3390, CTLFLAG_RW, &tcp_do_rfc3390, 0, "Enable RFC 3390 (Increasing TCP's Initial Congestion Window)"); +static int tcp_do_eifel_detect = 1; +SYSCTL_INT(_net_inet_tcp, OID_AUTO, eifel, CTLFLAG_RW, + &tcp_do_eifel_detect, 0, + "Eifel detection algorithm (RFC 3522)"); + SYSCTL_NODE(_net_inet_tcp, OID_AUTO, reass, CTLFLAG_RW, 0, "TCP Segment Reassembly Queue"); @@ -436,9 +495,9 @@ struct ip6_hdr *ip6 = NULL; #ifdef INET6 - int isipv6; + boolean_t isipv6; #else - const int isipv6 = 0; + const boolean_t isipv6 = FALSE; #endif #ifdef TCPDEBUG @@ -452,7 +511,7 @@ #endif #ifdef INET6 - isipv6 = (mtod(m, struct ip *)->ip_v == 6) ? 1 : 0; + isipv6 = (mtod(m, struct ip *)->ip_v == 6) ? TRUE : FALSE; #endif bzero(&tao, sizeof(tao)); bzero((char *)&to, sizeof(to)); @@ -615,7 +674,7 @@ /* Grab info from PACKET_TAG_IPFORWARD tag prepended to the chain. */ fwd_tag = m_tag_find(m, PACKET_TAG_IPFORWARD, NULL); - if (fwd_tag != NULL && isipv6 == 0) { /* IPv6 support is not yet */ + if (fwd_tag != NULL && !isipv6) { /* IPv6 support is not yet */ struct sockaddr_in *next_hop; next_hop = (struct sockaddr_in *)(fwd_tag+1); @@ -786,7 +845,7 @@ struct in_conninfo inc; #ifdef INET6 - inc.inc_isipv6 = isipv6; + inc.inc_isipv6 = (isipv6 == TRUE); #endif if (isipv6) { inc.inc6_faddr = ip6->ip6_src; @@ -1165,19 +1224,27 @@ ++tcpstat.tcps_predack; /* * "bad retransmit" recovery + * + * If Eifel detection applies, then + * it is deterministic, so use it + * unconditionally over the old heuristic + * Otherwise, fall back to the old heuristic. 
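+				 * Eifel (RFC 3522) compares the echoed
+				 * timestamp against t_rexmtTS, recorded
+				 * when the retransmit was sent: an echo
+				 * older than that proves the original
+				 * transmission, not the retransmit, was
+				 * ACKed.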
*/ - if (tp->t_rxtshift == 1 && + if (tcp_do_eifel_detect && + (to.to_flags & TOF_TS) && to.to_tsecr && + (tp->t_flags & TF_FIRSTACCACK)) { + /* Eifel detection applicable. */ + if (to.to_tsecr < tp->t_rexmtTS) { + tcp_revert_congestion_state(tp); + ++tcpstat.tcps_eifeldetected; + } + } else if (tp->t_rxtshift == 1 && ticks < tp->t_badrxtwin) { - ++tcpstat.tcps_sndrexmitbad; - tp->snd_cwnd = tp->snd_cwnd_prev; - tp->snd_ssthresh = - tp->snd_ssthresh_prev; - tp->snd_recover = tp->snd_recover_prev; - if (tp->t_flags & TF_WASFRECOVERY) - ENTER_FASTRECOVERY(tp); - tp->snd_nxt = tp->snd_max; - tp->t_badrxtwin = 0; + tcp_revert_congestion_state(tp); + ++tcpstat.tcps_rttdetected; } + tp->t_flags &= ~(TF_FIRSTACCACK | + TF_FASTREXMT | TF_EARLYREXMT); /* * Recalculate the transmit timer / rtt. @@ -1201,9 +1268,7 @@ tcpstat.tcps_rcvackpack++; tcpstat.tcps_rcvackbyte += acked; sbdrop(&so->so_snd, acked); - if (SEQ_GT(tp->snd_una, tp->snd_recover) && - SEQ_LEQ(th->th_ack, tp->snd_recover)) - tp->snd_recover = th->th_ack - 1; + tp->snd_recover = th->th_ack - 1; tp->snd_una = th->th_ack; /* * pull snd_wl2 up to prevent seq wrap relative @@ -1947,7 +2012,7 @@ (void) tcp_output(tp); goto drop; } else if (tp->t_dupacks == tcprexmtthresh) { - tcp_seq onxt = tp->snd_nxt; + tcp_seq onxt; u_int win; /* @@ -1969,6 +2034,12 @@ break; } } +fastretransmit: + if (tcp_do_eifel_detect && + (tp->t_flags & TF_RCVD_TSTMP)) { + tcp_save_congestion_state(tp); + tp->t_flags |= TF_FASTREXMT; + } win = min(tp->snd_wnd, tp->snd_cwnd) / 2 / tp->t_maxseg; if (win < 2) @@ -1978,6 +2049,7 @@ tp->snd_recover = tp->snd_max; callout_stop(tp->tt_rexmt); tp->t_rtttime = 0; + onxt = tp->snd_nxt; if (tp->sack_enable) { tcpstat.tcps_sack_recovery_episode++; tp->snd_cwnd = @@ -1992,6 +2064,7 @@ tp->snd_nxt = th->th_ack; tp->snd_cwnd = tp->t_maxseg; (void) tcp_output(tp); + ++tcpstat.tcps_sndfastrexmit; KASSERT(tp->snd_limited <= 2, ("tp->snd_limited too big")); tp->snd_cwnd = tp->snd_ssthresh + @@ -2003,18 +2076,21 @@ } else if (tcp_do_rfc3042) { u_long oldcwnd = tp->snd_cwnd; tcp_seq oldsndmax = tp->snd_max; + /* outstanding data */ + uint32_t ownd = tp->snd_max - tp->snd_una; u_int sent; +#define iceildiv(n, d) (((n)+(d)-1) / (d)) KASSERT(tp->t_dupacks == 1 || tp->t_dupacks == 2, ("dupacks not 1 or 2")); if (tp->t_dupacks == 1) tp->snd_limited = 0; - tp->snd_cwnd = - (tp->snd_nxt - tp->snd_una) + + tp->snd_cwnd = ownd + (tp->t_dupacks - tp->snd_limited) * tp->t_maxseg; (void) tcp_output(tp); + tp->snd_cwnd = oldcwnd; sent = tp->snd_max - oldsndmax; if (sent > tp->t_maxseg) { KASSERT((tp->t_dupacks == 2 && @@ -2022,9 +2098,24 @@ (sent == tp->t_maxseg + 1 && tp->t_flags & TF_SENTFIN), ("sent too much")); + KASSERT(sent <= + tp->t_maxseg * 2, + ("sent too many segments")); tp->snd_limited = 2; - } else if (sent > 0) + tcpstat.tcps_sndlimited += 2; + } else if (sent > 0) { ++tp->snd_limited; + ++tcpstat.tcps_sndlimited; + } else if (tcp_do_early_retransmit && + (tcp_do_eifel_detect && + (tp->t_flags & TF_RCVD_TSTMP)) && + tcp_do_newreno && + tp->t_dupacks + 1 >= + iceildiv(ownd, tp->t_maxseg)) { + ++tcpstat.tcps_sndearlyrexmit; + tp->t_flags |= TF_EARLYREXMT; + goto fastretransmit; + } tp->snd_cwnd = oldcwnd; goto drop; } @@ -2073,6 +2164,12 @@ } tp->t_dupacks = 0; if (SEQ_GT(th->th_ack, tp->snd_max)) { + /* + * Detected optimistic ACK attack. + * Force slow-start to de-synchronize attack. 
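+			 * An ACK beyond snd_max acknowledges data that was
+			 * never sent; clamping cwnd to one segment keeps a
+			 * lying receiver from inflating our send rate.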
+ */ + tp->snd_cwnd = tp->t_maxseg; + tcpstat.tcps_rcvacktoomuch++; goto dropafterack; } @@ -2114,15 +2211,20 @@ * original cwnd and ssthresh, and proceed to transmit where * we left off. */ - if (tp->t_rxtshift == 1 && ticks < tp->t_badrxtwin) { - ++tcpstat.tcps_sndrexmitbad; - tp->snd_cwnd = tp->snd_cwnd_prev; - tp->snd_ssthresh = tp->snd_ssthresh_prev; - tp->snd_recover = tp->snd_recover_prev; - if (tp->t_flags & TF_WASFRECOVERY) - ENTER_FASTRECOVERY(tp); - tp->snd_nxt = tp->snd_max; - tp->t_badrxtwin = 0; /* XXX probably not required */ + if (tcp_do_eifel_detect && acked && + (to.to_flags & TOF_TS) && to.to_tsecr && + (tp->t_flags & TF_FIRSTACCACK)) { + /* Eifel detection applicable. */ + if (to.to_tsecr < tp->t_rexmtTS) { + ++tcpstat.tcps_eifeldetected; + tcp_revert_congestion_state(tp); + if (tp->t_rxtshift == 1 && + ticks >= tp->t_badrxtwin) + ++tcpstat.tcps_rttcantdetect; + } + } else if (tp->t_rxtshift == 1 && ticks < tp->t_badrxtwin) { + tcp_revert_congestion_state(tp); + ++tcpstat.tcps_rttdetected; } /* @@ -2167,6 +2269,9 @@ if (acked == 0) goto step6; + /* Stop looking for an acceptable ACK since one was received. */ + tp->t_flags &= ~(TF_FIRSTACCACK | TF_FASTREXMT | TF_EARLYREXMT); + /* * When new data is acked, open the congestion window. * If the window gives us less than ssthresh packets @@ -2193,16 +2298,14 @@ ourfinisacked = 0; } sowwakeup_locked(so); - /* detect una wraparound */ - if ((tcp_do_newreno || tp->sack_enable) && - !IN_FASTRECOVERY(tp) && - SEQ_GT(tp->snd_una, tp->snd_recover) && - SEQ_LEQ(th->th_ack, tp->snd_recover)) - tp->snd_recover = th->th_ack - 1; - if ((tcp_do_newreno || tp->sack_enable) && - IN_FASTRECOVERY(tp) && - SEQ_GEQ(th->th_ack, tp->snd_recover)) - EXIT_FASTRECOVERY(tp); + if (tcp_do_newreno) { + if (IN_FASTRECOVERY(tp)) { + if (SEQ_GEQ(th->th_ack, tp->snd_recover)) + EXIT_FASTRECOVERY(tp); + } else { + tp->snd_recover = th->th_ack - 1; + } + } tp->snd_una = th->th_ack; if (tp->sack_enable) { if (SEQ_GT(tp->snd_una, tp->snd_recover)) @@ -2897,7 +3000,7 @@ struct rmxp_tao tao; int origoffer = offer; #ifdef INET6 - int isipv6 = ((inp->inp_vflag & INP_IPV6) != 0) ? 1 : 0; + boolean_t isipv6 = ((inp->inp_vflag & INP_IPV6) ? TRUE : FALSE); size_t min_protoh = isipv6 ? sizeof (struct ip6_hdr) + sizeof (struct tcphdr) : sizeof (struct tcpiphdr); @@ -3148,7 +3251,7 @@ u_long thcmtu = 0; size_t min_protoh; #ifdef INET6 - int isipv6 = inc->inc_isipv6 ? 1 : 0; + boolean_t isipv6 = (inc->inc_isipv6 ? TRUE : FALSE); #endif KASSERT(inc != NULL, ("tcp_mssopt with NULL in_conninfo pointer")); @@ -3227,7 +3330,7 @@ #ifdef INET6 int isipv6 = (mtod(m, struct ip *)->ip_v == 6) ? 1 : 0; #else - const int isipv6 = 0; + const boolean_t isipv6 = FALSE; #endif thflags = th->th_flags; Index: src/sys/netinet/tcp_timer.c diff -u src/sys/netinet/tcp_timer.c:1.66 src/sys/netinet/tcp_timer.c:1.64.1000.5 --- src/sys/netinet/tcp_timer.c:1.66 Tue Aug 17 02:32:07 2004 +++ src/sys/netinet/tcp_timer.c Tue Aug 17 10:13:43 2004 @@ -1,4 +1,54 @@ /* + * Copyright (c) 2002, 2003, 2004 Jeffrey M. Hsu. All rights reserved. + * Copyright (c) 2002, 2003, 2004 The DragonFly Project. All rights reserved. + * + * This code is derived from software contributed to The DragonFly Project + * by Jeffrey M. Hsu. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. 
Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of The DragonFly Project nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific, prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE + * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED + * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, + * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT + * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +/* + * Copyright (c) 2002, 2003, 2004 Jeffrey M. Hsu. All rights reserved. + * + * License terms: all terms for the DragonFly license above plus the following: + * + * 4. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * + * This product includes software developed by Jeffrey M. Hsu + * for the DragonFly Project. + * + * This requirement may be waived with permission from Jeffrey Hsu. + * This requirement will sunset and may be removed on July 8 2005, + * after which the standard DragonFly license (as shown above) will + * apply. + */ + +/* * Copyright (c) 1982, 1986, 1988, 1990, 1993, 1995 * The Regents of the University of California. All rights reserved. * @@ -469,6 +519,41 @@ } void +tcp_save_congestion_state(struct tcpcb *tp) +{ + tp->snd_cwnd_prev = tp->snd_cwnd; + tp->snd_ssthresh_prev = tp->snd_ssthresh; + tp->snd_recover_prev = tp->snd_recover; + if (IN_FASTRECOVERY(tp)) + tp->t_flags |= TF_WASFRECOVERY; + else + tp->t_flags &= ~TF_WASFRECOVERY; + if (tp->t_flags & TF_RCVD_TSTMP) { + tp->t_rexmtTS = ticks; + tp->t_flags |= TF_FIRSTACCACK; + } +} + +void +tcp_revert_congestion_state(struct tcpcb *tp) +{ + tp->snd_cwnd = tp->snd_cwnd_prev; + tp->snd_ssthresh = tp->snd_ssthresh_prev; + tp->snd_recover = tp->snd_recover_prev; + if (tp->t_flags & TF_WASFRECOVERY) + ENTER_FASTRECOVERY(tp); + if (tp->t_flags & TF_FASTREXMT) { + ++tcpstat.tcps_sndfastrexmitbad; + if (tp->t_flags & TF_EARLYREXMT) + ++tcpstat.tcps_sndearlyrexmitbad; + } else + ++tcpstat.tcps_sndrtobad; + tp->t_badrxtwin = 0; + tp->t_rxtshift = 0; + tp->snd_nxt = tp->snd_max; +} + +void tcp_timer_rexmt(xtp) void *xtp; { @@ -524,14 +609,9 @@ * "On Estimating End-to-End Network Path Properties" by * Allman and Paxson for more details. 
*/ - tp->snd_cwnd_prev = tp->snd_cwnd; - tp->snd_ssthresh_prev = tp->snd_ssthresh; - tp->snd_recover_prev = tp->snd_recover; - if (IN_FASTRECOVERY(tp)) - tp->t_flags |= TF_WASFRECOVERY; - else - tp->t_flags &= ~TF_WASFRECOVERY; tp->t_badrxtwin = ticks + (tp->t_srtt >> (TCP_RTT_SHIFT + 1)); + tcp_save_congestion_state(tp); + tp->t_flags &= ~(TF_FASTREXMT | TF_EARLYREXMT); } tcpstat.tcps_rexmttimeo++; if (tp->t_state == TCPS_SYN_SENT) Index: src/sys/netinet/tcp_var.h diff -u src/sys/netinet/tcp_var.h:1.109 src/sys/netinet/tcp_var.h:1.105.1000.9 --- src/sys/netinet/tcp_var.h:1.109 Tue Aug 17 02:32:07 2004 +++ src/sys/netinet/tcp_var.h Tue Aug 17 10:13:43 2004 @@ -1,4 +1,54 @@ /* + * Copyright (c) 2002, 2003, 2004 Jeffrey M. Hsu. All rights reserved. + * Copyright (c) 2002, 2003, 2004 The DragonFly Project. All rights reserved. + * + * This code is derived from software contributed to The DragonFly Project + * by Jeffrey M. Hsu. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of The DragonFly Project nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific, prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE + * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED + * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, + * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT + * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +/* + * Copyright (c) 2002, 2003, 2004 Jeffrey M. Hsu. All rights reserved. + * + * License terms: all terms for the DragonFly license above plus the following: + * + * 4. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * + * This product includes software developed by Jeffrey M. Hsu + * for the DragonFly Project. + * + * This requirement may be waived with permission from Jeffrey Hsu. + * This requirement will sunset and may be removed on July 8 2005, + * after which the standard DragonFly license (as shown above) will + * apply. + */ + +/* * Copyright (c) 1982, 1986, 1993, 1994, 1995 * The Regents of the University of California. All rights reserved. 
* @@ -90,29 +140,33 @@ struct inpcb *t_inpcb; /* back pointer to internet pcb */ int t_state; /* state of this connection */ u_int t_flags; -#define TF_ACKNOW 0x000001 /* ack peer immediately */ -#define TF_DELACK 0x000002 /* ack, but try to delay it */ -#define TF_NODELAY 0x000004 /* don't delay packets to coalesce */ -#define TF_NOOPT 0x000008 /* don't use tcp options */ -#define TF_SENTFIN 0x000010 /* have sent FIN */ -#define TF_REQ_SCALE 0x000020 /* have/will request window scaling */ -#define TF_RCVD_SCALE 0x000040 /* other side has requested scaling */ -#define TF_REQ_TSTMP 0x000080 /* have/will request timestamps */ -#define TF_RCVD_TSTMP 0x000100 /* a timestamp was received in SYN */ -#define TF_SACK_PERMIT 0x000200 /* other side said I could SACK */ -#define TF_NEEDSYN 0x000400 /* send SYN (implicit state) */ -#define TF_NEEDFIN 0x000800 /* send FIN (implicit state) */ -#define TF_NOPUSH 0x001000 /* don't push */ -#define TF_REQ_CC 0x002000 /* have/will request CC */ -#define TF_RCVD_CC 0x004000 /* a CC was received in SYN */ -#define TF_SENDCCNEW 0x008000 /* send CCnew instead of CC in SYN */ -#define TF_MORETOCOME 0x010000 /* More data to be appended to sock */ -#define TF_LQ_OVERFLOW 0x020000 /* listen queue overflow */ -#define TF_LASTIDLE 0x040000 /* connection was previously idle */ -#define TF_RXWIN0SENT 0x080000 /* sent a receiver win 0 in response */ -#define TF_FASTRECOVERY 0x100000 /* in NewReno Fast Recovery */ -#define TF_WASFRECOVERY 0x200000 /* was in NewReno Fast Recovery */ -#define TF_SIGNATURE 0x400000 /* require MD5 digests (RFC2385) */ +#define TF_ACKNOW 0x00000001 /* ack peer immediately */ +#define TF_DELACK 0x00000002 /* ack, but try to delay it */ +#define TF_NODELAY 0x00000004 /* don't delay packets to coalesce */ +#define TF_NOOPT 0x00000008 /* don't use tcp options */ +#define TF_SENTFIN 0x00000010 /* have sent FIN */ +#define TF_REQ_SCALE 0x00000020 /* have/will request window scaling */ +#define TF_RCVD_SCALE 0x00000040 /* other side has requested scaling */ +#define TF_REQ_TSTMP 0x00000080 /* have/will request timestamps */ +#define TF_RCVD_TSTMP 0x00000100 /* a timestamp was received in SYN */ +#define TF_SACK_PERMIT 0x00000200 /* other side said I could SACK */ +#define TF_NEEDSYN 0x00000400 /* send SYN (implicit state) */ +#define TF_NEEDFIN 0x00000800 /* send FIN (implicit state) */ +#define TF_NOPUSH 0x00001000 /* don't push */ +#define TF_REQ_CC 0x00002000 /* have/will request CC */ +#define TF_RCVD_CC 0x00004000 /* a CC was received in SYN */ +#define TF_SENDCCNEW 0x00008000 /* send CCnew instead of CC in SYN */ +#define TF_MORETOCOME 0x00010000 /* More data to be appended to sock */ +#define TF_LQ_OVERFLOW 0x00020000 /* listen queue overflow */ +#define TF_LASTIDLE 0x00040000 /* connection was previously idle */ +#define TF_RXWIN0SENT 0x00080000 /* sent a receiver win 0 in response */ +#define TF_FASTRECOVERY 0x00100000 /* in NewReno Fast Recovery */ +#define TF_WASFRECOVERY 0x00200000 /* was in NewReno Fast Recovery */ +#define TF_SIGNATURE 0x00400000 /* require MD5 digests (RFC2385) */ +#define TF_FIRSTACCACK 0x00800000 /* Look for 1st acceptable ACK. */ +#define TF_FASTREXMT 0x01000000 /* Did Fast Retransmit. */ +#define TF_EARLYREXMT 0x02000000 /* Did Early (Fast) Retransmit. 
*/ + int t_force; /* 1 if forcing out a byte */ tcp_seq snd_una; /* send unacknowledged */ @@ -186,6 +240,7 @@ u_long snd_ssthresh_prev; /* ssthresh prior to retransmit */ tcp_seq snd_recover_prev; /* snd_recover prior to retransmit */ u_long t_badrxtwin; /* window for retransmit recovery */ + u_long t_rexmtTS; /* timestamp of last retransmit */ u_char snd_limited; /* segments limited transmitted */ /* anti DoS counters */ u_long rcv_second; /* start of interval second */ @@ -395,7 +450,16 @@ u_long tcps_sndbyte; /* data bytes sent */ u_long tcps_sndrexmitpack; /* data packets retransmitted */ u_long tcps_sndrexmitbyte; /* data bytes retransmitted */ - u_long tcps_sndrexmitbad; /* unnecessary packet retransmissions */ + u_long tcps_sndfastrexmit; /* Fast Retransmissions */ + u_long tcps_sndearlyrexmit; /* early Fast Retransmissions */ + u_long tcps_sndlimited; /* Limited Transmit packets */ + u_long tcps_sndrtobad; /* spurious RTO retransmissions */ + u_long tcps_sndfastrexmitbad; /* spurious Fast Retransmissions */ + u_long tcps_sndearlyrexmitbad; /* spurious early Fast Retransmissions, + a subset of tcps_sndfastrexmitbad */ + u_long tcps_eifeldetected; /* Eifel-detected spurious rexmits */ + u_long tcps_rttcantdetect; /* Eifel but not 1/2 RTT-detectable */ + u_long tcps_rttdetected; /* RTT-detected spurious RTO rexmits */ u_long tcps_sndacks; /* ack-only packets sent */ u_long tcps_sndprobe; /* window probes sent */ u_long tcps_sndurg; /* packets sent with URG only */ @@ -572,6 +636,8 @@ void tcp_respond(struct tcpcb *, void *, struct tcphdr *, struct mbuf *, tcp_seq, tcp_seq, int); int tcp_twrespond(struct tcptw *, int); +void tcp_save_congestion_state(struct tcpcb *tp); +void tcp_revert_congestion_state(struct tcpcb *tp); void tcp_setpersist(struct tcpcb *); #ifdef TCP_SIGNATURE int tcp_signature_compute(struct mbuf *, int, int, int, u_char *, u_int); Index: src/sys/sys/proc.h diff -u src/sys/sys/proc.h:1.392.2.9 src/sys/sys/proc.h:1.375.1000.22 --- src/sys/sys/proc.h:1.392.2.9 Sat Sep 18 12:11:35 2004 +++ src/sys/sys/proc.h Mon Sep 20 10:13:34 2004 @@ -88,7 +88,6 @@ * (c) const until freeing */ struct pgrp { - LIST_ENTRY(pgrp) pg_hash; /* (e) Hash chain. */ LIST_HEAD(, proc) pg_members; /* (m + e) Pointer to pgrp members. */ struct session *pg_session; /* (c) Pointer to session. */ struct sigiolst pg_sigiolst; /* (m) List of sigio sources. */ @@ -699,7 +698,7 @@ #define SESSHOLD(s) ((s)->s_count++) #define SESSRELE(s) { \ if (--(s)->s_count == 0) \ - FREE(s, M_SESSION); \ + sessdelete(s); \ } #define STOPEVENT(p, e, v) do { \ @@ -773,18 +772,11 @@ #define PARGS_LOCK(p) mtx_lock(&pargs_ref_lock) #define PARGS_UNLOCK(p) mtx_unlock(&pargs_ref_lock) -#define PIDHASH(pid) (&pidhashtbl[(pid) & pidhash]) -extern LIST_HEAD(pidhashhead, proc) *pidhashtbl; -extern u_long pidhash; - -#define PGRPHASH(pgid) (&pgrphashtbl[(pgid) & pgrphash]) -extern LIST_HEAD(pgrphashhead, pgrp) *pgrphashtbl; -extern u_long pgrphash; - extern struct sx allproc_lock; extern struct sx proctree_lock; extern struct mtx pargs_ref_lock; extern struct mtx ppeers_lock; +extern struct pgrp pgrp0; /* Process group for swapper. */ extern struct proc proc0; /* Process slot for swapper. */ extern struct thread thread0; /* Primary thread in proc0. */ extern struct ksegrp ksegrp0; /* Primary ksegrp in proc0. 
*/ @@ -827,6 +819,11 @@ int leavepgrp(struct proc *p); int maybe_preempt(struct thread *td); void mi_switch(int flags, struct thread *newtd); + +void sessdelete(struct session *); +struct proc *proc_alloc(struct thread *td, int flags); +void proc_free(struct proc *p); + int p_candebug(struct thread *td, struct proc *p); int p_cansee(struct thread *td, struct proc *p); int p_cansched(struct thread *td, struct proc *p); Index: src/usr.bin/netstat/inet.c diff -u src/usr.bin/netstat/inet.c:1.67 src/usr.bin/netstat/inet.c:1.65.1000.4 --- src/usr.bin/netstat/inet.c:1.67 Tue Jul 27 04:18:11 2004 +++ src/usr.bin/netstat/inet.c Tue Jul 27 13:10:32 2004 @@ -383,8 +383,14 @@ "\t\t%lu data packet%s (%lu byte%s)\n"); p2(tcps_sndrexmitpack, tcps_sndrexmitbyte, "\t\t%lu data packet%s (%lu byte%s) retransmitted\n"); - p(tcps_sndrexmitbad, - "\t\t%lu data packet%s unnecessarily retransmitted\n"); + p2a(tcps_sndfastrexmit, tcps_sndearlyrexmit, + "\t\t%lu Fast Retransmit%s (%lu early)\n"); + p(tcps_sndlimited, "\t\t%lu packet%s sent by Limited Transmit\n"); + p(tcps_sndrtobad, "\t\t%lu spurious RTO retransmit%s\n"); + p2a(tcps_sndfastrexmitbad, tcps_sndearlyrexmitbad, + "\t\t%lu spurious Fast Retransmit%s (%lu early)\n"); + p(tcps_eifeldetected, "\t\t%lu Eifel-detected spurious retransmit%s\n"); + p(tcps_rttdetected, "\t\t%lu RTT-detected spurious retransmit%s\n"); p(tcps_mturesent, "\t\t%lu resend%s initiated by MTU discovery\n"); p2a(tcps_sndacks, tcps_delack, "\t\t%lu ack-only packet%s (%lu delayed)\n"); Index: src/usr.sbin/inetd/inetd.c diff -u src/usr.sbin/inetd/inetd.c:1.127 src/usr.sbin/inetd/inetd.c:1.126.1000.2 --- src/usr.sbin/inetd/inetd.c:1.127 Sat Aug 7 12:27:50 2004 +++ src/usr.sbin/inetd/inetd.c Sat Aug 7 22:07:53 2004 @@ -113,6 +113,8 @@ #include #include #include +#include +#include #include #include @@ -202,6 +204,7 @@ #ifndef TOOMANY #define TOOMANY 256 /* don't start more than TOOMANY */ #endif + #define CNT_INTVL 60 /* servers in CNT_INTVL sec. 
*/ #define RETRYTIME (60*10) /* retry after bind or server fail */ #define MAX_MAXCHLD 32767 /* max allowable max children */ @@ -209,8 +212,6 @@ #define SIGBLOCK (sigmask(SIGCHLD)|sigmask(SIGHUP)|sigmask(SIGALRM)) void close_sep(struct servtab *); -void flag_signal(int); -void flag_config(int); void config(void); int cpmip(const struct servtab *, int); void endconfig(void); @@ -220,11 +221,8 @@ int matchservent(const char *, const char *, const char *); char *nextline(FILE *); void addchild(struct servtab *, int); -void flag_reapchild(int); -void reapchild(void); void enable(struct servtab *); void disable(struct servtab *); -void flag_retry(int); void retry(void); int setconfig(void); void setup(struct servtab *); @@ -235,7 +233,6 @@ static struct conninfo *search_conn(struct servtab *sep, int ctrl); static int room_conn(struct servtab *sep, struct conninfo *conn); static void addchild_conn(struct conninfo *conn, pid_t pid); -static void reapchild_conn(pid_t pid); static void free_conn(struct conninfo *conn); static void resize_conn(struct servtab *sep, int maxperip); static void free_connlist(struct servtab *sep); @@ -250,7 +247,7 @@ int debug = 0; int dolog = 0; int maxsock; /* highest-numbered descriptor */ -fd_set allsock; +int kqsock; int options; int timingout; int toomany = TOOMANY; @@ -266,7 +263,6 @@ struct sockaddr_in6 *bind_sa6; int v6bind_ok = 0; #endif -int signalpipe[2]; #ifdef SANITY_CHECK int nsock; #endif @@ -317,6 +313,7 @@ int main(int argc, char **argv) { + struct kevent kqevlist[16]; struct servtab *sep; struct passwd *pwd; struct group *grp; @@ -340,7 +337,11 @@ #define peer4 p_un.peer_un4 #define peer6 p_un.peer_un6 #define peermax p_un.peer_max - int i; + int i, j; +#ifdef SANITY_CHECK + int k; +#endif + int status; struct addrinfo hints, *res; const char *servname; int error; @@ -517,19 +518,19 @@ } #endif + kqsock = kqueue(); + sa.sa_flags = 0; sigemptyset(&sa.sa_mask); sigaddset(&sa.sa_mask, SIGALRM); sigaddset(&sa.sa_mask, SIGCHLD); sigaddset(&sa.sa_mask, SIGHUP); - sa.sa_handler = flag_retry; + sa.sa_handler = SIG_IGN; sigaction(SIGALRM, &sa, &saalrm); + WATCH_SIG(SIGALRM, retry); config(); - sa.sa_handler = flag_config; sigaction(SIGHUP, &sa, &sahup); - sa.sa_handler = flag_reapchild; - sigaction(SIGCHLD, &sa, &sachld); - sa.sa_handler = SIG_IGN; + WATCH_SIG(SIGHUP, config); sigaction(SIGPIPE, &sa, &sapipe); { @@ -542,27 +543,8 @@ (void)setenv("inetd_dummy", dummy, 1); } - if (pipe(signalpipe) != 0) { - syslog(LOG_ERR, "pipe: %m"); - exit(EX_OSERR); - } - if (fcntl(signalpipe[0], F_SETFD, FD_CLOEXEC) < 0 || - fcntl(signalpipe[1], F_SETFD, FD_CLOEXEC) < 0) { - syslog(LOG_ERR, "signalpipe: fcntl (F_SETFD, FD_CLOEXEC): %m"); - exit(EX_OSERR); - } - FD_SET(signalpipe[0], &allsock); -#ifdef SANITY_CHECK - nsock++; -#endif - if (signalpipe[0] > maxsock) - maxsock = signalpipe[0]; - if (signalpipe[1] > maxsock) - maxsock = signalpipe[1]; - for (;;) { int n, ctrl; - fd_set readable; #ifdef SANITY_CHECK if (nsock == 0) { @@ -570,46 +552,60 @@ exit(EX_SOFTWARE); } #endif - readable = allsock; - if ((n = select(maxsock + 1, &readable, (fd_set *)0, - (fd_set *)0, (struct timeval *)0)) <= 0) { - if (n < 0 && errno != EINTR) { - syslog(LOG_WARNING, "select: %m"); + + if ((n = kevent(kqsock, NULL, 0, kqevlist, + sizeof kqevlist / sizeof *kqevlist, + (struct timespec *)0)) <= 0) { + if (n == -1 && errno != EINTR) { + syslog(LOG_WARNING, "kevent: %m"); sleep(1); } continue; } - /* handle any queued signal flags */ - if (FD_ISSET(signalpipe[0], &readable)) { - int nsig; - if 
-			    syslog(LOG_ERR, "ioctl: %m");
-			    exit(EX_OSERR);
-		    }
-		    while (--nsig >= 0) {
-			    char c;
-			    if (read(signalpipe[0], &c, 1) != 1) {
-				    syslog(LOG_ERR, "read: %m");
-				    exit(EX_OSERR);
-			    }
+
+	    for (j = 0; j < n; j++) {
+		if (kqevlist[j].filter == EVFILT_SIGNAL) {
+		    /* handle any queued signal flags */
 			    if (debug)
-				    warnx("handling signal flag %c", c);
-			    switch(c) {
-			    case 'A': /* sigalrm */
-				    retry();
-				    break;
-			    case 'C': /* sigchld */
-				    reapchild();
-				    break;
-			    case 'H': /* sighup */
-				    config();
-				    break;
-			    }
-		    }
-	    }
-	    for (sep = servtab; n && sep; sep = sep->se_next)
-		if (sep->se_fd != -1 && FD_ISSET(sep->se_fd, &readable)) {
-		    n--;
+			warnx("calling signalhandler for sig %d",
+			    kqevlist[j].ident);
+		    ((void (*)())kqevlist[j].udata)();
+		} else if (kqevlist[j].filter == EVFILT_PROC) {
+		    sep = (struct servtab *)kqevlist[j].udata;
+		    pid = wait4(kqevlist[j].ident, &status, WNOHANG,
+			(struct rusage *)0);
+		    if (debug)
+			warnx("%d reaped, status %#x", pid, status);
+		    if (pid == 0) {
+			/* XXX - this could leave a zombie */
+			syslog(LOG_WARNING, "can't reap pid %d",
+			    kqevlist[j].ident);
+			continue;
+		    }
+#ifdef SANITY_CHECK
+		    for (k = 0; k < sep->se_numchild; k++)
+			if (sep->se_pids[k] == pid)
+			    break;
+		    if (k != sep->se_numchild)
+			sep->se_pids[k] =
+			    sep->se_pids[sep->se_numchild - 1];
+#endif
+		    if (sep->se_maxchild &&
+			sep->se_numchild == sep->se_maxchild)
+			enable(sep);
+		    if (status)
+			syslog(LOG_WARNING,
+			    "%s[%d]: exit status 0x%x",
+			    sep->se_server, pid, status);
+		    /* XXX - this should never happen */
+		    if (--sep->se_numchild < 0)
+			sep->se_numchild = 0;
+		    if (sep->se_free && sep->se_numchild == 0) {
+			freeconfig(sep);
+			free((char *)sep);
+		    }
+		} else {
+		    sep = (struct servtab *)kqevlist[j].udata;
 		    if (debug)
 			warnx("someone wants %s", sep->se_service);
 		    dofork = !sep->se_bi || sep->se_bi->bi_fork || ISWRAP(sep);
@@ -883,21 +879,7 @@
 		if (sep->se_accept && sep->se_socktype == SOCK_STREAM)
 			close(ctrl);
-	}
-}
-
-/*
- * Add a signal flag to the signal flag queue for later handling
- */
-
-void
-flag_signal(int c)
-{
-	char ch = c;
-
-	if (write(signalpipe[1], &ch, 1) != 1) {
-		syslog(LOG_ERR, "write: %m");
-		_exit(EX_OSERR);
+		}
 	}
 }
@@ -909,72 +891,18 @@
 void
 addchild(struct servtab *sep, pid_t pid)
 {
-	if (sep->se_maxchild <= 0)
-		return;
 #ifdef SANITY_CHECK
-	if (sep->se_numchild >= sep->se_maxchild) {
+	if (sep->se_maxchild && sep->se_numchild >= sep->se_maxchild) {
 		syslog(LOG_ERR, "%s: %d >= %d", __FUNCTION__,
 		    sep->se_numchild, sep->se_maxchild);
 		exit(EX_SOFTWARE);
 	}
+	sep->se_pids[sep->se_numchild] = pid;
 #endif
-	sep->se_pids[sep->se_numchild++] = pid;
-	if (sep->se_numchild == sep->se_maxchild)
+	sep->se_numchild++;
+	if (sep->se_maxchild && sep->se_numchild == sep->se_maxchild)
 		disable(sep);
-}
-
-/*
- * Some child process has exited. See if it's on somebody's list.
- */
-
-void
-flag_reapchild(int signo __unused)
-{
-	flag_signal('C');
-}
-
-void
-reapchild(void)
-{
-	int k, status;
-	pid_t pid;
-	struct servtab *sep;
-
-	for (;;) {
-		pid = wait3(&status, WNOHANG, (struct rusage *)0);
-		if (pid <= 0)
-			break;
-		if (debug)
-			warnx("%d reaped, %s %u", pid,
-			    WIFEXITED(status) ? "status" : "signal",
-			    WIFEXITED(status) ? WEXITSTATUS(status)
-			    : WTERMSIG(status));
-		for (sep = servtab; sep; sep = sep->se_next) {
-			for (k = 0; k < sep->se_numchild; k++)
-				if (sep->se_pids[k] == pid)
-					break;
-			if (k == sep->se_numchild)
-				continue;
-			if (sep->se_numchild == sep->se_maxchild)
-				enable(sep);
-			sep->se_pids[k] = sep->se_pids[--sep->se_numchild];
-			if (WIFSIGNALED(status) || WEXITSTATUS(status))
-				syslog(LOG_WARNING,
-				    "%s[%d]: exited, %s %u",
-				    sep->se_server, pid,
-				    WIFEXITED(status) ? "status" : "signal",
-				    WIFEXITED(status) ? WEXITSTATUS(status)
-				    : WTERMSIG(status));
-			break;
-		}
-		reapchild_conn(pid);
-	}
-}
-
-void
-flag_config(int signo __unused)
-{
-	flag_signal('H');
+	WATCH_PROC(pid, sep);
 }
 
 void
@@ -991,8 +919,10 @@
 		syslog(LOG_ERR, "%s: %m", CONFIG);
 		return;
 	}
-	for (sep = servtab; sep; sep = sep->se_next)
+
+	for (sep = servtab; sep != NULL; sep = sep->se_next)
 		sep->se_checked = 0;
+
 	while ((new = getconfigent())) {
 		if (getpwnam(new->se_user) == NULL) {
 			syslog(LOG_ERR,
@@ -1038,12 +968,17 @@
 			/* copy over outstanding child pids */
 			if (sep->se_maxchild > 0 && new->se_maxchild > 0) {
 				new->se_numchild = sep->se_numchild;
+				/* XXX - this can cause problems */
 				if (new->se_numchild > new->se_maxchild)
 					new->se_numchild = new->se_maxchild;
+#ifdef SANITY_CHECK
 				memcpy(new->se_pids, sep->se_pids,
 				    new->se_numchild * sizeof(*new->se_pids));
+#endif
 			}
+#ifdef SANITY_CHECK
 			SWAP(pid_t *, sep->se_pids, new->se_pids);
+#endif
 			sep->se_maxchild = new->se_maxchild;
 			sep->se_numchild = new->se_numchild;
 			sep->se_maxcpm = new->se_maxcpm;
@@ -1052,14 +987,11 @@
 			sep->se_bi = new->se_bi;
 			/* might need to turn on or off service now */
 			if (sep->se_fd >= 0) {
-				if (sep->se_maxchild > 0
-				    && sep->se_numchild == sep->se_maxchild) {
-					if (FD_ISSET(sep->se_fd, &allsock))
-						disable(sep);
-				} else {
-					if (!FD_ISSET(sep->se_fd, &allsock))
-						enable(sep);
-				}
+				if (sep->se_maxchild
+				    && sep->se_numchild == sep->se_maxchild)
+					disable(sep);
+				else
+					enable(sep);
 			}
 			sep->se_accept = new->se_accept;
 			SWAP(char *, sep->se_user, new->se_user);
@@ -1179,8 +1111,11 @@
 			print_service("FREE", sep);
 		if (sep->se_rpc && sep->se_rpc_prog > 0)
 			unregisterrpc(sep);
-		freeconfig(sep);
-		free(sep);
+		if (sep->se_numchild == 0) {
+			freeconfig(sep);
+			free((char *)sep);
+		} else
+			sep->se_free = 1;
 	}
 	(void) sigsetmask(omask);
 }
@@ -1241,12 +1176,6 @@
 }
 
 void
-flag_retry(int signo __unused)
-{
-	flag_signal('A');
-}
-
-void
 retry(void)
 {
 	struct servtab *sep;
@@ -1280,12 +1209,12 @@
 #define	turnon(fd, opt) \
 setsockopt(fd, SOL_SOCKET, opt, (char *)&on, sizeof (on))
 	if (strcmp(sep->se_proto, "tcp") == 0 && (options & SO_DEBUG) &&
-	    turnon(sep->se_fd, SO_DEBUG) < 0)
+	    turnon(sep->se_fd, SO_DEBUG) == -1)
 		syslog(LOG_ERR, "setsockopt (SO_DEBUG): %m");
-	if (turnon(sep->se_fd, SO_REUSEADDR) < 0)
+	if (turnon(sep->se_fd, SO_REUSEADDR) == -1)
 		syslog(LOG_ERR, "setsockopt (SO_REUSEADDR): %m");
 #ifdef SO_PRIVSTATE
-	if (turnon(sep->se_fd, SO_PRIVSTATE) < 0)
+	if (turnon(sep->se_fd, SO_PRIVSTATE) == -1)
 		syslog(LOG_ERR, "setsockopt (SO_PRIVSTATE): %m");
 #endif
 	/* tftpd opens a new connection then needs more infos */
@@ -1293,7 +1222,7 @@
 	    (strcmp(sep->se_proto, "udp") == 0) && (sep->se_accept == 0) &&
 	    (setsockopt(sep->se_fd, IPPROTO_IPV6, IPV6_RECVPKTINFO,
-		(char *)&on, sizeof (on)) < 0))
+		(char *)&on, sizeof (on)) == -1))
 		syslog(LOG_ERR, "setsockopt (IPV6_RECVPKTINFO): %m");
 	if (sep->se_family == AF_INET6) {
 		int flag = sep->se_nomapped ? 1 : 0;
@@ -1476,8 +1405,7 @@
 close_sep(struct servtab *sep)
 {
 	if (sep->se_fd >= 0) {
-		if (FD_ISSET(sep->se_fd, &allsock))
-			disable(sep);
+		disable(sep);
 		(void) close(sep->se_fd);
 		sep->se_fd = -1;
 	}
@@ -1516,7 +1444,7 @@
 	long omask;
 
 	sep = (struct servtab *)malloc(sizeof (*sep));
-	if (sep == (struct servtab *)0) {
+	if (sep == NULL) {
 		syslog(LOG_ERR, "malloc: %m");
 		exit(EX_OSERR);
 	}
@@ -1546,14 +1474,9 @@
 		    "%s: %s: is mux", __FUNCTION__, sep->se_service);
 		exit(EX_SOFTWARE);
 	}
-	if (FD_ISSET(sep->se_fd, &allsock)) {
-		syslog(LOG_ERR,
-		    "%s: %s: not off", __FUNCTION__, sep->se_service);
-		exit(EX_SOFTWARE);
-	}
 	nsock++;
 #endif
-	FD_SET(sep->se_fd, &allsock);
+	WATCH_SOCK(sep->se_fd, sep);
 	if (sep->se_fd > maxsock)
 		maxsock = sep->se_fd;
 }
@@ -1575,18 +1498,13 @@
 		    "%s: %s: is mux", __FUNCTION__, sep->se_service);
 		exit(EX_SOFTWARE);
 	}
-	if (!FD_ISSET(sep->se_fd, &allsock)) {
-		syslog(LOG_ERR,
-		    "%s: %s: not on", __FUNCTION__, sep->se_service);
-		exit(EX_SOFTWARE);
-	}
 	if (nsock == 0) {
 		syslog(LOG_ERR, "%s: nsock=0", __FUNCTION__);
 		exit(EX_SOFTWARE);
 	}
 	nsock--;
 #endif
-	FD_CLR(sep->se_fd, &allsock);
+	UNWATCH_SOCK(sep->se_fd, sep);
 	if (sep->se_fd == maxsock)
 		maxsock--;
 }
@@ -1971,6 +1889,7 @@
 		else
 			sep->se_maxchild = 1;
 	}
+#ifdef SANITY_CHECK
 	if (sep->se_maxchild > 0) {
 		sep->se_pids = malloc(sep->se_maxchild * sizeof(*sep->se_pids));
 		if (sep->se_pids == NULL) {
@@ -1978,6 +1897,7 @@
 			exit(EX_OSERR);
 		}
 	}
+#endif
 	argc = 0;
 	for (arg = skip(&cp); cp; arg = skip(&cp))
 		if (argc < MAXARGV) {
@@ -2017,8 +1937,10 @@
 #endif
 	if (cp->se_server)
 		free(cp->se_server);
+#ifdef SANITY_CHECK
 	if (cp->se_pids)
 		free(cp->se_pids);
+#endif
 	for (i = 0; i < MAXARGV; i++)
 		if (cp->se_argv[i])
 			free(cp->se_argv[i]);
@@ -2357,6 +2279,36 @@
 	return(r);
 }
 
+void
+watch(filter, ident, data, fflags, addrm)
+	short filter;
+	uintptr_t ident;
+	void *data;
+	u_int fflags;
+	int addrm;
+{
+	struct kevent kev;
+	struct kevent *kptr;
+	int i;
+
+	kptr = &kev;
+
+	kev.ident = ident;
+	kev.filter = filter;
+	kev.flags = addrm ? EV_ADD|EV_ENABLE : EV_DELETE|EV_DISABLE;
+	kev.fflags = fflags;
+	kev.udata = data;
+
+	i = kevent(kqsock, kptr, 1, NULL, 0, NULL);
+
+	if (i == -1)
+		syslog(LOG_ERR, "kevent failed: %m");
+
+	if (debug) {
+		warnx("kqueue, ident: %d, addrm: %d, ret: %d, data: %p, errno: %s", ident, addrm, i, data, strerror(errno));
+	}
+}
+
 static struct conninfo *
 search_conn(struct servtab *sep, int ctrl)
 {
@@ -2468,26 +2420,6 @@
 }
 
 static void
-reapchild_conn(pid_t pid)
-{
-	struct procinfo *proc;
-	struct conninfo *conn;
-	int i;
-
-	if ((proc = search_proc(pid, 0)) == NULL)
-		return;
-	if ((conn = proc->pr_conn) == NULL)
-		return;
-	for (i = 0; i < conn->co_numchild; ++i)
-		if (conn->co_proc[i] == proc) {
-			conn->co_proc[i] = conn->co_proc[--conn->co_numchild];
-			break;
-		}
-	free_proc(proc);
-	free_conn(conn);
-}
-
-static void
 resize_conn(struct servtab *sep, int maxpip)
 {
 	struct conninfo *conn;
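
Note on the inetd.c hunks above: they replace the select(2) loop and the
signal-pipe trick with a single kqueue(2) descriptor. Listening sockets,
signals, and forked children are all registered as kevents, and the udata
field carries either a handler function (signals) or the owning struct
servtab (sockets and children), so the main loop dispatches straight from
the returned event instead of scanning tables. What follows is a minimal
standalone sketch of the signal half of that pattern, not code from the
patch; the names sketch.c and on_hup are illustrative only.

/*
 * sketch.c - minimal reproduction of the EVFILT_SIGNAL dispatch pattern
 * used by the patched inetd; illustrative only, no names from the patch.
 */
#include <sys/types.h>
#include <sys/event.h>

#include <err.h>
#include <errno.h>
#include <signal.h>
#include <stdio.h>

static void
on_hup(void)
{
	printf("SIGHUP: inetd would call config() here\n");
}

int
main(void)
{
	struct kevent kev;
	int kq, n;

	if ((kq = kqueue()) == -1)
		err(1, "kqueue");
	/*
	 * As in the patch, the disposition is SIG_IGN; EVFILT_SIGNAL
	 * still records delivery attempts, so no handler ever runs and
	 * the event loop is the only place signals are processed.
	 */
	signal(SIGHUP, SIG_IGN);
	/*
	 * Stash the handler in udata, mirroring WATCH_SIG().  Storing a
	 * function pointer in a void * is not strictly portable C, but
	 * it is exactly what the patch itself relies on.
	 */
	EV_SET(&kev, SIGHUP, EVFILT_SIGNAL, EV_ADD | EV_ENABLE, 0, 0,
	    (void *)on_hup);
	if (kevent(kq, &kev, 1, NULL, 0, NULL) == -1)
		err(1, "kevent (register)");
	for (;;) {
		if ((n = kevent(kq, NULL, 0, &kev, 1, NULL)) == -1) {
			if (errno == EINTR)
				continue;
			err(1, "kevent (wait)");
		}
		if (n > 0 && kev.filter == EVFILT_SIGNAL)
			((void (*)(void))kev.udata)();
	}
}
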
Index: src/usr.sbin/inetd/inetd.h
diff -u src/usr.sbin/inetd/inetd.h:1.14 src/usr.sbin/inetd/inetd.h:1.13.1000.2
--- src/usr.sbin/inetd/inetd.h:1.14	Sat Aug  7 12:27:50 2004
+++ src/usr.sbin/inetd/inetd.h	Sat Aug  7 22:07:53 2004
@@ -74,7 +74,10 @@
 	int	se_maxchild;		/* max number of children */
 	int	se_maxcpm;		/* max connects per IP per minute */
 	int	se_numchild;		/* current number of children */
+	int	se_free;		/* free when numchild == 0 */
+#ifdef SANITY_CHECK
 	pid_t	*se_pids;		/* array of child pids */
+#endif
 	char	*se_user;		/* user name to run as */
 	char	*se_group;		/* group name to run as */
 #ifdef LOGIN_CAP
@@ -145,3 +148,14 @@
 	int	bi_maxchild;		/* max number of children, -1=default */
 	bi_fn_t	*bi_fn;			/* function which performs it */
 };
+
+void	watch __P((short, uintptr_t, void *, u_int, int));
+#define	WATCH_SOCK(fd, data)	watch(EVFILT_READ, fd, data, 0, 1)
+#define	UNWATCH_SOCK(fd, data)	watch(EVFILT_READ, fd, data, 0, 0)
+#define	WATCH_SIG(sig, data)	watch(EVFILT_SIGNAL, sig, data, 0, 1)
+#define	UNWATCH_SIG(sig, data)	watch(EVFILT_SIGNAL, sig, data, 0, 0)
+#define	WATCH_PROC(proc, data)	watch(EVFILT_PROC, proc, data, NOTE_EXIT, 1)
+#define	UNWATCH_PROC(proc, data)	watch(EVFILT_PROC, proc, data, NOTE_EXIT, 0)
+#define	WATCH_FD(fd, data)	watch(EVFILT_VNODE, fd, data, NOTE_DELETE|NOTE_WRITE|NOTE_EXTEND|NOTE_RENAME, 1)
+#define	UNWATCH_FD(fd, data)	watch(EVFILT_VNODE, fd, data, NOTE_DELETE|NOTE_WRITE|NOTE_EXTEND|NOTE_RENAME, 0)
+
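Note on WATCH_PROC: where the old inetd reaped children from a SIGCHLD
handler and then searched every service's se_pids array (the removed
reapchild() and reapchild_conn() above), the patched version registers each
forked pid with WATCH_PROC() and lets the NOTE_EXIT event name both the pid
(kev.ident) and its servtab (kev.udata). Below is a standalone sketch of
that idea, not code from the patch; the only identifiers taken from it are
the kevent(2) API itself.

/*
 * procwatch.c - minimal reproduction of the WATCH_PROC/NOTE_EXIT idea;
 * illustrative only.
 */
#include <sys/types.h>
#include <sys/event.h>
#include <sys/wait.h>

#include <err.h>
#include <stdio.h>
#include <unistd.h>

int
main(void)
{
	struct kevent kev;
	pid_t pid;
	int kq, status;

	if ((kq = kqueue()) == -1)
		err(1, "kqueue");
	if ((pid = fork()) == -1)
		err(1, "fork");
	if (pid == 0) {
		sleep(1);	/* give the parent time to register */
		_exit(42);
	}
	/* Equivalent of WATCH_PROC(pid, sep); udata left NULL here. */
	EV_SET(&kev, pid, EVFILT_PROC, EV_ADD | EV_ENABLE, NOTE_EXIT, 0,
	    NULL);
	if (kevent(kq, &kev, 1, NULL, 0, NULL) == -1)
		err(1, "kevent (register)");
	if (kevent(kq, NULL, 0, &kev, 1, NULL) == -1)
		err(1, "kevent (wait)");
	/* kev.ident names the exact child; reap it without a table scan. */
	if (waitpid((pid_t)kev.ident, &status, WNOHANG) == pid &&
	    WIFEXITED(status))
		printf("pid %ld exited with status %d\n",
		    (long)kev.ident, WEXITSTATUS(status));
	return (0);
}

The sketch sidesteps the register/exit race by having the child sleep
briefly; the patch instead handles it in the main loop, where a wait4()
that returns 0 is logged ("can't reap pid") and flagged with an XXX comment
as potentially leaving a zombie.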