$Phantasm: delphijweb/research/freebsd/pid.diff,v 1.20 2004/08/14 18:42:10 delphij Exp $

Updated junsu's port of NetBSD's PID allocator.  You can obtain the
original patch from his homepage:

	http://www.arbornet.org/~junsu/pid.diff

Index: src/sys/kern/init_main.c
diff -u src/sys/kern/init_main.c:1.245 src/sys/kern/init_main.c:1.242.1000.4
--- src/sys/kern/init_main.c:1.245	Thu Jul 29 04:21:04 2004
+++ src/sys/kern/init_main.c	Thu Jul 29 07:51:01 2004
@@ -87,7 +87,7 @@
 
 /* Components of the first process -- never freed. */
 static struct session session0;
-static struct pgrp pgrp0;
+struct pgrp pgrp0;
 struct proc proc0;
 struct thread thread0;
 struct kse kse0;
@@ -358,10 +358,8 @@
 	 * Create process 0 (the swapper).
 	 */
 	LIST_INSERT_HEAD(&allproc, p, p_list);
-	LIST_INSERT_HEAD(PIDHASH(0), p, p_hash);
 	mtx_init(&pgrp0.pg_mtx, "process group", NULL, MTX_DEF | MTX_DUPOK);
 	p->p_pgrp = &pgrp0;
-	LIST_INSERT_HEAD(PGRPHASH(0), &pgrp0, pg_hash);
 	LIST_INIT(&pgrp0.pg_members);
 	LIST_INSERT_HEAD(&pgrp0.pg_members, p, p_pglist);
 
Index: src/sys/kern/kern_exit.c
diff -u src/sys/kern/kern_exit.c:1.244 src/sys/kern/kern_exit.c:1.229.1000.16
--- src/sys/kern/kern_exit.c:1.244	Sat Jul 31 04:31:01 2004
+++ src/sys/kern/kern_exit.c	Sat Jul 31 12:20:42 2004
@@ -75,7 +75,6 @@
 #include 
 #include 
 #include 
-#include 
 #include 
 
 /* Required to be non-static for SysVR4 emulator */
@@ -401,7 +400,6 @@
 	sx_xlock(&allproc_lock);
 	LIST_REMOVE(p, p_list);
 	LIST_INSERT_HEAD(&zombproc, p, p_list);
-	LIST_REMOVE(p, p_hash);
 	sx_xunlock(&allproc_lock);
 
 	sx_xlock(&proctree_lock);
@@ -696,10 +694,7 @@
 #endif
 			KASSERT(FIRST_THREAD_IN_PROC(p),
 			    ("kern_wait: no residual thread!"));
-			uma_zfree(proc_zone, p);
-			sx_xlock(&allproc_lock);
-			nprocs--;
-			sx_xunlock(&allproc_lock);
+			proc_free(p);
 			return (0);
 		}
 		mtx_lock_spin(&sched_lock);
Index: src/sys/kern/kern_fork.c
diff -u src/sys/kern/kern_fork.c:1.233 src/sys/kern/kern_fork.c:1.226.1000.7
--- src/sys/kern/kern_fork.c:1.233	Tue Aug 10 02:21:12 2004
+++ src/sys/kern/kern_fork.c	Wed Aug 11 00:15:27 2004
@@ -69,7 +69,6 @@
 #include 
 #include 
 #include 
-#include 
 
 #include 
 #include 
@@ -145,48 +144,6 @@
 	return (error);
 }
 
-int nprocs = 1;		/* process 0 */
-int lastpid = 0;
-SYSCTL_INT(_kern, OID_AUTO, lastpid, CTLFLAG_RD, &lastpid, 0,
-    "Last used PID");
-
-/*
- * Random component to lastpid generation.  We mix in a random factor to make
- * it a little harder to predict.  We sanity check the modulus value to avoid
- * doing it in critical paths.  Don't let it be too small or we pointlessly
- * waste randomness entropy, and don't let it be impossibly large.  Using a
- * modulus that is too big causes a LOT more process table scans and slows
- * down fork processing as the pidchecked caching is defeated.
- */
-static int randompid = 0;
-
-static int
-sysctl_kern_randompid(SYSCTL_HANDLER_ARGS)
-{
-	int error, pid;
-
-	error = sysctl_wire_old_buffer(req, sizeof(int));
-	if (error != 0)
-		return(error);
-	sx_xlock(&allproc_lock);
-	pid = randompid;
-	error = sysctl_handle_int(oidp, &pid, 0, req);
-	if (error == 0 && req->newptr != NULL) {
-		if (pid < 0 || pid > PID_MAX - 100)	/* out of range */
-			pid = PID_MAX - 100;
-		else if (pid < 2)			/* NOP */
-			pid = 0;
-		else if (pid < 100)			/* Make it reasonable */
-			pid = 100;
-		randompid = pid;
-	}
-	sx_xunlock(&allproc_lock);
-	return (error);
-}
-
-SYSCTL_PROC(_kern, OID_AUTO, randompid, CTLTYPE_INT|CTLFLAG_RW,
-    0, 0, sysctl_kern_randompid, "I", "Random PID modulus");
-
 int
 fork1(td, flags, pages, procp)
 	struct thread *td;
@@ -196,9 +153,7 @@
 {
 	struct proc *p1, *p2, *pptr;
 	uid_t uid;
-	struct proc *newproc;
-	int ok, trypid;
-	static int curfail, pidchecked = 0;
+	static int curfail;
 	static struct timeval lastfail;
 	struct filedesc *fd;
 	struct filedesc_to_leader *fdtol;
@@ -283,137 +238,20 @@
 	}
 
 	/* Allocate new proc. */
-	newproc = uma_zalloc(proc_zone, M_WAITOK);
-#ifdef MAC
-	mac_init_proc(newproc);
-#endif
-
-	/* We have to lock the process tree while we look for a pid. */
 	sx_slock(&proctree_lock);
-	/*
-	 * Although process entries are dynamically created, we still keep
-	 * a global limit on the maximum number we will create.  Don't allow
-	 * a nonprivileged user to use the last ten processes; don't let root
-	 * exceed the limit.  The variable nprocs is the current number of
-	 * processes, maxproc is the limit.
-	 */
-	sx_xlock(&allproc_lock);
-	uid = td->td_ucred->cr_ruid;
-	if ((nprocs >= maxproc - 10 &&
-	    suser_cred(td->td_ucred, SUSER_RUID) != 0) ||
-	    nprocs >= maxproc) {
+	p2 = proc_alloc(td, flags);
+	if (!p2) {
 		error = EAGAIN;
 		goto fail;
 	}
 
-	/*
-	 * Increment the count of procs running with this uid.  Don't allow
-	 * a nonprivileged user to exceed their current limit.
-	 */
-	PROC_LOCK(p1);
-	ok = chgproccnt(td->td_ucred->cr_ruidinfo, 1,
-		(uid != 0) ? lim_cur(p1, RLIMIT_NPROC) : 0);
-	PROC_UNLOCK(p1);
-	if (!ok) {
-		error = EAGAIN;
-		goto fail;
-	}
-
-	/*
-	 * Increment the nprocs resource before blocking can occur.  There
-	 * are hard-limits as to the number of processes that can run.
-	 */
-	nprocs++;
-
-	/*
-	 * Find an unused process ID.  We remember a range of unused IDs
-	 * ready to use (from lastpid+1 through pidchecked-1).
-	 *
-	 * If RFHIGHPID is set (used during system boot), do not allocate
-	 * low-numbered pids.
-	 */
-	trypid = lastpid + 1;
-	if (flags & RFHIGHPID) {
-		if (trypid < 10)
-			trypid = 10;
-	} else {
-		if (randompid)
-			trypid += arc4random() % randompid;
-	}
-retry:
-	/*
-	 * If the process ID prototype has wrapped around,
-	 * restart somewhat above 0, as the low-numbered procs
-	 * tend to include daemons that don't exit.
-	 */
-	if (trypid >= PID_MAX) {
-		trypid = trypid % PID_MAX;
-		if (trypid < 100)
-			trypid += 100;
-		pidchecked = 0;
-	}
-	if (trypid >= pidchecked) {
-		int doingzomb = 0;
-
-		pidchecked = PID_MAX;
-		/*
-		 * Scan the active and zombie procs to check whether this pid
-		 * is in use.  Remember the lowest pid that's greater
-		 * than trypid, so we can avoid checking for a while.
-		 */
-		p2 = LIST_FIRST(&allproc);
-again:
-		for (; p2 != NULL; p2 = LIST_NEXT(p2, p_list)) {
-			PROC_LOCK(p2);
-			while (p2->p_pid == trypid ||
-			    (p2->p_pgrp != NULL &&
-			    (p2->p_pgrp->pg_id == trypid ||
-			    (p2->p_session != NULL &&
-			    p2->p_session->s_sid == trypid)))) {
-				trypid++;
-				if (trypid >= pidchecked) {
-					PROC_UNLOCK(p2);
-					goto retry;
-				}
-			}
-			if (p2->p_pid > trypid && pidchecked > p2->p_pid)
-				pidchecked = p2->p_pid;
-			if (p2->p_pgrp != NULL) {
-				if (p2->p_pgrp->pg_id > trypid &&
-				    pidchecked > p2->p_pgrp->pg_id)
-					pidchecked = p2->p_pgrp->pg_id;
-				if (p2->p_session != NULL &&
-				    p2->p_session->s_sid > trypid &&
-				    pidchecked > p2->p_session->s_sid)
-					pidchecked = p2->p_session->s_sid;
-			}
-			PROC_UNLOCK(p2);
-		}
-		if (!doingzomb) {
-			doingzomb = 1;
-			p2 = LIST_FIRST(&zombproc);
-			goto again;
-		}
-	}
+#ifdef MAC
+	mac_init_proc(p2);
+#endif
 	sx_sunlock(&proctree_lock);
 
 	/*
-	 * RFHIGHPID does not mess with the lastpid counter during boot.
-	 */
-	if (flags & RFHIGHPID)
-		pidchecked = 0;
-	else
-		lastpid = trypid;
-
-	p2 = newproc;
-	p2->p_state = PRS_NEW;		/* protect against others */
-	p2->p_pid = trypid;
-	LIST_INSERT_HEAD(&allproc, p2, p_list);
-	LIST_INSERT_HEAD(PIDHASH(p2->p_pid), p2, p_hash);
-	sx_xunlock(&allproc_lock);
-
-	/*
 	 * Malloc things while we don't hold any locks.
 	 */
 	if (flags & RFSIGSHARE)
@@ -751,15 +589,11 @@
 	*procp = p2;
 	return (0);
 fail:
+	uid = td->td_ucred->cr_ruid;
 	sx_sunlock(&proctree_lock);
 	if (ppsratecheck(&lastfail, &curfail, 1))
 		printf("maxproc limit exceeded by uid %i, please see tuning(7) and login.conf(5).\n",
 		    uid);
-	sx_xunlock(&allproc_lock);
-#ifdef MAC
-	mac_destroy_proc(newproc);
-#endif
-	uma_zfree(proc_zone, newproc);
 	if (p1->p_flag & P_SA) {
 		PROC_LOCK(p1);
 		thread_single_end();
Index: src/sys/kern/kern_proc.c
diff -u src/sys/kern/kern_proc.c:1.215 src/sys/kern/kern_proc.c:1.202.1000.20
--- src/sys/kern/kern_proc.c:1.215	Sun Aug 15 01:15:16 2004
+++ src/sys/kern/kern_proc.c	Sun Aug 15 01:54:50 2004
@@ -37,6 +37,7 @@
 #include "opt_kstack_pages.h"
 
 #include 
+#include 
 #include 
 #include 
 #include 
@@ -80,12 +81,51 @@
 static void proc_fini(void *mem, int size);
 
 /*
+ * PID-to-proc lookup is done by indexing the pid_table array.
+ * Since pid numbers are only allocated when an empty slot
+ * has been found, there is no need to search any lists ever.
+ * (An orphaned pgrp will lock the slot, a session will lock
+ * the pgrp with the same number.)
+ *
+ * If the table is too small it is reallocated with twice the
+ * previous size and the entries 'unzipped' into the two halves.
+ * A linked list of free entries is passed through the pt_proc
+ * field of 'free' items - set odd to be an invalid ptr.
+ */
+
+struct pid_table {
+	struct proc	*pt_proc;
+	struct pgrp	*pt_pgrp;
+};
+
+#if 1	/* strongly typed cast - should be a no-op */
+static __inline intptr_t p2u(struct proc *p) { return (intptr_t)p; };
+#else
+#define	p2u(p)	((intptr_t)p)
+#endif
+
+#define	P_VALID(p)	(!(p2u(p) & 1))
+#define	P_NEXT(p)	(p2u(p) >> 1)
+#define	P_FREE(pid)	((struct proc *)((pid) << 1 | 1))
+
+#define	INITIAL_PID_TABLE_SIZE	(1 << 7)
+#define	RESERVED_PROCSLOT	10
+
+static struct pid_table *pid_table;
+
+static u_int pid_tbl_mask = (INITIAL_PID_TABLE_SIZE) - 1; /* table size 2^n */
+static u_int pid_alloc_lim;	/* max we allocate before growing table */
+static u_int pid_alloc_cnt = 0;
+
+/* links through free slots - never empty! */
+static u_int next_free_pt, last_free_pt, next_free_pt_highid;
+static pid_t pid_max = 1 << 12;	/* largest value we allocate */
+
+static int randompid = 0;
+
+/*
  * Other process lists
  */
-struct pidhashhead *pidhashtbl;
-u_long pidhash;
-struct pgrphashhead *pgrphashtbl;
-u_long pgrphash;
 struct proclist allproc;
 struct proclist zombproc;
 struct sx allproc_lock;
@@ -95,6 +135,9 @@
 uma_zone_t proc_zone;
 uma_zone_t ithread_zone;
 
+int nprocs = 1;		/* process 0 */
+int lastpid = 0;
+
 int kstack_pages = KSTACK_PAGES;
 int uarea_pages = UAREA_PAGES;
 SYSCTL_INT(_kern, OID_AUTO, kstack_pages, CTLFLAG_RD, &kstack_pages, 0, "");
@@ -105,26 +148,287 @@
 CTASSERT(sizeof(struct kinfo_proc) == KINFO_PROC_SIZE);
 
 /*
- * Initialize global process hashing structures.
+ * Initialize global process mapping structures.
 */
 void
 procinit()
 {
+	int i;
 
 	sx_init(&allproc_lock, "allproc");
 	sx_init(&proctree_lock, "proctree");
 	mtx_init(&pargs_ref_lock, "struct pargs.ref", NULL, MTX_DEF);
 	mtx_init(&ppeers_lock, "p_peers", NULL, MTX_DEF);
 
+	MALLOC(pid_table, struct pid_table *,
+	    INITIAL_PID_TABLE_SIZE * sizeof *pid_table, M_PROC, M_WAITOK);
+
+#define	LINK_EMPTY ((PID_MAX + INITIAL_PID_TABLE_SIZE) & ~(INITIAL_PID_TABLE_SIZE - 1))
+	/*
+	 * Set free list running through table...
+	 * Preset 'use count' to -1 so we allocate pid 1 next.
+	 */
+	for (i = 0; i <= pid_tbl_mask; i++) {
+		pid_table[i].pt_proc = P_FREE(LINK_EMPTY + i + 1);
+		pid_table[i].pt_pgrp = 0;
+	}
+
+	/* slot 0 is just grabbed */
+	next_free_pt = 1;
+	next_free_pt_highid = RESERVED_PROCSLOT;
+	pid_table[0].pt_proc = &proc0;
+	pid_table[0].pt_pgrp = &pgrp0;
+
+	/* Need to fix the last entry. */
+	last_free_pt = pid_tbl_mask;
+	pid_table[last_free_pt].pt_proc = P_FREE(LINK_EMPTY);
+
+	/* point at which we grow table - to avoid reusing pids too often */
+	pid_alloc_lim = pid_tbl_mask - 1;
+#undef LINK_EMPTY
+
 	LIST_INIT(&allproc);
 	LIST_INIT(&zombproc);
-	pidhashtbl = hashinit(maxproc / 4, M_PROC, &pidhash);
-	pgrphashtbl = hashinit(maxproc / 4, M_PROC, &pgrphash);
 	proc_zone = uma_zcreate("PROC", sched_sizeof_proc(),
 	    proc_ctor, proc_dtor, proc_init, proc_fini,
 	    UMA_ALIGN_PTR, UMA_ZONE_NOFREE);
 	uihashinit();
 }
 
+static void
+expand_pid_table(void)
+{
+	u_int pt_size = pid_tbl_mask + 1;
+	struct pid_table *n_pt, *new_pt;
+	struct proc *proc;
+	struct pgrp *pgrp;
+	int i;
+	pid_t pid;
+
+	new_pt = malloc(pt_size * 2 * sizeof *new_pt, M_PROC, M_WAITOK);
+
+	sx_xlock(&allproc_lock);
+	if (pt_size != pid_tbl_mask + 1) {
+		/* Another process beat us to it... */
+		sx_xunlock(&allproc_lock);
+		FREE(new_pt, M_PROC);
+		return;
+	}
+
+	/*
+	 * Copy entries from old table into new one.
+	 * If 'pid' is 'odd' we need to place in the upper half,
+	 * even pids to the lower half.
+	 *
+	 * Free items stay in the low half so we don't have to
+	 * fix up the reference to them.
+	 *
+	 * We stuff free items on the front of the freelist
+	 * because we can't write to unmodified entries.
+	 *
+	 * Processing the table backwards maintains a semblance
+	 * of issuing pid numbers that increase with time.
+ */ + i = pt_size - 1; + n_pt = new_pt + i; + for (; ; i--, n_pt--) { + proc = pid_table[i].pt_proc; + pgrp = pid_table[i].pt_pgrp; + if (!P_VALID(proc)) { + /* Up 'use count' so that link is valid */ + pid = (P_NEXT(proc) + pt_size) & ~pt_size; + proc = P_FREE(pid); + if (pgrp) + pid = pgrp->pg_id; + } else + pid = proc->p_pid; + + /* Save entry in appropriate half of table */ + n_pt[pid & pt_size].pt_proc = proc; + n_pt[pid & pt_size].pt_pgrp = pgrp; + + /* Put other piece on start of free list */ + pid = (pid ^ pt_size) & ~pid_tbl_mask; + n_pt[pid & pt_size].pt_proc = + P_FREE((pid & ~pt_size) | next_free_pt); + n_pt[pid & pt_size].pt_pgrp = 0; + next_free_pt = i | (pid & pt_size); + if (i == 0) + break; + } + + /* Switch tables */ + n_pt = pid_table; + pid_table = new_pt; + pid_tbl_mask = pt_size * 2 - 1; + + /* + * pid_max starts as 1 >> 12 (4096), once we have 2048 + * allocated pids we need it to be larger! + */ + if (pid_tbl_mask > pid_max) { + pid_max = pid_tbl_mask * 2 + 1; + pid_alloc_lim |= pid_alloc_lim << 1; + if (pid_max > PID_MAX) + pid_max = PID_MAX; + } else + pid_alloc_lim <<= 1; /* doubles number of free slots... */ + + sx_xunlock(&allproc_lock); + FREE(n_pt, M_PROC); +} + +/* + * Allocate a free proc structure. This method is called from fork1. + * + * Expand the mapping table when needed. + */ +struct proc * +proc_alloc(struct thread *td, int flags) +{ + struct proc *p, *p1; + int nxt; + pid_t pid; + struct pid_table *pt; + int ok; + uid_t uid; + int highpid; + + p = uma_zalloc(proc_zone, M_WAITOK); + p->p_state = PRS_NEW; /* protect against others */ + + /* allocate next free pid */ + for (;;expand_pid_table()) { + highpid = 0; + if (pid_alloc_cnt >= pid_alloc_lim) + /* ensure pids cycle through 2000+ values */ + continue; + sx_xlock(&allproc_lock); + if ((flags & RFHIGHPID) && next_free_pt < RESERVED_PROCSLOT) { + highpid = 1; + pt = &pid_table[next_free_pt_highid]; + nxt = P_NEXT(pt->pt_proc); + if (nxt & pid_tbl_mask) { + /* Fix the free list link */ + if (!P_VALID(pid_table[RESERVED_PROCSLOT - 1].pt_proc)) + pid_table[RESERVED_PROCSLOT - 1].pt_proc = + P_FREE(P_NEXT(pid_table[RESERVED_PROCSLOT - 1].pt_proc) + | (nxt & pid_tbl_mask)); + /* XXX: Shouldn't we unlock allproc_lock? */ + break; + } + } else { + pt = &pid_table[next_free_pt]; + nxt = P_NEXT(pt->pt_proc); + /* XXX: Shouldn't we unlock allproc_lock at the break? */ + if (nxt & pid_tbl_mask) + break; + } + /* XXX: Shouldn't we unlock allproc_lock before break? */ + if (nxt & pid_tbl_mask) + break; + /* Table full - expand (NB last entry not used....) */ + sx_xunlock(&allproc_lock); + } + + KASSERT((!P_VALID(pt->pt_proc)), ("Proc slot is not free")); + /* pid is 'saved use count' + 'size' + entry */ + pid = (nxt & ~pid_tbl_mask) + pid_tbl_mask + 1 + + (highpid ? next_free_pt_highid : next_free_pt); + + /* + * Handle this now, so that we don't have to grab the allproc lock + * again later in fork1(). + * + * Although process entries are dynamically created, we still keep + * a global limit on the maximum number we will create. Don't allow + * a nonprivileged user to use the last ten processes; don't let root + * exceed the limit. The variable nprocs is the current number of + * processes, maxproc is the limit. + */ + uid = td->td_ucred->cr_ruid; + if ((nprocs >= maxproc - 10 && + suser_cred(td->td_ucred, SUSER_RUID) != 0) || + nprocs >= maxproc) + goto bad; + + /* + * Increment the nprocs resource before blocking can occur. There + * are hard-limits as to the number of processes that can run. 
+ */ + p1 = td->td_proc; + PROC_LOCK(p1); + ok = chgproccnt(td->td_ucred->cr_ruidinfo, 1, + (uid != 0) ? lim_cur(p1, RLIMIT_NPROC) : 0); + PROC_UNLOCK(p1); + if (!ok) + goto bad; + + if (randompid) { + pid += (arc4random() % randompid) * (pid_tbl_mask + 1); + } + + if ((u_int)pid > (u_int)pid_max) + pid &= pid_tbl_mask; + + p->p_pid = pid; + if (highpid) + next_free_pt_highid = nxt & pid_tbl_mask; + else + next_free_pt = nxt & pid_tbl_mask; + + /* Grab table slot */ + pt->pt_proc = p; + pid_alloc_cnt++; + + /* + * RFHIGHPID does not mess with the lastpid counter during boot. + */ + if (!(flags & RFHIGHPID)) + lastpid = pid; + + LIST_INSERT_HEAD(&allproc, p, p_list); + nprocs++; + + sx_xunlock(&allproc_lock); + + return p; + +bad: + uma_zfree(proc_zone, p); + sx_xunlock(&allproc_lock); + return (NULL); +} + +/* + * Free last resources of a process - called from kern_wait (in kern_exit.c) + */ +void +proc_free(struct proc *p) +{ + pid_t pid = p->p_pid; + struct pid_table *pt; + + sx_xlock(&allproc_lock); + + pt = &pid_table[pid & pid_tbl_mask]; + /* save pid use count in slot */ + pt->pt_proc = P_FREE(pid & ~pid_tbl_mask); + + if (pt->pt_pgrp == NULL) { + /* link last freed entry onto ours */ + pid &= pid_tbl_mask; + pt = &pid_table[last_free_pt]; + pt->pt_proc = P_FREE(P_NEXT(pt->pt_proc) | pid); + last_free_pt = pid; + pid_alloc_cnt--; + } + + nprocs--; + sx_xunlock(&allproc_lock); + uma_zfree(proc_zone, p); +} + /* * Prepare a proc for use. */ @@ -254,15 +558,33 @@ register struct proc *p; sx_slock(&allproc_lock); - LIST_FOREACH(p, PIDHASH(pid), p_hash) - if (p->p_pid == pid) { - if (p->p_state == PRS_NEW) { - p = NULL; - break; - } - PROC_LOCK(p); - break; - } + p = pid_table[pid & pid_tbl_mask].pt_proc; + /* Only allow live processes to be found by pid. */ + if (!P_VALID(p) || p->p_pid != pid || p->p_state == PRS_NEW) + p = NULL; + else + PROC_LOCK(p); + /* XXX MP - need to have a reference count... */ + sx_sunlock(&allproc_lock); + return (p); +} + +/* + * Locate a zombie process by number + */ +struct proc * +zpfind(register pid_t pid) +{ + register struct proc *p; + + sx_slock(&allproc_lock); + p = pid_table[pid & pid_tbl_mask].pt_proc; + /* Only allow zombie processes to be found by pid. */ + if (!P_VALID(p) || p->p_pid != pid || p->p_state != PRS_ZOMBIE) + p = NULL; + else + PROC_LOCK(p); + /* XXX MP - need to have a reference count... */ sx_sunlock(&allproc_lock); return (p); } @@ -277,15 +599,21 @@ { register struct pgrp *pgrp; - sx_assert(&proctree_lock, SX_LOCKED); + sx_slock(&allproc_lock); + pgrp = pid_table[pgid & pid_tbl_mask].pt_pgrp; - LIST_FOREACH(pgrp, PGRPHASH(pgid), pg_hash) { - if (pgrp->pg_id == pgid) { - PGRP_LOCK(pgrp); - return (pgrp); - } - } - return (NULL); + /* + * Can't look up a pgrp that only exists because the session + * hasn't died yet (traditional) + */ + if (pgrp == NULL || pgrp->pg_id != pgid + || LIST_EMPTY(&pgrp->pg_members)) + pgrp = NULL; + else + PGRP_LOCK(pgrp); + /* XXX MP - need to have a reference count... */ + sx_sunlock(&allproc_lock); + return pgrp; } /* @@ -350,7 +678,7 @@ * As we have an exclusive lock of proctree_lock, * this should not deadlock. 
	 */
-	LIST_INSERT_HEAD(PGRPHASH(pgid), pgrp, pg_hash);
+	pid_table[pgid & pid_tbl_mask].pt_pgrp = pgrp;
 	pgrp->pg_jobc = 0;
 	SLIST_INIT(&pgrp->pg_sigiolst);
 	PGRP_UNLOCK(pgrp);
@@ -449,6 +777,31 @@
 }
 
 /*
+ * Remove the pgrp from the pid_table.
+ */
+static void
+pgunlink(pid_t pg_id)
+{
+	struct pgrp *pgrp;
+	struct pid_table *pt;
+
+	sx_assert(&proctree_lock, SX_XLOCKED);
+	pt = &pid_table[pg_id & pid_tbl_mask];
+	pgrp = pt->pt_pgrp;
+	pt->pt_pgrp = 0;
+
+	if (!P_VALID(pt->pt_proc)) {
+		/* orphaned pgrp, put slot onto free list */
+		pg_id &= pid_tbl_mask;
+		pt = &pid_table[last_free_pt];
+		pt->pt_proc = P_FREE(P_NEXT(pt->pt_proc) | pg_id);
+		last_free_pt = pg_id;
+		pid_alloc_cnt--;
+	}
+
+}
+
+/*
  * delete a process group
  */
 static void
@@ -472,7 +825,6 @@
 	if (pgrp->pg_session->s_ttyp != NULL &&
 	    pgrp->pg_session->s_ttyp->t_pgrp == pgrp)
 		pgrp->pg_session->s_ttyp->t_pgrp = NULL;
-	LIST_REMOVE(pgrp, pg_hash);
 	savesess = pgrp->pg_session;
 	SESS_LOCK(savesess);
 	i = --savesess->s_count;
@@ -483,6 +835,11 @@
 		ttyrel(savesess->s_ttyp);
 		mtx_destroy(&savesess->s_mtx);
 		FREE(savesess, M_SESSION);
+		pgunlink(pgrp->pg_id);
+	}
+	else {
+		if (savesess->s_sid != pgrp->pg_id)
+			pgunlink(pgrp->pg_id);
 	}
 	mtx_destroy(&pgrp->pg_mtx);
 	FREE(pgrp, M_PGRP);
@@ -506,6 +863,22 @@
 }
 
 /*
+ * Delete session - called from SESSRELE when s_count becomes zero.
+ */
+void
+sessdelete(struct session *ss)
+{
+	/*
+	 * We keep the pgrp with the same id as the session in
+	 * order to stop a process being given the same pid.
+	 * Since the pgrp holds a reference to the session, it
+	 * must be a 'zombie' pgrp by now.
+	 */
+	pgunlink(ss->s_sid);
+	FREE(ss, M_SESSION);
+}
+
+/*
  * Adjust pgrp jobc counters when specified process changes process group.
  * We count the number of processes in each process group that "qualify"
  * the group for terminal job control (those with a parent in a different
@@ -594,24 +967,39 @@
 DB_SHOW_COMMAND(pgrpdump, pgrpdump)
 {
 	register struct pgrp *pgrp;
+	register struct pid_table *pt;
 	register struct proc *p;
-	register int i;
+	int id;
+	int quit = 0;
 
-	for (i = 0; i <= pgrphash; i++) {
-		if (!LIST_EMPTY(&pgrphashtbl[i])) {
-			printf("\tindx %d\n", i);
-			LIST_FOREACH(pgrp, &pgrphashtbl[i], pg_hash) {
-				printf(
-			"\tpgrp %p, pgid %ld, sess %p, sesscnt %d, mem %p\n",
-				    (void *)pgrp, (long)pgrp->pg_id,
-				    (void *)pgrp->pg_session,
-				    pgrp->pg_session->s_count,
-				    (void *)LIST_FIRST(&pgrp->pg_members));
-				LIST_FOREACH(p, &pgrp->pg_members, p_pglist) {
-					printf("\t\tpid %ld addr %p pgrp %p\n",
-					    (long)p->p_pid, (void *)p,
-					    (void *)p->p_pgrp);
-				}
+	db_setup_paging(db_simple_pager, &quit, DB_LINES_PER_PAGE);
+	printf("pid table %p size %x, next %x, last %x\n",
+	    pid_table, pid_tbl_mask+1,
+	    next_free_pt, last_free_pt);
+	for (pt = pid_table, id = 0; id <= pid_tbl_mask && !quit; id++, pt++) {
+		p = pt->pt_proc;
+		if (!P_VALID(p) && !pt->pt_pgrp)
+			continue;
+		db_printf("  id %x: ", id);
+		if (P_VALID(p))
+			db_printf("proc %p id %d (0x%x) %s\n",
+			    p, p->p_pid, p->p_pid, p->p_comm);
+		else
+			db_printf("next %x use %x\n",
+			    P_NEXT(p) & pid_tbl_mask,
+			    P_NEXT(p) & ~pid_tbl_mask);
+		if ((pgrp = pt->pt_pgrp)) {
+			db_printf("\tsession %p, sid %d, count %d, login %s\n",
+			    pgrp->pg_session, pgrp->pg_session->s_sid,
+			    pgrp->pg_session->s_count,
+			    pgrp->pg_session->s_login);
+			db_printf("\tpgrp %p, pg_id %d, pg_jobc %d, members %p\n",
+			    pgrp, pgrp->pg_id, pgrp->pg_jobc,
+			    pgrp->pg_members.lh_first);
+			for (p = pgrp->pg_members.lh_first; p != 0 && !quit;
+			    p = p->p_pglist.le_next) {
+				db_printf("\t\tpid %d addr %p pgrp %p %s\n",
+				    p->p_pid, p, p->p_pgrp, p->p_comm);
 			}
 		}
 	}
@@ -835,24 +1223,6 @@
 		kp->ki_ppid = p->p_pptr->p_pid;
 }
 
-/*
- * Locate a zombie process by number
- */
-struct proc *
-zpfind(pid_t pid)
-{
-	struct proc *p;
-
-	sx_slock(&allproc_lock);
-	LIST_FOREACH(p, &zombproc, p_list)
-		if (p->p_pid == pid) {
-			PROC_LOCK(p);
-			break;
-		}
-	sx_sunlock(&allproc_lock);
-	return (p);
-}
-
 #define KERN_PROC_ZOMBMASK	0x3
 #define KERN_PROC_NOTHREADS	0x4
@@ -1205,6 +1575,7 @@
 	return (sysctl_handle_string(oidp, sv_name, 0, req));
 }
 
+SYSCTL_INT(_kern, OID_AUTO, lastpid, CTLFLAG_RD, &lastpid, 0, "Last used PID");
 
 SYSCTL_NODE(_kern, KERN_PROC, proc, CTLFLAG_RD, 0, "Process table");
@@ -1270,3 +1641,54 @@
 SYSCTL_NODE(_kern_proc, (KERN_PROC_PROC | KERN_PROC_INC_THREAD), proc_td,
 	CTLFLAG_RD, sysctl_kern_proc, "Return process table, no threads");
 
+
+/*
+ * Random component to lastpid generation.  We mix in a random factor to make
+ * it a little harder to predict.  We sanity check the modulus value to avoid
+ * doing it in critical paths.  Don't let it be too small or we pointlessly
+ * waste randomness entropy, and don't let it be impossibly large.  With the
+ * pid_table allocator the value bounds a random multiple of the table size
+ * that is added to each new pid, so the handler below caps it at 100.
+ */
+
+static int
+sysctl_kern_randompid(SYSCTL_HANDLER_ARGS)
+{
+	int error, pid;
+
+	sysctl_wire_old_buffer(req, sizeof(int));
+	pid = randompid;
+	error = sysctl_handle_int(oidp, &pid, 0, req);
+	if (error == 0 && req->newptr != NULL) {
+		if (pid < 0 || pid > 100)		/* out of range */
+			pid = 100;
+		else if (pid < 2)			/* NOP */
+			pid = 0;
+		randompid = pid;
+	}
+	return (error);
+}
+
+SYSCTL_PROC(_kern, OID_AUTO, randompid, CTLTYPE_INT|CTLFLAG_RW,
+    0, 0, sysctl_kern_randompid, "I", "Random PID modulus");
+
+static int
+sysctl_kern_currentmaxpid(SYSCTL_HANDLER_ARGS)
+{
+	int error, maxpid;
+
+	maxpid = pid_max;
+	sysctl_wire_old_buffer(req, sizeof(int));
+	error = sysctl_handle_int(oidp, &maxpid, 0, req);
+	if (error == 0 && req->newptr != NULL) {
+		if (maxpid > pid_tbl_mask * 2 + 1 && maxpid <= PID_MAX)
+			pid_max = maxpid;
+		else
+			error = EINVAL;
+	}
+	return (error);
+}
+
+SYSCTL_PROC(_kern, OID_AUTO, maxpid, CTLTYPE_INT|CTLFLAG_RW,
+    0, 0, sysctl_kern_currentmaxpid, "I", "Current Maximum PID");
+
Index: src/sys/sys/proc.h
diff -u src/sys/sys/proc.h:1.391 src/sys/sys/proc.h:1.375.1000.18
--- src/sys/sys/proc.h:1.391	Tue Aug 10 07:24:22 2004
+++ src/sys/sys/proc.h	Sat Aug 14 19:48:22 2004
@@ -88,7 +88,6 @@
  * (c) const until freeing
  */
 struct pgrp {
-	LIST_ENTRY(pgrp) pg_hash;	/* (e) Hash chain. */
 	LIST_HEAD(, proc) pg_members;	/* (m + e) Pointer to pgrp members. */
 	struct session *pg_session;	/* (c) Pointer to session. */
 	struct sigiolst pg_sigiolst;	/* (m) List of sigio sources. */
@@ -761,7 +760,7 @@
 #define	SESSHOLD(s)	((s)->s_count++)
 #define	SESSRELE(s) {							\
 	if (--(s)->s_count == 0)					\
-		FREE(s, M_SESSION);					\
+		sessdelete(s);						\
 }
 
 #define	STOPEVENT(p, e, v) do {						\
@@ -835,18 +834,11 @@
 #define	PARGS_LOCK(p)		mtx_lock(&pargs_ref_lock)
 #define	PARGS_UNLOCK(p)		mtx_unlock(&pargs_ref_lock)
 
-#define	PIDHASH(pid)	(&pidhashtbl[(pid) & pidhash])
-extern LIST_HEAD(pidhashhead, proc) *pidhashtbl;
-extern u_long pidhash;
-
-#define	PGRPHASH(pgid)	(&pgrphashtbl[(pgid) & pgrphash])
-extern LIST_HEAD(pgrphashhead, pgrp) *pgrphashtbl;
-extern u_long pgrphash;
-
 extern struct sx allproc_lock;
 extern struct sx proctree_lock;
 extern struct mtx pargs_ref_lock;
 extern struct mtx ppeers_lock;
+extern struct pgrp pgrp0;		/* Process group for swapper. */
 extern struct proc proc0;		/* Process slot for swapper. */
 extern struct thread thread0;		/* Primary thread in proc0. */
 extern struct ksegrp ksegrp0;		/* Primary ksegrp in proc0. */
@@ -890,6 +882,11 @@
 int	leavepgrp(struct proc *p);
 int	maybe_preempt(struct thread *td);
 void	mi_switch(int flags, struct thread *newtd);
+
+void	sessdelete(struct session *);
+struct proc *proc_alloc(struct thread *td, int flags);
+void	proc_free(struct proc *p);
+
 int	p_candebug(struct thread *td, struct proc *p);
 int	p_cansee(struct thread *td, struct proc *p);
 int	p_cansched(struct thread *td, struct proc *p);
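
Appendix: a standalone sketch of the pid_table scheme.

The comment block added to kern_proc.c above is terse, so here is a minimal
userland model of the allocator's core idea that can be compiled and run
outside the kernel.  This is an illustration, not the kernel code: locking,
table growth (expand_pid_table), pgrp/session slots, the RFHIGHPID
reservation and the randompid step are all omitted, and the names TBL_SIZE,
pt_init, pid_alloc, pid_free and pid_lookup are invented for the example.
Only the P_VALID/P_NEXT/P_FREE encoding mirrors the patch.

/*
 * pidtable.c - userland sketch of the NetBSD-style pid_table allocator.
 * Simplifying assumptions: single-threaded, fixed-size table, no pgrps.
 */
#include <stdint.h>
#include <stdio.h>

#define	TBL_SIZE	8			/* must be a power of two */
#define	TBL_MASK	(TBL_SIZE - 1)

struct proc { int p_pid; };

/*
 * A slot holds either a proc pointer (aligned, so bit 0 is clear) or a
 * free-list link: (generation bits | next free index), shifted left one
 * bit with the low bit set so it can never be mistaken for a pointer.
 */
#define	P_VALID(p)	(!((intptr_t)(p) & 1))
#define	P_NEXT(p)	((intptr_t)(p) >> 1)
#define	P_FREE(v)	((struct proc *)(((intptr_t)(v) << 1) | 1))

static struct proc *pid_table[TBL_SIZE];
static struct proc proc0;		/* pins slot 0, as in the patch */
static unsigned next_free, last_free;

static void
pt_init(void)
{
	int i;

	/* Thread a free list through the table: slot i links to i + 1. */
	for (i = 0; i < TBL_SIZE; i++)
		pid_table[i] = P_FREE(i + 1);
	pid_table[TBL_SIZE - 1] = P_FREE(0);	/* tail: next index 0 = none */
	pid_table[0] = &proc0;			/* pid 0 is taken */
	next_free = 1;
	last_free = TBL_SIZE - 1;
}

static int
pid_alloc(struct proc *p)
{
	unsigned idx = next_free;
	intptr_t nxt = P_NEXT(pid_table[idx]);

	if ((nxt & TBL_MASK) == 0)	/* list exhausted; kernel grows table */
		return (-1);

	/* pid = saved generation (high bits) + one generation + slot index */
	p->p_pid = (int)((nxt & ~(intptr_t)TBL_MASK) + TBL_SIZE + idx);
	pid_table[idx] = p;
	next_free = nxt & TBL_MASK;
	return (p->p_pid);
}

static void
pid_free(struct proc *p)
{
	unsigned idx = p->p_pid & TBL_MASK;

	/* Park the pid's generation bits in the slot, with no next link... */
	pid_table[idx] = P_FREE(p->p_pid & ~TBL_MASK);
	/* ...then append the slot to the tail of the free list. */
	pid_table[last_free] = P_FREE(P_NEXT(pid_table[last_free]) | idx);
	last_free = idx;
}

static struct proc *
pid_lookup(int pid)
{
	struct proc *p = pid_table[pid & TBL_MASK];

	/* O(1): a live slot whose stored pid matches is the only hit. */
	return ((P_VALID(p) && p->p_pid == pid) ? p : NULL);
}

int
main(void)
{
	struct proc a, b;
	int pa, pb;

	pt_init();
	pa = pid_alloc(&a);
	pb = pid_alloc(&b);
	printf("allocated pids %d and %d\n", pa, pb);
	printf("lookup(%d) finds a: %s\n", pa,
	    pid_lookup(pa) == &a ? "yes" : "no");
	pid_free(&a);
	printf("lookup(%d) after free: %s\n", pa,
	    pid_lookup(pa) == NULL ? "NULL (stale pid rejected)" : "hit!?");
	return (0);
}

Because a freed slot keeps the generation bits of the last pid it issued,
the next allocation from that slot returns a different pid (the old pid
plus one table size), and a lookup with a stale pid fails in O(1).  That
is the property that lets the patch delete the PIDHASH/PGRPHASH tables
and the O(n) allproc/zombproc scans in fork1().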