[MLton-devel] cvs commit: generational GC

Stephen Weeks sweeks@users.sourceforge.net
Tue, 06 Aug 2002 18:02:43 -0700


sweeks      02/08/06 18:02:43

  Modified:    basis-library/misc primitive.sml
               include  ccodegen.h x86codegen.h
               mlton    Makefile mlton-stubs.cm
               mlton/atoms prim.fun prim.sig
               mlton/backend runtime.fun runtime.sig ssa-to-rssa.fun
               mlton/codegen/c-codegen c-codegen.fun
               mlton/codegen/x86-codegen x86-mlton.fun
               mlton/control control.sig control.sml
               mlton/main main.sml
               runtime  gc.c gc.h my-lib.c
  Log:
  This is the first checkin of a (mostly) working generational GC.  It still
  needs work, but passes all the regressions and self compiles to fixpoint,
  using the generational GC.  There is still at least one bug when compiling
  -generational false.
  
  Added two new compiler switches to control the generational GC:
  	-card-size-log2 n
  	-generational {true|false}
  
  Disabled -inline-array, because GC_allocateArray is at some point going to
  know about generations, and so array allocation can no longer be inlined.  At
  some point, we should probably delete the code in the compiler that does
  inline array allocation.
  
  Fixed a performance bug in heap resizing that really hurts when the amount of
  live data is more than half the RAM size.  The problem was that the entire
  block of live data was copied from one place to another, and then the old
  space was munmapped.  That meant that both copies of the live data had to
  exist in virtual memory simultaneously.  Now, the copy is done from high
  address to low address, 10M at a time, munmapping parts of the old space as we
  go.  That way, as long as the live data fits in RAM (give or take), no
  paging need occur.
  
  Turned Thread_current from a primitive into an FFI call.

Revision  Changes    Path
1.34      +1 -1      mlton/basis-library/misc/primitive.sml

Index: primitive.sml
===================================================================
RCS file: /cvsroot/mlton/mlton/basis-library/misc/primitive.sml,v
retrieving revision 1.33
retrieving revision 1.34
diff -u -r1.33 -r1.34
--- primitive.sml	29 Jul 2002 02:00:02 -0000	1.33
+++ primitive.sml	7 Aug 2002 01:02:42 -0000	1.34
@@ -581,7 +581,7 @@
 	     * switching to a copy.
 	     *)
 	    val copyCurrent = _prim "Thread_copyCurrent": unit -> unit;
-	    val current = _prim "Thread_current": unit -> thread;
+	    val current = _ffi "Thread_current": unit -> thread;
 	    val finishHandler = _ffi "Thread_finishHandler": unit -> unit;
 	    val saved = _ffi "Thread_saved": unit -> thread;
 	    val savedPre = _ffi "Thread_saved": unit -> preThread;



1.35      +6 -3      mlton/include/ccodegen.h

Index: ccodegen.h
===================================================================
RCS file: /cvsroot/mlton/mlton/include/ccodegen.h,v
retrieving revision 1.34
retrieving revision 1.35
diff -u -r1.34 -r1.35
--- ccodegen.h	30 Jul 2002 18:30:52 -0000	1.34
+++ ccodegen.h	7 Aug 2002 01:02:42 -0000	1.35
@@ -118,12 +118,14 @@
 /*                       main                        */
 /* ------------------------------------------------- */
 
-#define Main(ufh, fhs, mfs, mfi, mot, mg, mc, ml)			\
-int main(int argc, char **argv) {					\
+#define Main(cs, ufh, fhs, g, mfs, mfi, mot, mg, mc, ml) 		\
+int main (int argc, char **argv) {					\
 	struct cont cont;						\
 	int l_nextFun;							\
+	gcState.cardSizeLog2 = cs;					\
 	gcState.fixedHeapSize = fhs;					\
 	gcState.frameLayouts = frameLayouts;				\
+	gcState.generational = g;					\
 	gcState.globals = globalpointer;				\
 	gcState.intInfInits = intInfInits;				\
 	gcState.loadGlobals = &loadGlobals;				\
@@ -272,7 +274,8 @@
 #define CheckPointer(p)							\
 	do {								\
 		assert (not GC_isPointer (p) or				\
-				(gcState.base <= p and p < frontier));	\
+				(gcState.heap.oldGen <= p 	        \
+					and p < gcState.heap.oldGen + gcState.heap.size)); \
 	} while (0)
 
 #define FlushFrontier()				\



1.15      +4 -2      mlton/include/x86codegen.h

Index: x86codegen.h
===================================================================
RCS file: /cvsroot/mlton/mlton/include/x86codegen.h,v
retrieving revision 1.14
retrieving revision 1.15
diff -u -r1.14 -r1.15
--- x86codegen.h	27 Jul 2002 20:52:05 -0000	1.14
+++ x86codegen.h	7 Aug 2002 01:02:42 -0000	1.15
@@ -64,12 +64,14 @@
 #define Float(c, f) globaldouble[c] = f;
 #define EndFloats }
 
-#define Main(ufh, fhs, mfs, mfi, mot, mg, ml, reserveEsp)		\
+#define Main(cs, ufh, fhs, g, mfs, mfi, mot, mg, ml, reserveEsp)	\
 extern pointer ml;							\
-int main(int argc, char **argv) {					\
+int main (int argc, char **argv) {					\
 	pointer jump;  							\
+	gcState.cardSizeLog2 = cs;					\
 	gcState.fixedHeapSize = fhs;					\
 	gcState.frameLayouts = frameLayouts;				\
+	gcState.generational = g;					\
 	gcState.globals = globalpointer;				\
 	gcState.intInfInits = intInfInits;				\
 	gcState.loadGlobals = &loadGlobals;				\



1.51      +1 -1      mlton/mlton/Makefile

Index: Makefile
===================================================================
RCS file: /cvsroot/mlton/mlton/mlton/Makefile,v
retrieving revision 1.50
retrieving revision 1.51
diff -u -r1.50 -r1.51
--- Makefile	11 Jul 2002 20:42:24 -0000	1.50
+++ Makefile	7 Aug 2002 01:02:42 -0000	1.51
@@ -4,7 +4,7 @@
 LIB = $(BUILD)/lib
 MLTON = mlton
 HOST = self
-FLAGS = @MLton gc-summary -- -host $(HOST) -v -o $(AOUT)
+FLAGS = @MLton gc-summary ---host $(HOST) -v -o $(AOUT)
 NAME = mlton
 AOUT = mlton-compile
 PATH = $(BIN):$(shell echo $$PATH)



1.4       +4 -1      mlton/mlton/mlton-stubs.cm

Index: mlton-stubs.cm
===================================================================
RCS file: /cvsroot/mlton/mlton/mlton/mlton-stubs.cm,v
retrieving revision 1.3
retrieving revision 1.4
diff -u -r1.3 -r1.4
--- mlton-stubs.cm	6 Jul 2002 17:22:05 -0000	1.3
+++ mlton-stubs.cm	7 Aug 2002 01:02:42 -0000	1.4
@@ -5,22 +5,25 @@
 ../lib/mlyacc/parser2.sml
 ../lib/mlyacc/join.sml
 ../lib/mlton-stubs/thread.sml
+../lib/mlton-stubs/random.sig
+../lib/mlton-stubs/random.sml
 ../lib/mlton-stubs/world.sig
 ../lib/mlton-stubs/word.sig
 ../lib/mlton-stubs/vector.sig
 ../lib/mlton-stubs/thread.sig
+../lib/mlton-stubs/io.sig
 ../lib/mlton-stubs/text-io.sig
 ../lib/mlton-stubs/syslog.sig
 ../lib/mlton-stubs/socket.sig
 ../lib/mlton-stubs/signal.sig
 ../lib/mlton-stubs/rusage.sig
 ../lib/mlton-stubs/rlimit.sig
-../lib/mlton-stubs/random.sig
 ../lib/mlton-stubs/ptrace.sig
 ../lib/mlton-stubs/profile.sig
 ../lib/mlton-stubs/process.sig
 ../lib/mlton-stubs/proc-env.sig
 ../lib/mlton-stubs/array.sig
+../lib/mlton-stubs/bin-io.sig
 ../lib/mlton-stubs/cont.sig
 ../lib/mlton-stubs/exn.sig
 ../lib/mlton-stubs/gc.sig



1.35      +0 -2      mlton/mlton/atoms/prim.fun

Index: prim.fun
===================================================================
RCS file: /cvsroot/mlton/mlton/mlton/atoms/prim.fun,v
retrieving revision 1.34
retrieving revision 1.35
diff -u -r1.34 -r1.35
--- prim.fun	30 Jul 2002 19:15:52 -0000	1.34
+++ prim.fun	7 Aug 2002 01:02:42 -0000	1.35
@@ -153,7 +153,6 @@
        | Thread_canHandle
        | Thread_copy
        | Thread_copyCurrent
-       | Thread_current
        | Thread_switchTo
        | Vector_fromArray
        | Vector_length
@@ -376,7 +375,6 @@
 	  (Thread_canHandle, DependsOnState, "Thread_canHandle"),
 	  (Thread_copy, Moveable, "Thread_copy"),
 	  (Thread_copyCurrent, SideEffect, "Thread_copyCurrent"),
-	  (Thread_current, DependsOnState, "Thread_current"),
 	  (Thread_switchTo, SideEffect, "Thread_switchTo"),
 	  (Vector_fromArray, DependsOnState, "Vector_fromArray"),
 	  (Vector_length, Functional, "Vector_length"),



1.29      +0 -1      mlton/mlton/atoms/prim.sig

Index: prim.sig
===================================================================
RCS file: /cvsroot/mlton/mlton/mlton/atoms/prim.sig,v
retrieving revision 1.28
retrieving revision 1.29
diff -u -r1.28 -r1.29
--- prim.sig	30 Jul 2002 19:15:53 -0000	1.28
+++ prim.sig	7 Aug 2002 01:02:42 -0000	1.29
@@ -158,7 +158,6 @@
 	     | Thread_canHandle (* implemented in backend *)
 	     | Thread_copy
 	     | Thread_copyCurrent
-	     | Thread_current
 	     (* switchTo has to be a _prim because we have to know that it
 	      * enters the runtime -- because everything must be saved
 	      * on the stack.



1.5       +0 -2      mlton/mlton/backend/runtime.fun

Index: runtime.fun
===================================================================
RCS file: /cvsroot/mlton/mlton/mlton/backend/runtime.fun,v
retrieving revision 1.4
retrieving revision 1.5
diff -u -r1.4 -r1.5
--- runtime.fun	30 Jul 2002 02:48:33 -0000	1.4
+++ runtime.fun	7 Aug 2002 01:02:42 -0000	1.5
@@ -147,8 +147,6 @@
 val arrayLengthOffset = ~ (2 * wordSize)
 val allocTooLarge: word = 0wxFFFFFFFC
 
-val bytesPerCardLog2: word = 0w8
-   
 fun normalSize {numPointers, numWordsNonPointers} =
    wordSize * (numPointers + numWordsNonPointers)
 



1.14      +0 -1      mlton/mlton/backend/runtime.sig

Index: runtime.sig
===================================================================
RCS file: /cvsroot/mlton/mlton/mlton/backend/runtime.sig,v
retrieving revision 1.13
retrieving revision 1.14
diff -u -r1.13 -r1.14
--- runtime.sig	30 Jul 2002 02:48:33 -0000	1.13
+++ runtime.sig	7 Aug 2002 01:02:42 -0000	1.14
@@ -69,7 +69,6 @@
       val arrayHeaderSize: int
       val arrayLengthOffset: int
       val array0Size: int
-      val bytesPerCardLog2: word
       val headerToTypeIndex: word -> int
       val isWordAligned: int -> bool
       val intInfOverheadSize: int



1.22      +5 -3      mlton/mlton/backend/ssa-to-rssa.fun

Index: ssa-to-rssa.fun
===================================================================
RCS file: /cvsroot/mlton/mlton/mlton/backend/ssa-to-rssa.fun,v
retrieving revision 1.21
retrieving revision 1.22
diff -u -r1.21 -r1.22
--- ssa-to-rssa.fun	30 Jul 2002 19:15:54 -0000	1.21
+++ ssa-to-rssa.fun	7 Aug 2002 01:02:42 -0000	1.22
@@ -882,7 +882,9 @@
 			      (PrimApp
 			       {args = (Vector.new2
 					(Operand.CastWord addr,
-					 Operand.word Runtime.bytesPerCardLog2)),
+					 Operand.word
+					 (Word.fromInt
+					  (!Control.cardSizeLog2)))),
 				dst = SOME (index, Type.int),
 				prim = Prim.word32Rshift})
 			      :: (Bind {isMutable = false,
@@ -900,7 +902,7 @@
 			  loop (i - 1, prefix ss, t)
 			end
 		     fun arrayUpdate (ty, src) =
-		        if Type.isPointer ty
+		        if !Control.generational andalso Type.isPointer ty
 			   then let
 				   val temp = Var.newNoname ()
 				   val tempOp = Operand.Var {var = temp,
@@ -942,7 +944,7 @@
 								    ty = ty},
 					      src = src}
 			in
-			   if Type.isPointer ty
+			   if !Control.generational andalso Type.isPointer ty
 			      then updateCard (varOp addr, fn ss => ss, assign)
 			   else loop (i - 1, assign::ss, t)
 			end



1.28      +5 -1      mlton/mlton/codegen/c-codegen/c-codegen.fun

Index: c-codegen.fun
===================================================================
RCS file: /cvsroot/mlton/mlton/mlton/codegen/c-codegen/c-codegen.fun,v
retrieving revision 1.27
retrieving revision 1.28
diff -u -r1.27 -r1.28
--- c-codegen.fun	30 Jul 2002 02:48:33 -0000	1.27
+++ c-codegen.fun	7 Aug 2002 01:02:43 -0000	1.28
@@ -60,6 +60,8 @@
       val truee = "TRUE"
       val falsee = "FALSE"
 
+      fun bool b = if b then truee else falsee
+	 
       fun args (ss: string list): string
 	 = concat ("(" :: List.separate (ss, ", ") @ [")"])
          
@@ -290,8 +292,10 @@
 	    val magic = C.word (Random.useed ())
 	 in 
 	    C.callNoSemi ("Main",
-			  [usedFixedHeap,
+			  [C.int (!Control.cardSizeLog2),
+			   usedFixedHeap,
 			   C.int fixedHeapSize,
+			   C.bool (!Control.generational),
 			   C.int maxFrameSize,
 			   C.int maxFrameIndex,
 			   C.int (Vector.length objectTypes),



1.36      +0 -19     mlton/mlton/codegen/x86-codegen/x86-mlton.fun

Index: x86-mlton.fun
===================================================================
RCS file: /cvsroot/mlton/mlton/mlton/codegen/x86-codegen/x86-mlton.fun,v
retrieving revision 1.35
retrieving revision 1.36
diff -u -r1.35 -r1.36
--- x86-mlton.fun	11 Jul 2002 02:16:49 -0000	1.35
+++ x86-mlton.fun	7 Aug 2002 01:02:43 -0000	1.36
@@ -1308,25 +1308,6 @@
 	     | String_size => lengthArrayVectorString ()
 	     | String_toCharVector => mov ()
 	     | String_toWord8Vector => mov ()
-	     | Thread_current
-	     => let
-		  val (dst,dstsize) = getDst ()
-		  val _
-		    = Assert.assert
-		      ("applyPrim: Thread_current, dstsize",
-		       fn () => dstsize = pointerSize)
-		in
-		  AppendList.fromList
-		  [Block.T'
-		   {entry = NONE,
-		    profileInfo = ProfileInfo.none,
-		    statements 
-		    = [Assembly.instruction_mov
-		       {dst = dst,
-			src = gcState_currentThreadContentsOperand (),
-			size = wordSize}],
-		    transfer = NONE}]
-		end
 	     | Vector_length => lengthArrayVectorString ()
 	     | Word8_toInt => movx Instruction.MOVZX
 	     | Word8_toIntX => movx Instruction.MOVSX



1.49      +5 -0      mlton/mlton/control/control.sig

Index: control.sig
===================================================================
RCS file: /cvsroot/mlton/mlton/mlton/control/control.sig,v
retrieving revision 1.48
retrieving revision 1.49
diff -u -r1.48 -r1.49
--- control.sig	12 Jul 2002 18:53:17 -0000	1.48
+++ control.sig	7 Aug 2002 01:02:43 -0000	1.49
@@ -18,6 +18,8 @@
       (*            Begin Flags             *)
       (*------------------------------------*)
 
+      val cardSizeLog2: int ref
+
       datatype chunk =
 	 OneChunk
        | ChunkPerFunc
@@ -46,6 +48,9 @@
        | First
        | Every
       val gcCheck: gcCheck ref
+
+      (* Does the runtime use generational GC. *)
+      val generational: bool ref
 
       datatype host =
 	 Cross of string



1.61      +8 -0      mlton/mlton/control/control.sml

Index: control.sml
===================================================================
RCS file: /cvsroot/mlton/mlton/mlton/control/control.sml,v
retrieving revision 1.60
retrieving revision 1.61
diff -u -r1.60 -r1.61
--- control.sml	12 Jul 2002 18:53:17 -0000	1.60
+++ control.sml	7 Aug 2002 01:02:43 -0000	1.61
@@ -11,6 +11,10 @@
 structure C = Control ()
 open C
 
+val cardSizeLog2 = control {name = "log2 (card size)",
+			    default = 8,
+			    toString = Int.toString}
+   
 structure Chunk =
    struct
       datatype t =
@@ -78,6 +82,10 @@
 val gcCheck = control {name = "gc check",
 		       default = Limit,
 		       toString = GcCheck.toString}
+
+val generational = control {name = "generational",
+			    default = true,
+			    toString = Bool.toString}
 
 structure Host =
    struct



1.74      +14 -8     mlton/mlton/main/main.sml

Index: main.sml
===================================================================
RCS file: /cvsroot/mlton/mlton/mlton/main/main.sml,v
retrieving revision 1.73
retrieving revision 1.74
diff -u -r1.73 -r1.74
--- main.sml	19 Jul 2002 19:23:18 -0000	1.73
+++ main.sml	7 Aug 2002 01:02:43 -0000	1.74
@@ -90,6 +90,9 @@
        (Expert, "build-constants", "",
 	"output C file that prints basis constants",
 	trueRef buildConstants),
+       (Expert, "card-size-log2", " n",
+	"log (base 2) of card size used by GC",
+	intRef cardSizeLog2),
        (Expert, "coalesce", " n", "coalesce chunk size for C codegen",
 	Int (fn n => coalesce := SOME n)),
        (Expert, "debug", " {false|true}", "produce executable with debug info",
@@ -123,6 +126,8 @@
 		       | "first" => First
 		       | "every" => Every
 		       | _ => usage (concat ["invalid -gc-check flag: ", s])))),
+       (Expert, "generational", " {true|false}", "use generational GC",
+	boolRef Control.generational),
        (Normal, "host",
 	concat [" {",
 		concat (List.separate (List.map (hostMap (), #host), "|")),
@@ -138,9 +143,13 @@
  *)
        (Normal, "inline", " n", "inlining threshold",
 	Int setInlineSize),
-       (Expert, "inline-array", " {false|true}",
-	"inline array allocation",
-	boolRef inlineArrayAllocation),
+       (* -inline-array true is no longer allowed, because GC_arrayAllocate
+	* knows intimate details of the generational GC.
+	*)
+(*        (Expert, "inline-array", " {false|true}",
+ * 	"inline array allocation",
+ *	boolRef inlineArrayAllocation),
+ *)
 (*        (Normal, "I", "dir", "search dir for include files",
  * 	push includeDirs),
  *)
@@ -628,11 +637,8 @@
 			    docc = docc,
 			    outputC = make (Control.C, cOut),
 			    outputS = make (Control.Assembly, sOut)}
-			(* These collects are so that the heap is shrunk
-			 * as much as possible before calling gcc.
-			 *)
-			val _ = MLton.GC.collect ()
-			val _ = MLton.GC.collect ()
+			(* Shrink the heap before calling gcc. *)
+			val _ = MLton.GC.pack ()
 		     in
 			case stop of
 			   Place.Generated => ()



1.68      +456 -180  mlton/runtime/gc.c

Index: gc.c
===================================================================
RCS file: /cvsroot/mlton/mlton/runtime/gc.c,v
retrieving revision 1.67
retrieving revision 1.68
diff -u -r1.67 -r1.68
--- gc.c	30 Jul 2002 02:48:33 -0000	1.67
+++ gc.c	7 Aug 2002 01:02:43 -0000	1.68
@@ -43,9 +43,10 @@
 enum {
 	BOGUS_EXN_STACK = 0xFFFFFFFF,
 	BOGUS_POINTER = 0x1,
-	BYTES_PER_CARD = 256,
+	COPY_CHUNK_SIZE = 0x800000,
 	DEBUG = FALSE,
 	DEBUG_DETAILED = FALSE,
+	DEBUG_GENERATIONAL = FALSE,
 	DEBUG_MARK_COMPACT = FALSE,
 	DEBUG_MEM = FALSE,
 	DEBUG_RESIZING = FALSE,
@@ -55,6 +56,7 @@
 	FORWARDED = 0xFFFFFFFF,
 	HEADER_SIZE = WORD_SIZE,
 	LIVE_RATIO = 8,	/* The desired live ratio. */
+	MINOR = TRUE, /* Are minor gcs allowed. */
 	STACK_HEADER_SIZE = WORD_SIZE,
 };
 
@@ -116,7 +118,7 @@
 }
 
 
-static inline uint max(uint x, uint y) {
+static inline uint max (uint x, uint y) {
 	return ((x > y) ? x : y);
 }
 #endif
@@ -130,7 +132,7 @@
 }
 
 
-static inline uint roundUp (uint a, uint b) {
+static inline uint align (uint a, uint b) {
 	assert (a >= 0);
 	assert (b >= 1);
 	a += b - 1;
@@ -138,16 +140,13 @@
 	return a;	
 }
 
-/*
- * Round size up to a multiple of the size of a page.
- */
-static inline size_t roundPage (GC_state s, size_t size) {
-	return roundUp (size, s->pageSize);
+static bool isAligned (uint a, uint b) {
+	return 0 == a % b;
 }
 
 #ifndef NODEBUG
 static bool isPageAligned (GC_state s, size_t size) {
-	return 0 == (size % s->pageSize);
+	return isAligned (size, s->pageSize);
 }
 #endif
 
@@ -157,22 +156,18 @@
  *  Any attempt to touch the dead zone (read or write) will cause a
  *   segmentation fault.
  */
-static void *ssmmap(size_t length, size_t dead_low, size_t dead_high) {
-  void *base,*low,*result,*high;
-
-  base = smmap(length + dead_low + dead_high);
-
-  low = base;
-  if (mprotect(low, dead_low, PROT_NONE))
-    diee("mprotect failed");
-
-  result = low + dead_low;
-  high = result + length;
-
-  if (mprotect(high, dead_high, PROT_NONE))
-    diee("mprotect failed");
+static void *ssmmap (size_t length, size_t dead_low, size_t dead_high) {
+	void *base,*low,*result,*high;
 
-  return result;
+	base = smmap (length + dead_low + dead_high);
+	low = base;
+	if (mprotect (low, dead_low, PROT_NONE))
+		diee ("mprotect failed");
+	result = low + dead_low;
+	high = result + length;
+	if (mprotect(high, dead_high, PROT_NONE))
+		diee ("mprotect failed");
+	return result;
 }
 #endif
 
@@ -461,10 +456,12 @@
 /* ---------------------------------------------------------------- */
 
 void GC_display (GC_state s, FILE *stream) {
-	fprintf (stream, "GC state\n\tcardMap = 0x%08x\n\toldGen = 0x%08x\n\tnursery = 0x%08x\n\tfrontier - nursery = %u\n\tlimitPlusSlop - frontier = %d\n",
+	fprintf (stream, "GC state\n\tcardMap = 0x%08x\n\toldGen = 0x%08x\n\toldGen + oldGenSize = 0x%08x\n\tnursery = 0x%08x\n\tfrontier = 0x%08x\n\tfrontier - nursery = %u\n\tlimitPlusSlop - frontier = %d\n",
 			(uint) s->heap.cardMap,
        			(uint) s->heap.oldGen,
+			(uint) s->heap.oldGen + s->heap.oldGenSize,
 			(uint) s->heap.nursery, 
+			(uint) s->frontier,
 			s->frontier - s->heap.nursery,
 			s->limitPlusSlop - s->frontier);
 	fprintf (stream, "\tcanHandle = %d\n", s->canHandle);
@@ -477,6 +474,32 @@
 			(uint)s->stackBottom,
 			s->stackTop - s->stackBottom,
 			(s->stackLimit - s->stackTop));
+	if (DEBUG_DETAILED) {
+		int i;
+
+		fprintf (stderr, "crossMap trues");
+		for (i = 0; i < s->heap.numCards; ++i)
+			if (s->heap.crossMap[i])
+				fprintf (stderr, " %u", i);
+		fprintf (stderr, "\n");
+	}		
+}
+
+static inline uint cardNumToSize (GC_state s, uint n) {
+	return n << s->cardSizeLog2;
+}
+
+static inline uint divCardSize (GC_state s, uint n) {
+	return n >> s->cardSizeLog2;
+}
+
+static inline bool cardIsMarked (GC_state s, pointer p) {
+	return s->heap.cardMap[divCardSize (s, (uint)p)];
+}
+
+static inline void markCard (GC_state s, pointer p) {
+	if (s->generational)
+		s->heap.cardMap[divCardSize (s, (uint)p)] = '\001';
 }
 
 /* ---------------------------------------------------------------- */
@@ -584,6 +607,9 @@
 	s->stackBottom = stackBottom (stack);
 	s->stackTop = stackTop (stack);
 	s->stackLimit = stackLimit (s, stack);
+	/* We must card mark the stack because it will be updated by the mutator.
+	 */
+	markCard (s, (pointer)stack);
 }
 
 static inline void stackCopy (GC_stack from, GC_stack to) {
@@ -677,7 +703,7 @@
 		for ( ; p < max; p += POINTER_SIZE) {
 			if (DEBUG_DETAILED)
 				fprintf (stderr, "p = 0x%08x  *p = 0x%08x\n",
-						(uint)p, (uint)*p);
+						(uint)p, *(uint*)p);
 			maybeCall (f, s, (pointer*)p);
 		}
 	} else if (ARRAY_TAG == tag) {
@@ -782,16 +808,19 @@
  * contiguous sequence of objects, where front points at the beginning of
  * the first object and *back points just past the end of the last object.
  * f may increase *back (for example, this is done by forward).
+ * foreachPointerInRange returns apointer to the end of the last object it
+ * visits.
  */
 
-static inline void foreachPointerInRange (GC_state s, pointer front, 
+static inline pointer foreachPointerInRange (GC_state s, 
+						pointer front, 
 						pointer *back,
 						GC_pointerFun f) {
 	pointer b;
 
 	if (DEBUG_DETAILED)
 		fprintf (stderr, "foreachPointerInRange  front = 0x%08x  *back = 0x%08x\n",
-				(uint)front, (uint)*back);
+				(uint)front, *(uint*)back);
 	b = *back;
 	assert (front <= b);
  	while (front < b) {
@@ -799,12 +828,12 @@
 			assert (isWordAligned ((uint)front));
 	       		if (DEBUG_DETAILED)
 				fprintf (stderr, "front = 0x%08x  *back = 0x%08x\n",
-						(uint)front, (uint)*back);
+						(uint)front, *(uint*)back);
 			front = foreachPointerInObject (s, f, toData (front));
 		}
 		b = *back;
 	}
-	assert(front == *back);
+	return front;
 }
 
 /* ---------------------------------------------------------------- */
@@ -813,28 +842,51 @@
 
 #ifndef NODEBUG
 
+static inline bool isInOldGen (GC_state s, pointer p) {
+	return s->heap.oldGen <= p and p < s->heap.oldGen + s->heap.oldGenSize;
+}
+
+static inline bool isInNursery (GC_state s, pointer p) {
+	return s->heap.nursery <= p and p < s->frontier;
+}
+
 static inline bool isInFromSpace (GC_state s, pointer p) {
- 	return ((s->heap.oldGen <= p and p < s->heap.toSpace)
-		or (s->heap.nursery <= p and p < s->frontier));
+ 	return (isInOldGen (s, p) or isInNursery (s, p));
 }
 
 static inline void assertIsInFromSpace (GC_state s, pointer *p) {
 #ifndef NODEBUG
 	unless (isInFromSpace (s, *p))
 		die ("gc.c: assertIsInFromSpace p = 0x%08x  *p = 0x%08x);\n",
-			(uint)p, (uint)*p);
+			(uint)p, *(uint*)p);
+	/* The following checks that intergenerational pointers have the
+	 * appropriate card marked.  Unfortunately, it doesn't work because
+	 * for stacks, the card containing the beginning of the stack is marked,
+	 * but any remaining cards aren't.
+	 */
+	if (FALSE and s->generational 
+		and isInOldGen (s, (pointer)p) 
+ 		and isInNursery (s, *p)
+		and not cardIsMarked (s, (pointer)p)) {
+		GC_display (s, stderr);
+		die ("gc.c: intergenerational pointer from 0x%08x to 0x%08x with unmarked card.\n",
+			(uint)p, *(uint*)p);
+	}
 #endif
 }
 
 static inline bool isInToSpace (GC_state s, pointer p) {
 	return (not (GC_isPointer (p))
-		or (s->heap2.oldGen <= p 
-			and p < s->heap2.oldGen + s->heap2.size));
+		or (s->doingMinorGC
+			? s->heap.oldGen + s->heap.oldGenSize <= p
+				and p < s->heap.nursery
+			: s->heap2.oldGen <= p 
+				and p < s->heap2.oldGen + s->heap2.size));
 }
 
 static bool invariant (GC_state s) {
-	/* would be nice to add divisiblity by pagesize of various things */
 	int i;
+	pointer back;
 	GC_stack stack;
 
 	if (DEBUG)
@@ -853,7 +905,12 @@
 		}
 	}
 	/* Heap */
-	assert (isWordAligned ((uint)s->frontier));
+	assert (isAligned (s->heap.size, s->cardSize));
+	assert (isAligned ((uint)s->heap.oldGen, s->cardSize));
+	assert (isAligned (s->heap.oldGenSize, WORD_SIZE));
+	assert (isAligned ((uint)s->heap.nursery, WORD_SIZE));
+	assert (isAligned (s->heap.nurserySize, WORD_SIZE));
+	assert (isAligned ((uint)s->frontier, WORD_SIZE));
 	assert (s->heap.nursery <= s->frontier);
 	assert (0 == s->heap.size
 		or (isPageAligned (s, s->heap.totalSize)
@@ -863,7 +920,9 @@
 	assert (s->heap2.start == NULL or s->heap.size == s->heap2.size);
 	/* Check that all pointers are into from space. */
 	foreachGlobal (s, assertIsInFromSpace);
-	foreachPointerInRange (s, s->heap.oldGen, &s->frontier, 
+	back = s->heap.oldGen + s->heap.oldGenSize;
+	foreachPointerInRange (s, s->heap.oldGen, &back, assertIsInFromSpace);
+	foreachPointerInRange (s, s->heap.nursery, &s->frontier, 
 				assertIsInFromSpace);
 	/* Current thread. */
 	stack = s->currentThread->stack;
@@ -972,30 +1031,27 @@
 	h->totalSize = 0;
 }
 
-static inline void releaseFromSpace (GC_state s) {
-	heapRelease (s, &s->heap);
-}
-
 static inline void releaseToSpace (GC_state s) {
 	heapRelease (s, &s->heap2);
 }
 
 static inline void heapShrink (GC_state s, GC_heap h, W32 keep) {
 	assert (keep <= h->size);
+	keep = align (keep, s->cardSize);
 	if (0 == keep)
 		heapRelease (s, h);
 	else if (keep < h->size) {
 		uint remove;
 
 		remove = (uint)h->start + h->totalSize 
-				- roundPage (s, (uint)h->oldGen + keep);
-		assert (isPageAligned (s, remove));
+				- align ((uint)h->oldGen + keep, s->pageSize);
+		assert (isAligned (remove, s->pageSize));
 		if (DEBUG or s->messages)
 			fprintf (stderr, 
-				"Shrinking space at 0x%08x of size %u by %u bytes.\n",
+				"Shrinking space at 0x%08x of size %s to %s bytes.\n",
 				(uint)h->start, 
-				(uint)h->totalSize, 
-				(uint)remove);
+				uintToCommaString ((uint)h->totalSize), 
+				uintToCommaString ((uint)h->totalSize - remove));
 		h->size = keep;
 		if (remove > 0) {
 			decommit (h->start + h->totalSize - remove, remove);
@@ -1013,12 +1069,16 @@
 	GC_heap h;
 
 	h = &s->heap;
-	h->oldGenSize = s->bytesLive;
-	h->toSpace = h->oldGen + h->oldGenSize;
-	h->nurserySize = h->oldGen + h->size - h->toSpace;
-	if (FALSE and s->generational) /* FIXME */
+	h->nurserySize = h->size - h->oldGenSize;
+	assert (isAligned (h->nurserySize, WORD_SIZE));
+	if (MINOR and s->generational) {
 		h->nurserySize /= 2;
+		unless (isAligned (h->nurserySize, WORD_SIZE))
+			h->nurserySize -= 2;
+	}
+	assert (isAligned (h->nurserySize, WORD_SIZE));
 	h->nursery = h->oldGen + h->size - h->nurserySize;
+	assert (isAligned ((uint)h->nursery, WORD_SIZE));
 	s->frontier = h->nursery;
 	setLimit (s);
 }
@@ -1033,7 +1093,15 @@
 	heapShrink (s, &s->heap2, keep);
 }
 
-/* heapCreate (s, need, minSize) allocates a heap of the size necessary to
+static inline void heapClearCardMap (GC_heap h) {
+	memset (h->start, 0, h->numCards);
+}
+
+static inline void heapClearCrossMap (GC_heap h) {
+	memset (h->crossMap, 0, h->numCards);
+}
+
+/* heapCreate (s, h, need, minSize) allocates a heap of the size necessary to
  * work with need live data, and ensures that at least minSize is available.
  * It returns TRUE if it is able to allocate the space, and returns FALSE if it
  * is unable.  If a reasonable size to space is already there, then heapCreate
@@ -1049,6 +1117,7 @@
 	requested = heapDesiredSize (s, need);
 	if (requested < minSize)
 		requested = minSize;
+	requested = align (requested, s->cardSize);
 	if (h->size >= minSize and h->size >= requested / 2)
 		/* Tospace is big enough.  Keep it. */
 		return TRUE;
@@ -1058,6 +1127,7 @@
 	backoff = (requested - minSize) / 20;
 	if (0 == backoff)
 		backoff = 1; /* enough to terminate the loop below */
+	backoff = align (backoff, s->cardSize);
 	/* mmap toggling back and forth between high and low addresses to
          * decrease the chance of virtual memory fragmentation causing an mmap
 	 * to fail.  This is important for large heaps.
@@ -1067,19 +1137,19 @@
 		static int direction = 1;
 		int i;
 
+		assert (isAligned (h->size, s->cardSize));
 		if (s->generational)
-			h->numCards = roundUp (h->size, BYTES_PER_CARD) 
-						/ BYTES_PER_CARD;
+			h->numCards = divCardSize (s, h->size);
 		else
 			h->numCards = 0;
 		if (DEBUG_DETAILED)
 			fprintf (stderr, "numCards = %u\n", h->numCards);
-		/* We make sure that the card maps take up a multiple of
-		 * BYTES_PER_CARD bytes so that the heap starts on a card
+		/* We make sure that the card map and cross map take up a 
+		 * multiple of s->cardSize bytes so that oldGen starts on a card
 		 * boundary.
 		 */
-		cardMapSpace = roundUp (2 * h->numCards, BYTES_PER_CARD);
-		h->totalSize = roundPage (s, h->size + cardMapSpace);
+		cardMapSpace = align (2 * h->numCards, s->cardSize);
+		h->totalSize = align (h->size + cardMapSpace, s->pageSize);
 		for (i = 0; i < 32; i++) {
 			unsigned long address;
 
@@ -1110,9 +1180,18 @@
 				if (h->totalSize > s->maxHeapSizeSeen)
 					s->maxHeapSizeSeen = h->totalSize;
 				h->oldGen = h->start + cardMapSpace;
-				assert ((uint)h->oldGen / BYTES_PER_CARD <= (uint)h->start);
-				h->cardMap = h->start - ((uint)h->oldGen / BYTES_PER_CARD);
-
+				assert (isAligned ((uint)h->oldGen, s->cardSize));
+				if (s->generational) {
+					assert (divCardSize (s, (uint)h->oldGen) <= (uint)h->start);
+					h->cardMap = h->start 
+						- divCardSize (s, (uint)h->oldGen);
+					assert (h->start == &h->cardMap[divCardSize(s, (uint)h->oldGen)]);
+					assert (h->start + h->numCards - 1 == &h->cardMap[divCardSize (s, (uint)h->oldGen + h->size - WORD_SIZE)]);
+					h->crossMap = h->start + h->numCards;
+				} else {
+					h->cardMap = NULL;
+					h->crossMap = NULL;
+				}
 				if (DEBUG or s->messages)
 					fprintf (stderr, "Created heap of size %s at 0x%08x.\n",
 							uintToCommaString (h->totalSize),
@@ -1129,6 +1208,40 @@
 	return FALSE;
 }
 
+static inline void setCrossMap (GC_state s, pointer p) {
+	if (s->generational and isAligned ((uint)p, s->cardSize)) {
+		GC_heap h;	
+
+		h = s->heapp;
+		if (DEBUG_GENERATIONAL)
+			fprintf (stderr, "crossMap[%u] = TRUE\n",
+					divCardSize (s, p - h->oldGen));
+		h->crossMap[divCardSize (s, p - h->oldGen)] = '\001';
+	}
+}
+
+static inline uint objectSize (GC_state s, pointer p)
+{
+	uint headerBytes, objectBytes;
+       	word header;
+	uint tag, numPointers, numNonPointers;
+
+	header = GC_getHeader(p);
+	SPLIT_HEADER();
+	if (NORMAL_TAG == tag) { /* Fixed size object. */
+		headerBytes = GC_NORMAL_HEADER_SIZE;
+		objectBytes = toBytes (numPointers + numNonPointers);
+	} else if (STACK_TAG == tag) { /* Stack. */
+		headerBytes = STACK_HEADER_SIZE;
+		objectBytes = sizeof(struct GC_stack) + ((GC_stack)p)->reserved;
+	} else { /* Array. */
+		assert(ARRAY_TAG == tag);
+		headerBytes = GC_ARRAY_HEADER_SIZE;
+		objectBytes = arrayNumBytes(p, numPointers, numNonPointers);
+	}
+	return headerBytes + objectBytes;
+}
+
 /* ---------------------------------------------------------------- */
 /*                    Cheney Copying Collection                     */
 /* ---------------------------------------------------------------- */
@@ -1139,6 +1252,7 @@
 
 /* forward (s, pp) forwards the object pointed to by *pp and updates *pp to 
  * point to the new object. 
+ * It also updates the crossMap if the object starts a card boundary.
  */
 static inline void forward (GC_state s, pointer *pp) {
 	pointer p;
@@ -1146,7 +1260,7 @@
 	word tag;
 
 	if (DEBUG_DETAILED)
-		fprintf(stderr, "forward  pp = 0x%x  *pp = 0x%x\n", (uint)pp, (uint)*pp);
+		fprintf(stderr, "forward  pp = 0x%x  *pp = 0x%x\n", (uint)pp, *(uint*)pp);
 	assert (isInFromSpace (s, *pp));
 	p = *pp;
 	header = GC_getHeader(p);
@@ -1182,7 +1296,9 @@
 			skip = stack->reserved - stack->used;
 		}
 		size = headerBytes + objectBytes;
-		assert (s->back + size + skip <= s->heap2.oldGen + s->heap2.size);
+		assert (s->doingMinorGC
+				? s->back + size + skip <= s->heap.nursery
+				: s->back + size + skip <= s->heap2.oldGen + s->heap2.size);
   		/* Copy the object. */
 		if (DEBUG_DETAILED)
 			fprintf (stderr, "copying from 0x%08x to 0x%08x of size %u\n",
@@ -1194,30 +1310,15 @@
  		/* Store the forwarding pointer in the old object. */
 		*(word*)(p - WORD_SIZE) = FORWARDED;
 		*(pointer*)p = s->back + headerBytes;
+		setCrossMap (s, s->back);
 		/* Update the back of the queue. */
 		s->back += size + skip;
-		assert(isWordAligned((uint)s->back));
+		assert (isAligned ((uint)s->back, WORD_SIZE));
 	}
 	*pp = *(pointer*)p;
 	assert (isInToSpace (s, *pp));
 }
 
-static inline void forwardEachPointerInRange (GC_state s, pointer front,
-						pointer *back) {
-	pointer b;
-
-	b = *back;
-	assert(front <= b);
-	while (front < b) {
-		while (front < b) {
-			assert(isWordAligned((uint)front));
-			front = foreachPointerInObject(s, forward, toData(front));
-		}
-		b = *back;
-	}
-	assert(front == *back);
-}
-
 static void swapSemis (GC_state s) {
 	struct GC_heap h;
 
@@ -1229,12 +1330,13 @@
 
 static inline void cheneyCopy (GC_state s) {
 	s->numCopyingGCs++;
+	s->heapp = &s->heap2;
  	if (DEBUG or s->messages) {
-		fprintf (stderr, "Copying GC.\n");
-	 	fprintf (stderr, "fromSpace = 0x%08x  fromSpace size = %s\n", 
+		fprintf (stderr, "Major copying GC.\n");
+	 	fprintf (stderr, "fromSpace = 0x%08x of size %s\n", 
 				(uint) s->heap.oldGen,
 				uintToCommaString (s->heap.size));
-		fprintf (stderr, "toSpace = 0x%08x  toSpace size = %s\n",
+		fprintf (stderr, "toSpace = 0x%08x of size %s\n",
 				(uint) s->heap2.oldGen,
 				uintToCommaString (s->heap2.size));
 	}
@@ -1244,17 +1346,146 @@
   	 * because that is too strong.
 	 */
 	assert (s->heap2.size >= s->frontier - s->heap.nursery);
-
+	heapClearCardMap (&s->heap2);
+	heapClearCrossMap (&s->heap2);
 	s->back = s->heap2.oldGen;
 	foreachGlobal (s, forward);
-	forwardEachPointerInRange (s, s->heap2.oldGen, &s->back);
+	foreachPointerInRange (s, s->heap2.oldGen, &s->back, forward);
 	s->bytesLive = s->back - s->heap2.oldGen;
+	s->heap2.oldGenSize = s->bytesLive;
 	if (DEBUG)
 		fprintf (stderr, "bytesLive = %u\n", s->bytesLive);
 	swapSemis (s);
 	s->bytesCopied += s->bytesLive;
  	if (DEBUG or s->messages)
-		fprintf (stderr, "Copying GC done.\n");
+		fprintf (stderr, "Major copying GC done.\n");
+}
+
+/* ---------------------------------------------------------------- */
+/*                     Minor copying collection                     */
+/* ---------------------------------------------------------------- */
+
+static inline void forwardIfInNursery (GC_state s, pointer *pp) {
+	pointer p;
+
+	p = *pp;
+	if (p < s->heap.nursery)
+		return;
+	if (DEBUG_GENERATIONAL)
+		fprintf (stderr, "intergenerational pointer from 0x%08x to 0x%08x\n",
+			(uint)pp, *(uint*)pp);
+	assert (s->heap.nursery <= p 
+			and p < s->heap.nursery + s->heap.nurserySize);
+	forward (s, pp);
+}
+
+/* Walk through all the cards and forward all intergenerational pointers. */
+static inline void forwardInterGenerationalPointers (GC_state s) {
+	pointer cardMap;
+	uint cardNum;
+	pointer crossMap;
+	GC_heap h;
+	uint numCards;
+	pointer objectStart;
+	pointer oldGenStart;
+	pointer oldGenEnd;
+
+	if (DEBUG_GENERATIONAL)
+		fprintf (stderr, "Forwarding inter-generational pointers.\n");
+	h = &s->heap;
+	/* Constants. */
+	cardMap = h->start;
+	crossMap = h->crossMap;
+	numCards = divCardSize (s, align (h->oldGenSize, s->cardSize));
+	oldGenStart = h->oldGen;
+	oldGenEnd = oldGenStart + h->oldGenSize;
+	/* Loop variables*/
+	objectStart = h->oldGen;
+	cardNum = 0;
+checkAll:
+	assert (cardNum <= numCards);
+	if (cardNum == numCards)
+		goto done;
+checkCard:
+	if (DEBUG_GENERATIONAL)
+		fprintf (stderr, "checking card %u  objectStart = 0x%08x  cardEnd = 0x%08x\n",
+				cardNum, 
+				(uint)objectStart,
+				(uint)oldGenStart + cardNumToSize (s, cardNum + 1));
+	assert (objectStart < oldGenStart + cardNumToSize (s, cardNum + 1));
+	if (cardMap[cardNum]) {
+		pointer cardStart;
+		pointer cardEnd;
+		uint size;
+
+		if (DEBUG_GENERATIONAL)
+			fprintf (stderr, "card %u is marked  objectStart = 0x%08x\n", 
+					cardNum, (uint)objectStart);
+		cardStart = oldGenStart + cardNumToSize (s, cardNum);
+skipObjects:
+		size = objectSize (s, toData (objectStart));
+		if (objectStart + size < cardStart) {
+			objectStart += size;
+			goto skipObjects;
+		}
+		cardEnd = cardStart + s->cardSize;
+		if (oldGenEnd < cardEnd) 
+			cardEnd = oldGenEnd;
+		assert (objectStart < cardEnd);
+		objectStart = 
+			foreachPointerInRange (s, objectStart, &cardEnd,
+						forwardIfInNursery);
+		if (objectStart == oldGenEnd)
+			goto done;
+		cardNum = divCardSize (s, objectStart - oldGenStart);
+		goto checkCard;
+	} else {
+		cardNum++;
+		if (crossMap[cardNum]) {
+			objectStart = oldGenStart + cardNumToSize (s, cardNum);
+			if (DEBUG_GENERATIONAL)
+				fprintf (stderr, "crossMap[%u] == TRUE   objectStart = 0x%08x\n", 
+						cardNum, (uint)objectStart);
+		}
+		goto checkAll;
+	}
+	assert (FALSE);
+done:
+	if (DEBUG_GENERATIONAL)
+		fprintf (stderr, "Forwarding inter-generational pointers done.\n");
+}
+
+static inline void minorGC (GC_state s) {
+	pointer toSpace;
+
+	if (not MINOR) {
+		s->heap.oldGenSize += s->frontier - s->heap.nursery;
+		s->heap.nursery = s->frontier;
+		return;
+	}
+	if (DEBUG_GENERATIONAL or s->messages)
+		fprintf (stderr, "Minor GC.\n");
+	assert (invariant (s));
+	s->numMinorGCs++;
+	s->numMinorsSinceLastMajor++;
+	s->doingMinorGC = TRUE;
+	s->heapp = &s->heap;
+	toSpace = s->heap.oldGen + s->heap.oldGenSize;
+	s->back = toSpace;
+	/* Forward all globals.  Would like to avoid doing this once all the
+ 	 * globals have been assigned.
+	 */
+	foreachGlobal (s, forwardIfInNursery);
+	forwardInterGenerationalPointers (s);
+	foreachPointerInRange (s, toSpace, &s->back, forwardIfInNursery);
+	s->heap.oldGenSize = s->back - s->heap.oldGen;
+	setNursery (s);
+	setStack (s);
+	s->bytesCopiedMinor += s->back - toSpace;
+	s->doingMinorGC = FALSE;
+	assert (invariant (s));
+	if (DEBUG_GENERATIONAL or s->messages)
+		fprintf (stderr, "Minor GC done.\n");
 }
 
 /* ---------------------------------------------------------------- */
@@ -1540,28 +1771,6 @@
 	*headerp = (Header)pp;
 }
 
-static inline uint objectSize (GC_state s, pointer p)
-{
-	uint headerBytes, objectBytes;
-       	word header;
-	uint tag, numPointers, numNonPointers;
-
-	header = GC_getHeader(p);
-	SPLIT_HEADER();
-	if (NORMAL_TAG == tag) { /* Fixed size object. */
-		headerBytes = GC_NORMAL_HEADER_SIZE;
-		objectBytes = toBytes (numPointers + numNonPointers);
-	} else if (STACK_TAG == tag) { /* Stack. */
-		headerBytes = STACK_HEADER_SIZE;
-		objectBytes = sizeof(struct GC_stack) + ((GC_stack)p)->reserved;
-	} else { /* Array. */
-		assert(ARRAY_TAG == tag);
-		headerBytes = GC_ARRAY_HEADER_SIZE;
-		objectBytes = arrayNumBytes(p, numPointers, numNonPointers);
-	}
-	return headerBytes + objectBytes;
-}
-
 static inline void updateForwardPointers (GC_state s) {
 	pointer back;
 	pointer front;
@@ -1573,12 +1782,15 @@
 	uint size;
 
 	if (DEBUG_MARK_COMPACT)
-		fprintf (stderr, "updateForwardPointers\n");
-	back = s->frontier;
+		fprintf (stderr, "Update forward pointers.\n");
+	back = s->heap.oldGen + s->heap.oldGenSize;
 	front = s->heap.oldGen;
 	endOfLastMarked = front;
 	gap = 0;
 updateObject:
+	if (DEBUG_MARK_COMPACT)
+		fprintf (stderr, "updateObject  front = 0x%08x  back = 0x%08x\n",
+				(uint)front, (uint)back);
 	if (front == back)
 		goto done;
 	headerp = (Header*)front;
@@ -1665,15 +1877,16 @@
 	Header header;
 	pointer p;
 	uint size;
-	uint live;
 
 	if (DEBUG_MARK_COMPACT)
-		fprintf (stderr, "updateBackwardPointersAndSlide\n");
-	back = s->frontier;
+		fprintf (stderr, "Update backward pointers and slide.\n");
+	back = s->heap.oldGen + s->heap.oldGenSize;
 	front = s->heap.oldGen;
 	gap = 0;
-	live = 0;
 updateObject:
+	if (DEBUG_MARK_COMPACT)
+		fprintf (stderr, "updateObject  front = 0x%08x  back = 0x%08x\n",
+				(uint)front, (uint)back);
 	if (front == back)
 		goto done;
 	header = *(word*)front;
@@ -1695,13 +1908,12 @@
 			if (DEBUG_MARK_COMPACT)
 				fprintf (stderr, "unmarking 0x%08x of size %u\n", 
 						(uint)p, size);
+			setCrossMap (s, front - gap);
 			/* slide */
-			unless (0 == gap)
-				if (DEBUG_MARK_COMPACT)
-					fprintf (stderr, "sliding 0x%08x down %u\n",
-							(uint)front, gap);
+			if (DEBUG_MARK_COMPACT)
+				fprintf (stderr, "sliding 0x%08x down %u\n",
+						(uint)front, gap);
 			copy (front, front - gap, size);
-			live += size;
 			front += size;
 			goto updateObject;
 		} else {
@@ -1729,29 +1941,40 @@
 			header = *(word*)cur;
 			*(word*)cur = (word)new;
 		} while (0 == (1 & header));
-		/* The header will be stored by umark. */
+		/* The header will be stored by unmark. */
 		goto unmark;
 	}
 	assert (FALSE);
 done:
-	s->bytesLive = live;
+	s->bytesLive = front - gap - s->heap.oldGen;
+	if (DEBUG_MARK_COMPACT)
+		fprintf (stderr, "bytesLive = %u\n", s->bytesLive);
 	return;
 }
 
 static inline void markCompact (GC_state s) {
+	/* markCompact relies on all the objects being contiguous.  So, if
+	 * we haven't yet done a minorGC to make them contiguous, do so.
+	 */
+	if (s->generational and s->frontier > s->heap.nursery)
+		minorGC (s);
 	if (DEBUG or s->messages)
-		fprintf (stderr, "Mark-compact GC.\n");
+		fprintf (stderr, "Major mark-compact GC.\n");
 	s->numMarkCompactGCs++;
+	s->heapp = &s->heap;
+	heapClearCardMap (&s->heap);
+	heapClearCrossMap (&s->heap);
+	fprintf (stderr, "Marking.\n");
 	foreachGlobal (s, markGlobal);
 	foreachGlobal (s, threadInternal);
 	updateForwardPointers (s);
 	updateBackwardPointersAndSlide (s);
 	s->bytesMarkCompacted += s->bytesLive;
+	s->heap.oldGenSize = s->bytesLive;
 	if (DEBUG or s->messages)
-		fprintf (stderr, "Mark-compact GC done.\n");
+		fprintf (stderr, "Major mark-compact GC done.\n");
 }
 
-
 static void translatePointer (GC_state s, pointer *p) {
 	if (s->translateUp)
 		*p += s->translateDiff;
@@ -1784,6 +2007,10 @@
 	foreachPointerInRange (s, to, &limit, translatePointer);
 }
 
+/* ---------------------------------------------------------------- */
+/*                            resizeHeap                            */
+/* ---------------------------------------------------------------- */
+
 /* Resize from space and to space, guaranteeing that at least 'need' bytes are
  * available in from space and that to space is either the same size as from
  * space or is unmapped.
@@ -1795,8 +2022,8 @@
 	grow = FALSE;
 	keep = 0;
 	if (DEBUG_RESIZING)
-		fprintf (stderr, "resizeHeap  need = %llu  fromSize = %u\n",
-				need, s->heap.size);
+		fprintf (stderr, "resizeHeap  need = %llu  fromSize = %s\n",
+				need, uintToCommaString (s->heap.totalSize));
 	if (need >= s->heap.size)
 		grow = TRUE;
 	else if (need * LIVE_RATIO_MIN >= s->ramSlop * s->totalRam) {
@@ -1825,25 +2052,43 @@
 			keep = s->heap.size;
 	}
 	if (DEBUG_RESIZING)
-		fprintf (stderr, "size = %u  need = %u  keep = %u\n",
-				(uint)s->heap.size, (uint)need, (uint)keep);
+		fprintf (stderr, "size = %s  need = %s  keep = %s\n",
+				uintToCommaString ((uint)s->heap.totalSize), 
+				uintToCommaString ((uint)need), 
+				uintToCommaString ((uint)keep));
 	/* Shrink or grow the heap. */
 	if (not grow) {
 		assert (keep <= s->heap.size);
 		shrinkFromSpace (s, keep);
 	} else {
 		pointer old;
+		uint size;
 
 		if (DEBUG_RESIZING)
-			fprintf (stderr, "Growing from space.  bytesLive = %u\n",
-					(uint)s->bytesLive);
+			fprintf (stderr, "Growing from space.  oldGenSize = %u\n",
+					(uint)s->heap.oldGenSize);
 		releaseToSpace (s);
 		old = s->heap.oldGen;
-		assert (s->bytesLive <= s->heap.size);
-		shrinkFromSpace (s, s->bytesLive);
+		size = s->heap.oldGenSize;
+		assert (size <= s->heap.size);
+		shrinkFromSpace (s, size);
 		/* Allocate a space of the desired size. */
 		if (heapCreate (s, &s->heap2, need, need)) {
-			copy (s->heap.oldGen, s->heap2.oldGen, s->bytesLive);
+			pointer from;
+			pointer to;
+
+			from = old + size;
+			to = s->heap2.oldGen + size;
+copy:			
+			from -= COPY_CHUNK_SIZE;
+			to -= COPY_CHUNK_SIZE;
+			if (from > old) {
+				copy (from, to, COPY_CHUNK_SIZE);
+				heapShrink (s, &s->heap, from - old);
+				goto copy;
+			}
+			copy (old, s->heap2.oldGen, 
+				from + COPY_CHUNK_SIZE - old);
 			heapRelease (s, &s->heap);
 			swapSemis (s);
 		} else {
@@ -1858,12 +2103,12 @@
 				fprintf (stderr, "Paging from space to %s.\n", 
 						template);
 			stream = sfopen (template, "wb");
-			sfwrite (old, 1, s->bytesLive, stream);
+			sfwrite (old, 1, size, stream);
 			sfclose (stream);
-			releaseFromSpace (s);
+			heapRelease (s, &s->heap);
 			if (heapCreate (s, &s->heap, need, need)) {
 				stream = sfopen (template, "rb");
-				sfread (s->heap.oldGen, 1, s->bytesLive, stream);
+				sfread (s->heap.oldGen, 1, size, stream);
 				sfclose (stream);
 				sunlink (template);
 			} else {
@@ -1873,7 +2118,8 @@
 				die ("Out of memory.  Need %llu bytes.\n", need);
 			}
 		}
-		translateHeap (s, old, s->heap.oldGen, s->bytesLive);
+		s->heap.oldGenSize = size;
+		translateHeap (s, old, s->heap.oldGen, size);
 	}
 	setNursery (s);
 	setStack (s);
@@ -1906,6 +2152,7 @@
 	stack = newStack (s, size);
 	stackCopy (s->currentThread->stack, stack);
 	s->currentThread->stack = stack;
+	markCard (s, (pointer)s->currentThread);
 	setStack (s);
 }
 
@@ -1919,26 +2166,19 @@
 /*                        Garbage Collection                        */
 /* ---------------------------------------------------------------- */
 
-void doGC (GC_state s, uint bytesRequested) {
-	uint gcTime;
-	uint size;
-	uint stackBytesRequested;
-	struct rusage ru_start, ru_finish, ru_total;
-	
-	assert (invariant (s));
-	if (DEBUG or s->messages)
-		fprintf (stderr, "Starting gc.  bytesRequested = %u\n",
-					bytesRequested);
-	fixedGetrusage (RUSAGE_SELF, &ru_start);
- 	s->bytesAllocated += s->frontier - (s->heap.nursery + s->bytesLive);
-	size = s->heap.size;
-	stackBytesRequested = getStackBytesRequested (s);
+static inline void majorGC (GC_state s, uint totalBytesRequested) {
+	s->numMinorsSinceLastMajor = 0;
+	/* If using generational GC, then we need to multiply the total bytes
+	 * requested by 2 since the heap space remaining is split in half, with
+	 * half for the nursery and half for the to space of a minor GC.
+	 */
+	if (s->generational)
+		totalBytesRequested *= 2;
         if (not s->useFixedHeap
  		and (W64)s->bytesLive + (W64)s->heap.size 
 			<= s->ramSlop * s->totalRam
 		and heapCreate (s, &s->heap2,
-					(W64)s->bytesLive + (W64)bytesRequested 
-					        + (W64)stackBytesRequested,
+					(W64)s->bytesLive + totalBytesRequested,
 					s->heap.oldGenSize 
 						+ s->frontier - s->heap.nursery))
 		cheneyCopy (s);
@@ -1948,8 +2188,37 @@
 	setStack (s);
 	if (s->bytesLive > s->maxBytesLive)
 		s->maxBytesLive = s->bytesLive;
-	resizeHeap (s, (W64)s->bytesLive + (W64)bytesRequested 
- 			+ (W64)stackBytesRequested);
+	/* Notice that the s->bytesLive below is different than the s->bytesLive
+	 * used is an argument to heapCreate above.  Above, it was an estimate.
+	 * Here, it is exactly how much was live after the GC.
+	 */
+	resizeHeap (s, (W64)s->bytesLive + totalBytesRequested);
+}
+
+void doGC (GC_state s, uint bytesRequested, bool forceMajor) {
+	uint gcTime;
+	uint size;
+	uint stackBytesRequested;
+	struct rusage ru_start, ru_finish, ru_total;
+	W64 totalBytesRequested;
+	
+	assert (invariant (s));
+	if (DEBUG or s->messages)
+		fprintf (stderr, "Starting gc.  bytesRequested = %u\n",
+					bytesRequested);
+	fixedGetrusage (RUSAGE_SELF, &ru_start);
+ 	s->bytesAllocated += s->frontier - s->heap.nursery;
+	forceMajor = forceMajor
+			or not s->generational
+/*			or s->heap.oldGenSize > 0.90 * s->heap.size */
+			or 10 == s->numMinorsSinceLastMajor;
+	if (not forceMajor)
+		minorGC (s);
+	stackBytesRequested = getStackBytesRequested (s);
+	totalBytesRequested = (W64)bytesRequested + (W64)stackBytesRequested;
+	size = s->heap.size;
+	if (forceMajor or totalBytesRequested > s->limitPlusSlop - s->frontier)
+		majorGC (s, totalBytesRequested);
 	if (stackBytesRequested > 0)
 		growStack (s);
 	fixedGetrusage (RUSAGE_SELF, &ru_finish);
@@ -1960,9 +2229,9 @@
 	if (DEBUG or s->messages) {
 		fprintf (stderr, "Finished gc.\n");
 		fprintf (stderr, "time(ms): %s\n", intToCommaString (gcTime));
-		fprintf (stderr, "live(bytes): %s (%.1f%%)\n", 
-			intToCommaString (s->bytesLive),
-			100.0 * ((double) s->bytesLive) / size);
+		fprintf (stderr, "old gen size(bytes): %s (%.1f%%)\n", 
+				intToCommaString (s->heap.oldGenSize),
+				100.0 * ((double) s->heap.oldGenSize) / size);
 	}
 	if (DEBUG) 
 		GC_display (s, stderr);
@@ -1976,7 +2245,7 @@
 static inline void ensureFree (GC_state s, uint b) {
 	assert (s->frontier <= s->limitPlusSlop);
 	if (b > s->limitPlusSlop - s->frontier)
-		doGC (s, b);
+		doGC (s, b, FALSE);
 	assert (b <= s->limitPlusSlop - s->frontier);
 }
 
@@ -2010,7 +2279,7 @@
 		setStack (s);
 		if (t->bytesNeeded > s->limitPlusSlop - s->frontier)  {
 			enter (s);
-			doGC (s, t->bytesNeeded);
+			doGC (s, t->bytesNeeded, FALSE);
 			leave (s);
 		}
 	}
@@ -2075,9 +2344,9 @@
 		(W64)(W32)s->frontier + (W64)bytesRequested 
 		+ (W64)stackBytesRequested > (W64)(W32)s->limitPlusSlop) {
 		if (s->messages)
-			fprintf(stderr, "%s %d: doGC\n", file, line);
+			fprintf (stderr, "%s %d: doGC\n", file, line);
 		/* This GC will grow the stack, if necessary. */
-		doGC (s, bytesRequested);
+		doGC (s, bytesRequested, force);
 	} else if (not (stackTopIsOk (s, s->currentThread->stack)))
 		growStack (s);
 	else {
@@ -2128,7 +2397,7 @@
 			(uint)ensureBytesFree);
 	if (require > s->limitPlusSlop - s->frontier) {
 		enter (s);
-		doGC (s, require);
+		doGC (s, require, FALSE);
 		leave (s);
 	}
 	frontier = (W32*)s->frontier;
@@ -2174,6 +2443,7 @@
 	ensureFree (s, stackBytes (stackSize) + threadBytes ());
 	stack = newStack (s, stackSize);
 	t = (GC_thread) object (s, THREAD_HEADER, threadBytes ());
+	t->bytesNeeded = 0;
 	t->exnStack = BOGUS_EXN_STACK;
 	t->stack = stack;
 	if (DEBUG_THREADS)
@@ -2243,7 +2513,7 @@
 static inline void initSignalStack (GC_state s) {
 #if (defined (__linux__) || defined (__FreeBSD__))
         static stack_t altstack;
-	size_t ss_size = roundPage (s, SIGSTKSZ);
+	size_t ss_size = align (SIGSTKSZ, s->pageSize);
 	size_t psize = s->pageSize;
 	void *ss_sp = ssmmap (2 * ss_size, psize, psize);
 	altstack.ss_sp = ss_sp + ss_size;
@@ -2588,7 +2858,7 @@
 	initIntInfs (s);
 	initStrings (s);
 	assert (s->frontier - s->heap.oldGen <= s->bytesLive);
-	s->bytesLive = s->frontier - s->heap.oldGen;
+	s->heap.oldGenSize = s->frontier - s->heap.oldGen;
 	setNursery (s);
 	heapInit (s, &s->heap2);
 	switchToThread (s, newThreadOfSize (s, initialStackSize (s)));
@@ -2612,13 +2882,13 @@
 	unless (s->magic == magic)
 		die("Invalid world: wrong magic number.");
 	oldGen = (pointer) sfreadUint (file);
-	s->bytesLive = sfreadUint (file);
+	s->heap.oldGenSize = sfreadUint (file);
 	s->currentThread = (GC_thread) sfreadUint (file);
 	s->signalHandler = (GC_thread) sfreadUint (file);
-       	heapCreate (s, &s->heap, s->bytesLive, s->bytesLive);
+       	heapCreate (s, &s->heap, s->heap.oldGenSize, s->heap.oldGenSize);
 	setNursery (s);
 	heapInit (s, &s->heap2);
-	sfread (s->heap.oldGen, 1, s->bytesLive, file);
+	sfread (s->heap.oldGen, 1, s->heap.oldGenSize, file);
 	(*s->loadGlobals) (file);
 	unless (EOF == fgetc (file))
 		die ("Invalid world: junk at end of file.");
@@ -2626,7 +2896,7 @@
 	/* translateHeap must occur after loading the heap and globals, since it
 	 * changes pointers in all of them.
 	 */
-	translateHeap (s, oldGen, s->heap.oldGen, s->bytesLive);
+	translateHeap (s, oldGen, s->heap.oldGen, s->heap.oldGenSize);
 	setStack (s);
 }
 
@@ -2638,11 +2908,13 @@
 	initSignalStack (s);
 	s->bytesAllocated = 0;
 	s->bytesCopied = 0;
+	s->bytesCopiedMinor = 0;
 	s->bytesMarkCompacted = 0;
 	s->canHandle = 0;
+	s->cardSize = 0x1 << s->cardSizeLog2;
 	s->currentThread = BOGUS_THREAD;
+	s->doingMinorGC = FALSE;
 	rusageZero (&s->ru_gc);
-	s->generational = TRUE;
 	s->inSignalHandler = FALSE;
 	s->isOriginal = TRUE;
 	s->maxBytesLive = 0;
@@ -2652,8 +2924,10 @@
 	s->maxStackSizeSeen = 0;
 	s->messages = FALSE;
 	s->numCopyingGCs = 0;
-	s->numMarkCompactGCs = 0;
 	s->numLCs = 0;
+	s->numMarkCompactGCs = 0;
+	s->numMinorGCs = 0;
+	s->numMinorsSinceLastMajor = 0;
 	s->ramSlop = 0.80;
 	s->savedThread = BOGUS_THREAD;
 	s->signalHandler = BOGUS_THREAD;
@@ -2755,13 +3029,14 @@
 			(0.0 == time) ? 0.0 
 			: 100.0 * ((double) gcTime) / time);
 		displayUint ("maxPause(ms)", s->maxPause);
+		displayUint ("number of minor GCs", s->numMinorGCs);
 		displayUint ("number of copying GCs", s->numCopyingGCs);
 		displayUint ("number of mark compact GCs", s->numMarkCompactGCs);
-		displayUllong ("number of LCs", s->numLCs);
 		displayUllong ("bytes allocated",
 	 			s->bytesAllocated 
 				+ (s->frontier - s->heap.nursery - s->bytesLive));
-		displayUllong ("bytes copied", s->bytesCopied);
+		displayUllong ("bytes copied (minor)", s->bytesCopiedMinor);
+		displayUllong ("bytes copied (major)", s->bytesCopied);
 		displayUllong ("bytes mark-compacted", s->bytesMarkCompacted);
 		displayUint ("max bytes live", s->maxBytesLive);
 #if METER
@@ -2822,17 +3097,17 @@
 		fprintf (stderr, "Save world.\n");
 	enter (s);
 	/* Compact the heap. */
-	doGC (s, 0);
+	doGC (s, 0, TRUE);
 	sprintf (buf,
 		"Heap file created by MLton.\noldGen = 0x%08x\nbytesLive = %u\n",
 		(uint)s->heap.oldGen, (uint)s->bytesLive);
 	swrite (fd, buf, 1 + strlen(buf)); /* +1 to get the '\000' */
 	swriteUint (fd, s->magic);
 	swriteUint (fd, (uint)s->heap.oldGen);
-	swriteUint (fd, (uint)s->bytesLive);
+	swriteUint (fd, (uint)s->heap.oldGenSize);
 	swriteUint (fd, (uint)s->currentThread);
 	swriteUint (fd, (uint)s->signalHandler);
- 	swrite (fd, s->heap.oldGen, s->bytesLive);
+ 	swrite (fd, s->heap.oldGen, s->heap.oldGenSize);
 	(*s->saveGlobals) (fd);
 	leave (s);
 }
@@ -2841,16 +3116,17 @@
 	enter (s);
 	if (DEBUG or s->messages)
 		fprintf (stderr, "Packing heap of size %s.\n",
-				uintToCommaString (s->heap.size));
+				uintToCommaString (s->heap.totalSize));
 	/* Could put some code here to skip the GC if there hasn't been much
 	 * allocated since the last collection.
  	 */
-	doGC (s, 0);
+	doGC (s, 0, TRUE);
 	shrinkFromSpace (s, s->bytesLive * 1.1);
 	setNursery (s);
+	releaseToSpace (s);
 	if (DEBUG or s->messages)
 		fprintf (stderr, "Packed heap to size %s.\n",
-				uintToCommaString (s->heap.size));
+				uintToCommaString (s->heap.totalSize));
 	leave (s);
 }
 
@@ -2859,7 +3135,7 @@
 	if (DEBUG or s->messages)
 		fprintf (stderr, "Unpacking heap of size %s.\n",
 				uintToCommaString (s->heap.size));
-	s->bytesLive = s->frontier - s->heap.oldGen;
+	minorGC (s);
 	resizeHeap (s, s->bytesLive);
 	if (DEBUG or s->messages)
 		fprintf (stderr, "Unpacked heap of size %s.\n",



1.35      +9 -1      mlton/runtime/gc.h

Index: gc.h
===================================================================
RCS file: /cvsroot/mlton/mlton/runtime/gc.h,v
retrieving revision 1.34
retrieving revision 1.35
diff -u -r1.34 -r1.35
--- gc.h	30 Jul 2002 02:48:34 -0000	1.34
+++ gc.h	7 Aug 2002 01:02:43 -0000	1.35
@@ -178,6 +178,8 @@
 	/* The order of these fields is important.  The nonpointer fields
 	 * must be first, because this object must appear to be a normal heap
 	 * object.
+	 * Furthermore, the exnStack field must be first, because the native
+	 * codegen depends on this (which is bad and should be fixed).
 	 */
 	uint exnStack;    	/* An offset added to stackBottom that specifies 
 				 * where the top of the exnStack is.
@@ -216,7 +218,6 @@
 	*/
 	uint size;
 	pointer start;		/* start of memory area */
-	pointer toSpace;
 	/* totalSize is the total length of the memory area.  i.e., the memory
 	 * range is [start, start + totalSize)
          */
@@ -248,15 +249,20 @@
 	pointer back;     	/* Points at next available word in toSpace. */
 	ullong bytesAllocated;
  	ullong bytesCopied;
+	ullong bytesCopiedMinor;
 	int bytesLive;		/* Number of bytes copied by most recent GC. */
 	ullong bytesMarkCompacted;
+	uint cardSize;
+	uint cardSizeLog2;
 	GC_thread currentThread; /* This points to a thread in the heap. */
+	bool doingMinorGC;	/* Set to true during a minor GC. */
 	uint fixedHeapSize; 	/* Only meaningful if useFixedHeap. */
 	GC_frameLayout *frameLayouts;
 	bool generational;	/* Whether or not to use generational gc. */
 	pointer *globals; 	/* An array of size numGlobals. */
 	struct GC_heap heap;
 	struct GC_heap heap2;
+	GC_heap heapp;	/* only used during GC. */
 	bool inSignalHandler; 	/* TRUE iff a signal handler is running. */
 	/* canHandle == 0 iff GC may switch to the signal handler
  	 * thread.  This is used to implement critical sections.
@@ -286,6 +292,8 @@
 	uint numGlobals;	/* Number of pointers in globals array. */
  	ullong numLCs;
  	uint numMarkCompactGCs;
+	uint numMinorGCs;
+	uint numMinorsSinceLastMajor;
 	GC_ObjectType *objectTypes; /* Array of object types. */
 	uint pageSize; /* bytes */
 	float ramSlop;



1.10      +13 -4     mlton/runtime/my-lib.c

Index: my-lib.c
===================================================================
RCS file: /cvsroot/mlton/mlton/runtime/my-lib.c,v
retrieving revision 1.9
retrieving revision 1.10
diff -u -r1.9 -r1.10
--- my-lib.c	27 Jun 2002 17:29:27 -0000	1.9
+++ my-lib.c	7 Aug 2002 01:02:43 -0000	1.10
@@ -154,13 +154,22 @@
  	return buf + i + 1;
 }
 
-string uintToCommaString(uint n) {
-	static char buf[BUF_SIZE];
+string uintToCommaString (uint n) {
+	static char buf1[BUF_SIZE];
+	static char buf2[BUF_SIZE];
+	static char buf3[BUF_SIZE];
+	static char buf4[BUF_SIZE];
+	static char buf5[BUF_SIZE];
+	static char *bufs[] = {buf1, buf2, buf3, buf4, buf5};
+	static int bufIndex = 0;
+	static char *buf;
 	int i;
-	
+
+	buf = bufs[bufIndex++];
+	bufIndex %= 5;
+
 	i = BUF_SIZE - 1;
 	buf[i--] = '\000';
-	
 	if (0 == n)
 		buf[i--] = '0';
         else {





-------------------------------------------------------
This sf.net email is sponsored by:ThinkGeek
Welcome to geek heaven.
http://thinkgeek.com/sf
_______________________________________________
MLton-devel mailing list
MLton-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/mlton-devel