[MLton-devel] cvs commit: double alignment

Stephen Weeks sweeks@users.sourceforge.net
Thu, 24 Apr 2003 13:51:06 -0700


sweeks      03/04/24 13:51:05

  Modified:    doc      changelog
               include  ccodegen.h codegen.h x86codegen.h
               mlton/backend allocate-registers.fun allocate-registers.sig
                        backend.fun machine-atoms.sig machine.fun mtype.fun
                        mtype.sig representation.fun
               mlton/codegen/c-codegen c-codegen.fun
               mlton/control control.sig control.sml
               mlton/main main.sml
               runtime  gc.c gc.h my-lib.c
               runtime/basis IntInf.c
  Log:
  Added mlton flag -align {4|8}.  With -align n, the following are
  true:
  
  	The first data word of objects, stacks, and stack frames is
  	aligned mod n.
  
  	The size of stack frames and object is divisible by n.
  	object width
  
  So, -align 4 is the same as we always used to have, and -align 8 is
  new.  In addition, with -align 8, doubles stored in memory are
  guaranteed to be aligned mod 8.  This follows from the fact the
  offsets used for doubles in objects and stack slots are aligned mod 8,
  plus the fact that the start of the object or stack frame is aligned
  mod 8.
  
  The compiler changes were to the representation pass, which now
  ensures that object sizes are properly aligned and to register
  allocation, which ensures that stack frame sizes are properly
  aligned.
  
  Fixed one hole in the MACHINE type checker.  It now checks that the
  stack offset at which a label (for a Cont, Creturn, or Handler) is
  stored corresponds to the frame size associated with the label.  Of
  course, I found this by introducing a bug where this wasn't the case
  and watching the stack pop of the wrong frame size put the stackTop at
  the wrong place.
  
  Most of the changes were in the runtime, to make sure that it properly
  aligns the frontier.  Nothing very deep, just lots of details to get
  the invariant right.  The biggest change had to do with crossMaps.
  Since with -align 8, the header word is always guaranteed to be at 4
  mod 8, the crossMap used by generational GC becomes useless.  That is,
  it will never report that an object start coincides with a card start.
  So, I changed the meaning of crossmap and improved its usage.  Instead
  of having a single bit stored in a byte, I now use the entire byte to
  record the start of the last object in card (or 255 if there is no
  object start in the card).  This last object can be used as the start
  point to scan the next marked card.
  
  With -debug true, mlton now passes -DASSERT=1 to gcc when compiling C
  files.  This works nicely with asserts in ccodegen.h.
  
  I worked for a bit on doing selective alignment by frontier skipping,
  i.e., only aligning objects with doubles in them by aligning the
  frontier immediately before allocation of an object with a double.
  Unfortunately, this doesn't work well with copying GC, since depending
  on the vagaries of object order, more or less extra space might be
  needed for frontier alignment.  So a copying GC might need more space
  in to space than in from space.  I decided I didn't want to deal with
  that, so I stuck with the alignment via object padding above.

Revision  Changes    Path
1.23      +12 -0     mlton/doc/changelog

Index: changelog
===================================================================
RCS file: /cvsroot/mlton/mlton/doc/changelog,v
retrieving revision 1.22
retrieving revision 1.23
diff -u -r1.22 -r1.23
--- changelog	18 Apr 2003 22:44:54 -0000	1.22
+++ changelog	24 Apr 2003 20:50:39 -0000	1.23
@@ -1,5 +1,17 @@
 Here are the changes since version 20030312.
 
+* 2003-04-24
+  - Added -align {4|8}, which controls alignment of objects.  With
+    -align 8, memory accesses to doubles are guaranteed to be aligned
+    mod 8, and so don't need special routines to load or store.
+
+* 2003-04-22
+  - Fixed bug that caused a total failure of time profiling with
+    -native false.  The bug was introduced with the C codegen
+    improvements that split the C into multiple files.  Now, the C
+    codegen declares all profile labels used in each file so that they
+    are global symbols. 
+
 * 2003-04-18
   - Added MLton.Weak, which supports weak pointers.
 



1.55      +66 -40    mlton/include/ccodegen.h

Index: ccodegen.h
===================================================================
RCS file: /cvsroot/mlton/mlton/include/ccodegen.h,v
retrieving revision 1.54
retrieving revision 1.55
diff -u -r1.54 -r1.55
--- ccodegen.h	10 Apr 2003 02:03:05 -0000	1.54
+++ ccodegen.h	24 Apr 2003 20:50:40 -0000	1.55
@@ -21,7 +21,7 @@
 #define BZ(x, l)							\
 	do {								\
 		if (DEBUG_CCODEGEN)					\
-			fprintf (stderr, "%s: %d  BZ(%d, %s)\n",	\
+			fprintf (stderr, "%s:%d: BZ(%d, %s)\n",	\
 					__FILE__, __LINE__, (x), #l);	\
 		if (0 == (x)) goto l;					\
 	} while (0)
@@ -29,7 +29,7 @@
 #define BNZ(x, l)							\
 	do {								\
 		if (DEBUG_CCODEGEN)					\
-			fprintf (stderr, "%s: %d  BNZ(%d, %s)\n",	\
+			fprintf (stderr, "%s:%d: BNZ(%d, %s)\n",	\
 					__FILE__, __LINE__, (x), #l);	\
 		if (x) goto l;						\
 	} while (0)
@@ -58,7 +58,7 @@
 
 #define ChunkSwitch(n)							\
 		if (DEBUG_CCODEGEN)					\
-			fprintf (stderr, "%s: %d  entering chunk %d\n",	\
+			fprintf (stderr, "%s:%d: entering chunk %d\n",	\
 					__FILE__, __LINE__, n);		\
 		CacheFrontier();					\
 		CacheStackTop();					\
@@ -86,7 +86,7 @@
 #define Thread_returnToC()							\
 	do {									\
 		if (DEBUG_CCODEGEN)						\
-			fprintf (stderr, "%s: %d  Thread_returnToC()\n",	\
+			fprintf (stderr, "%s:%d: Thread_returnToC()\n",	\
 					__FILE__, __LINE__);			\
 		returnToC = TRUE;						\
 		return cont;							\
@@ -96,7 +96,7 @@
 /*                       main                        */
 /* ------------------------------------------------- */
 
-#define Main(cs, mg, mfs, mlw, mmc, ps, mc, ml)					\
+#define Main(al, cs, mg, mfs, mlw, mmc, ps, mc, ml)				\
 /* Globals */									\
 char CReturnC;   /* The CReturn's must be globals and cannot be per chunk */	\
 double CReturnD; /* because they may be assigned in one chunk and read in */	\
@@ -129,7 +129,7 @@
 int main (int argc, char **argv) {						\
 	struct cont cont;							\
 	gcState.native = FALSE;							\
-	Initialize(cs, mg, mfs, mlw, mmc, ps);					\
+	Initialize (al, cs, mg, mfs, mlw, mmc, ps);				\
 	if (gcState.isOriginal) {						\
 		real_Init();							\
 		PrepFarJump(mc, ml);						\
@@ -167,26 +167,10 @@
 		goto leaveChunk;		\
 	} while (0)
 
-#define Reg(name, i) local ## name ## i
-#define RC(n) Reg(c, n)
-#define RD(n) Reg(d, n)
-#define RI(n) Reg(i, n)
-#define RP(n) Reg(p, n)
-#define RU(n) Reg(u, n)
-
-#define Declare(ty, name, i) ty Reg(name, i)
-#define DC(n) Declare(uchar, c, n)
-#define DD(n) Declare(double, d, n)
-#define DI(n) Declare(int, i, n)
-#define DP(n) Declare(pointer, p, n)
-#define DU(n) Declare(uint, u, n)
 
-#define Slot(ty, i) *(ty*)(stackTop + (i))
-#define SC(i) Slot(uchar, i)
-#define SD(i) Slot(double, i)
-#define SI(i) Slot(int, i)
-#define SP(i) Slot(pointer, i)
-#define SU(i) Slot(uint, i)
+/* ------------------------------------------------- */
+/*                      Globals                      */
+/* ------------------------------------------------- */
 
 #define Global(ty, i) (global ## ty [ i ])
 #define GC(i) Global(uchar, i)
@@ -196,6 +180,28 @@
 #define GPNR(i) Global(pointerNonRoot, i)
 #define GU(i) Global(uint, i)
 
+/* ------------------------------------------------- */
+/*                     Registers                     */
+/* ------------------------------------------------- */
+
+#define Declare(ty, name, i) ty Reg(name, i)
+#define DC(n) Declare(uchar, c, n)
+#define DD(n) Declare(double, d, n)
+#define DI(n) Declare(int, i, n)
+#define DP(n) Declare(pointer, p, n)
+#define DU(n) Declare(uint, u, n)
+
+#define Reg(name, i) local ## name ## i
+#define RC(n) Reg(c, n)
+#define RD(n) Reg(d, n)
+#define RI(n) Reg(i, n)
+#define RP(n) Reg(p, n)
+#define RU(n) Reg(u, n)
+
+/* ------------------------------------------------- */
+/*                      Memory                       */
+/* ------------------------------------------------- */
+
 #define Offset(ty, b, o) (*(ty*)((b) + (o)))
 #define OC(b, i) Offset(uchar, b, i)
 #define OD(b, i) Offset(double, b, i)
@@ -214,20 +220,33 @@
 /*                       Stack                       */
 /* ------------------------------------------------- */
 
+#define Slot(ty, i) *(ty*)(stackTop + (i))
+#define SC(i) Slot(uchar, i)
+#define SD(i)							\
+	(assert (0 == ((uint)stackTop + (i)) % gcState.alignment),	\
+	Slot(double, i))
+#define SI(i) Slot(int, i)
+#define SP(i) Slot(pointer, i)
+#define SU(i) Slot(uint, i)
+
 #define ExnStack gcState.currentThread->exnStack
 #define StackBottom gcState.stackBottom
 
-#define Push(bytes)					\
-	do {						\
-		stackTop += (bytes);			\
-		assert(StackBottom <= stackTop);	\
+#define Push(bytes)							\
+	do {								\
+		if (DEBUG_CCODEGEN)					\
+			fprintf (stderr, "%s:%d: Push (%d)\n",		\
+					__FILE__, __LINE__, bytes);	\
+		stackTop += (bytes);					\
+		assert (0 == (uint)stackTop % gcState.alignment);	\
+		assert (StackBottom <= stackTop);			\
 	} while (0)
 
 #define Return()								\
 	do {									\
 		l_nextFun = *(word*)(stackTop - WORD_SIZE);			\
 		if (DEBUG_CCODEGEN)						\
-			fprintf (stderr, "%s: %d  Return()  l_nextFun = %d\n",	\
+			fprintf (stderr, "%s:%d: Return()  l_nextFun = %d\n",	\
 					__FILE__, __LINE__, l_nextFun);		\
 		goto top;							\
 	} while (0)
@@ -235,7 +254,7 @@
 #define Raise()							\
 	do {							\
 		if (DEBUG_CCODEGEN)				\
-			fprintf (stderr, "%s: %d  Raise\n", 	\
+			fprintf (stderr, "%s:%d: Raise\n", 	\
 					__FILE__, __LINE__);	\
 		stackTop = StackBottom + ExnStack;		\
 		Return();					\
@@ -268,14 +287,20 @@
 		gcState.stackTop = stackTop;	\
 	} while (0)
 
-#define CacheFrontier()				\
-	do {					\
-		frontier = gcState.frontier;	\
+#define CacheFrontier()							\
+	do {								\
+		frontier = gcState.frontier;				\
+		assert (0 == ((uint)frontier + GC_NORMAL_HEADER_SIZE)	\
+				% gcState.alignment);			\
 	} while (0)
 
-#define CacheStackTop()				\
-	do {					\
-		stackTop = gcState.stackTop;	\
+#define CacheStackTop()							\
+	do {								\
+		stackTop = gcState.stackTop;				\
+/* The following assert is not true when trampolining from one chunk	\
+ * to another as part of a Raise.					\
+ */									\
+/*		assert (0 == (uint)stackTop % gcState.alignment); */	\
 	} while (0)
 
 #define SmallIntInf(n) ((pointer)(n))
@@ -286,8 +311,9 @@
 	do {								\
 		*(word*)frontier = (h);					\
 		x = frontier + GC_NORMAL_HEADER_SIZE;			\
+		assert (0 == (uint)x % gcState.alignment);		\
 		if (DEBUG_CCODEGEN)					\
-			fprintf (stderr, "%s: %d  0x%x = Object(%d)\n",	\
+			fprintf (stderr, "%s:%d: 0x%x = Object(%d)\n",	\
 					__FILE__, __LINE__, x, h);	\
 		assert (frontier <= gcState.limitPlusSlop);		\
 	} while (0)
@@ -442,11 +468,11 @@
 		int overflow;							\
 		dst = f(n1, n2, &overflow);					\
 		if (DEBUG_CCODEGEN)						\
-			fprintf (stderr, "%s: %d " #f "(%d, %d) = %d\n",	\
+			fprintf (stderr, "%s:%d: " #f "(%d, %d) = %d\n",	\
 					__FILE__, __LINE__, n1, n2, dst);	\
 		if (overflow) {							\
 			if (DEBUG_CCODEGEN)					\
-				fprintf (stderr, "%s: %d overflow\n",		\
+				fprintf (stderr, "%s:%d: overflow\n",		\
 						__FILE__, __LINE__);		\
 			goto l;							\
 		}								\



1.10      +2 -1      mlton/include/codegen.h

Index: codegen.h
===================================================================
RCS file: /cvsroot/mlton/mlton/include/codegen.h,v
retrieving revision 1.9
retrieving revision 1.10
diff -u -r1.9 -r1.10
--- codegen.h	2 Apr 2003 02:55:55 -0000	1.9
+++ codegen.h	24 Apr 2003 20:50:41 -0000	1.10
@@ -54,7 +54,8 @@
 		LoadArray (globaluint, file);		\
 	}
 
-#define Initialize(cs, mg, mfs, mlw, mmc, ps)				\
+#define Initialize(al, cs, mg, mfs, mlw, mmc, ps)			\
+	gcState.alignment = al;						\
 	gcState.cardSizeLog2 = cs;					\
 	gcState.frameLayouts = frameLayouts;				\
 	gcState.frameLayoutsSize = cardof(frameLayouts); 		\



1.28      +2 -2      mlton/include/x86codegen.h

Index: x86codegen.h
===================================================================
RCS file: /cvsroot/mlton/mlton/include/x86codegen.h,v
retrieving revision 1.27
retrieving revision 1.28
diff -u -r1.27 -r1.28
--- x86codegen.h	30 Mar 2003 01:38:36 -0000	1.27
+++ x86codegen.h	24 Apr 2003 20:50:42 -0000	1.28
@@ -39,7 +39,7 @@
 	pointer localpointer[p];					\
 	uint localuint[u]
 
-#define Main(cs, mg, mfs, mlw, mmc, ps, ml, reserveEsp)			\
+#define Main(al, cs, mg, mfs, mlw, mmc, ps, ml, reserveEsp)		\
 void MLton_jumpToSML (pointer jump) {					\
 	word lc_stackP;							\
 			       						\
@@ -85,7 +85,7 @@
 	pointer jump;  							\
 	extern pointer ml;						\
 	gcState.native = TRUE;						\
-	Initialize(cs, mg, mfs, mlw, mmc, ps);				\
+	Initialize (al, cs, mg, mfs, mlw, mmc, ps);			\
 	if (gcState.isOriginal) {					\
 		real_Init();						\
 		jump = (pointer)&ml;   					\



1.26      +34 -27    mlton/mlton/backend/allocate-registers.fun

Index: allocate-registers.fun
===================================================================
RCS file: /cvsroot/mlton/mlton/mlton/backend/allocate-registers.fun,v
retrieving revision 1.25
retrieving revision 1.26
diff -u -r1.25 -r1.26
--- allocate-registers.fun	23 Jan 2003 03:34:36 -0000	1.25
+++ allocate-registers.fun	24 Apr 2003 20:50:44 -0000	1.26
@@ -289,12 +289,9 @@
 
 structure Info =
    struct
-      type t = {
-		live: Operand.t vector,
+      type t = {live: Operand.t vector,
 		liveNoFormals: Operand.t vector,
-		size: int,
-		adjustSize: int -> {size: int, shift: int}
-		}
+		size: int}
 
       fun layout ({live, liveNoFormals, size, ...}: t) =
 	 Layout.record
@@ -380,18 +377,21 @@
 		   Kind.Cont _ => Vector.foreach (args, forceStack o #1)
 		 | _ => ()
 	     val _ =
-		Vector.foreach
-		(statements, fn s =>
-		 let
-		    datatype z = datatype R.Statement.t
-		 in
-		    case s of
-		       SetHandler h => hasHandler := true
-		     | SetExnStackLocal => hasHandler := true
-		     | SetExnStackSlot => hasHandler := true
-		     | SetSlotExnStack => hasHandler := true
-		     | _ => ()
-		 end)
+		if not (!hasHandler)
+		   andalso (Vector.exists
+			    (statements, fn s =>
+			     let
+				datatype z = datatype R.Statement.t
+			     in
+				case s of
+				   SetHandler h => true
+				 | SetExnStackLocal => true
+				 | SetExnStackSlot => true
+				 | SetSlotExnStack => true
+				 | _ => false
+			     end))
+		   then hasHandler := true
+		else ()
 	  in
 	     ()
 	  end)
@@ -518,13 +518,22 @@
 		      :: stackInit
 	     val a = Allocation.new (stackInit, registersInit)
 	     val size =
-		Runtime.labelSize
-		+ (case kind of
-		      Kind.Handler =>
-			 (case handlerLinkOffset of
-			     NONE => Error.bug "Handler with no handler offset"
-			   | SOME {handler, ...} => handler)
-		    | _ => Runtime.wordAlignInt (Allocation.stackSize a))
+		case kind of
+		   Kind.Handler =>
+		      (case handlerLinkOffset of
+			  NONE => Error.bug "Handler with no handler offset"
+			| SOME {handler, ...} =>
+			     Runtime.labelSize + handler)
+		 | _ =>
+		      let
+			 val size =
+			    Runtime.labelSize
+			    + Runtime.wordAlignInt (Allocation.stackSize a)
+		      in
+			 case !Control.align of
+			    Control.Align4 => size
+			  | Control.Align8 => Runtime.Type.align8 size
+		      end
 	     val a =
 		Vector.fold (args, a, fn ((x, _), a) =>
 			     allocateVar (x, SOME label, false, a))
@@ -537,12 +546,10 @@
 		Vector.fold (statements, a, fn (statement, a) =>
 			     R.Statement.foldDef (statement, a, one))
 	     val a = R.Transfer.foldDef (transfer, a, one)
-	     fun adjustSize _ = Error.unimplemented "adjustSize"
 	     val _ =
 		setLabelInfo (label, {live = addHS live,
 				      liveNoFormals = addHS liveNoFormals,
-				      size = size,
-				      adjustSize = adjustSize})
+				      size = size})
 	  in
 	     fn () => ()
 	  end)



1.14      +2 -5      mlton/mlton/backend/allocate-registers.sig

Index: allocate-registers.sig
===================================================================
RCS file: /cvsroot/mlton/mlton/mlton/backend/allocate-registers.sig,v
retrieving revision 1.13
retrieving revision 1.14
diff -u -r1.13 -r1.14
--- allocate-registers.sig	10 Dec 2002 21:45:48 -0000	1.13
+++ allocate-registers.sig	24 Apr 2003 20:50:46 -0000	1.14
@@ -38,8 +38,7 @@
 	     handlerLinkOffset: {handler: int,
 				 link: int} option,
 	     labelInfo:
-	     Rssa.Label.t -> {
-			      (* Live operands at the beginning of the block. *)
+	     Rssa.Label.t -> {(* Live operands at the beginning of the block. *)
 			      live: Machine.Operand.t vector,
 			      (* Live operands at the beginning of the block, 
 			       * excepting its formals.
@@ -48,8 +47,6 @@
 			      (* Number of bytes in frame including return
 			       * address.
 			       *)
-			      size: int,
-			      (* Adjust the number of bytes in a frame size. *)
-			      adjustSize: int -> {size: int, shift: int}
+			      size: int
 			      }}
    end



1.50      +1 -2      mlton/mlton/backend/backend.fun

Index: backend.fun
===================================================================
RCS file: /cvsroot/mlton/mlton/mlton/backend/backend.fun,v
retrieving revision 1.49
retrieving revision 1.50
diff -u -r1.49 -r1.50
--- backend.fun	2 Apr 2003 02:55:55 -0000	1.49
+++ backend.fun	24 Apr 2003 20:50:47 -0000	1.50
@@ -885,8 +885,7 @@
 				  transfer = M.Transfer.Goto start})
 			     end
 		     else ()
-		  val {adjustSize, live, liveNoFormals, size, ...} =
-		     labelRegInfo label
+		  val {live, liveNoFormals, size, ...} = labelRegInfo label
 		  val chunk = labelChunk label
 		  val statements =
 		     Vector.concatV



1.10      +0 -1      mlton/mlton/backend/machine-atoms.sig

Index: machine-atoms.sig
===================================================================
RCS file: /cvsroot/mlton/mlton/mlton/backend/machine-atoms.sig,v
retrieving revision 1.9
retrieving revision 1.10
diff -u -r1.9 -r1.10
--- machine-atoms.sig	18 Apr 2003 22:44:59 -0000	1.9
+++ machine-atoms.sig	24 Apr 2003 20:50:49 -0000	1.10
@@ -111,7 +111,6 @@
 	    val basic: (PointerTycon.t * t) vector
 	    val isOk: t -> bool
 	    val layout: t -> Layout.t
-	    val stack: t
 	    val string: t
 	    val thread: t
 	    val toRuntime: t -> Runtime.ObjectType.t



1.45      +28 -1     mlton/mlton/backend/machine.fun

Index: machine.fun
===================================================================
RCS file: /cvsroot/mlton/mlton/mlton/backend/machine.fun,v
retrieving revision 1.44
retrieving revision 1.45
diff -u -r1.44 -r1.45
--- machine.fun	10 Apr 2003 02:03:06 -0000	1.44
+++ machine.fun	24 Apr 2003 20:50:49 -0000	1.45
@@ -814,7 +814,7 @@
 			  else NONE
 		     | _ => NONE)
 	 end
-      
+
       fun typeCheck (program as
 		     T {chunks, frameLayouts, frameOffsets, intInfs, main,
 			maxFrameSize, objectTypes,
@@ -970,6 +970,33 @@
 		      | StackOffset {offset, ty, ...} =>
 			   offset + Type.size ty <= maxFrameSize
 			   andalso Alloc.doesDefine (alloc, x)
+			   andalso (case ty of
+				       Type.Label l =>
+					  let
+					     val Block.T {kind, ...} =
+						labelBlock l
+					     fun doit fi =
+						let
+						   val {size, ...} =
+						      getFrameInfo fi
+						in
+						   size
+						   = offset + Runtime.labelSize
+						end
+					  in
+					     case kind of
+						Kind.Cont {frameInfo, ...} =>
+						   doit frameInfo
+					      | Kind.CReturn {frameInfo, ...} =>
+						   (case frameInfo of
+						       NONE => true
+						     | SOME fi => doit fi)
+					      | Kind.Func => true
+					      | Kind.Handler {frameInfo, ...} =>
+						   doit frameInfo
+					      | Kind.Jump => true
+					  end
+				     | _ => true)
 		      | Word _ => true
 	       in
 		  Err.check ("operand", ok, fn () => Operand.layout x)



1.7       +16 -13    mlton/mlton/backend/mtype.fun

Index: mtype.fun
===================================================================
RCS file: /cvsroot/mlton/mlton/mlton/backend/mtype.fun,v
retrieving revision 1.6
retrieving revision 1.7
diff -u -r1.6 -r1.7
--- mtype.fun	30 Jan 2003 01:43:58 -0000	1.6
+++ mtype.fun	24 Apr 2003 20:50:51 -0000	1.7
@@ -84,22 +84,25 @@
     | Pointer => "P"
     | Uint => "U"
 
-fun doubleWordAlign (i: int): int =
-   let open Word
-   in toInt (andb (notb 0w7, (0w7 + fromInt i)))
-   end
-   
-fun wordAlign (i: int): int =
-   let open Word
-   in toInt (andb (notb 0w3, (0w3 + fromInt i)))
-   end
+local
+   fun align a b =
+      let
+	 open Word
+	 val a = fromInt a - 0w1
+      in
+	 toInt (andb (notb a, a + fromInt b))
+      end
+in
+   val align4 = align 4
+   val align8 = align 8
+end
 
 fun align (ty: t, n: int): int =
    case dest ty of
       Char => n
-    | Double => doubleWordAlign n
-    | Int => wordAlign n
-    | Pointer => wordAlign n
-    | Uint => wordAlign n
+    | Double => align8 n
+    | Int => align4 n
+    | Pointer => align4 n
+    | Uint => align4 n
 
 end



1.5       +2 -1      mlton/mlton/backend/mtype.sig

Index: mtype.sig
===================================================================
RCS file: /cvsroot/mlton/mlton/mlton/backend/mtype.sig,v
retrieving revision 1.4
retrieving revision 1.5
diff -u -r1.4 -r1.5
--- mtype.sig	10 Apr 2002 07:02:19 -0000	1.4
+++ mtype.sig	24 Apr 2003 20:50:51 -0000	1.5
@@ -24,6 +24,8 @@
        | Pointer
        | Uint
 
+      val align4: int -> int
+      val align8: int -> int
       val align: t * int -> int       (* align an address *)	 
       val all: t list
       val bool: t (* same as int *)
@@ -42,5 +44,4 @@
       val toString: t -> string
       val uint: t
       val word: t (* synonym for uint *)
-      val wordAlign: int -> int
    end



1.14      +17 -3     mlton/mlton/backend/representation.fun

Index: representation.fun
===================================================================
RCS file: /cvsroot/mlton/mlton/mlton/backend/representation.fun,v
retrieving revision 1.13
retrieving revision 1.14
diff -u -r1.13 -r1.14
--- representation.fun	18 Apr 2003 22:44:59 -0000	1.13
+++ representation.fun	24 Apr 2003 20:50:51 -0000	1.14
@@ -273,7 +273,22 @@
 			       build (doubleWords, 8, ([], initialOffset))))
 		    val offset =
 		       if isNormal
-			  then Runtime.Type.align (Runtime.Type.pointer, offset)
+			  then
+			     let
+				val offset =
+				   Runtime.Type.align
+				   (Runtime.Type.pointer, offset)
+			     in
+				if !Control.align = Control.Align8
+				andalso
+				   0 < Int.rem (Runtime.normalHeaderSize
+						+ offset
+						+ (Runtime.pointerSize
+						   * List.length (!pointers)),
+						8)
+				   then offset + 4
+				else offset
+			     end
 		       else offset
 		    val (components, size) = build (pointers, 4, (accum, offset))
 		    val size = if 0 = size then 4 else size
@@ -576,8 +591,7 @@
 	 (QuickSort.sortVector
 	  (Vector.concat [ObjectType.basic,
 			  Vector.fromList (!objectTypes)],
-	   fn ((pt, _), (pt', _)) =>
-	   PointerTycon.<= (pt, pt')),
+	   fn ((pt, _), (pt', _)) => PointerTycon.<= (pt, pt')),
 	  #2)
       val _ =
 	 Control.diagnostics



1.52      +28 -15    mlton/mlton/codegen/c-codegen/c-codegen.fun

Index: c-codegen.fun
===================================================================
RCS file: /cvsroot/mlton/mlton/mlton/codegen/c-codegen/c-codegen.fun,v
retrieving revision 1.51
retrieving revision 1.52
diff -u -r1.51 -r1.52
--- c-codegen.fun	18 Apr 2003 22:45:00 -0000	1.51
+++ c-codegen.fun	24 Apr 2003 20:50:53 -0000	1.52
@@ -138,7 +138,10 @@
 				     print i;
 				     print ">\n"))
     ; print "\n")
-   
+
+fun declareProfileLabel (l, print) =
+   C.call ("DeclareProfileLabel", [ProfileLabel.toString l], print)
+
 fun outputDeclarations
    {additionalMainArgs: string list,
     includes: string list,
@@ -146,9 +149,7 @@
     print: string -> unit,
     program = (Program.T
 	       {chunks, frameLayouts, frameOffsets, intInfs, maxFrameSize,
-		objectTypes,
-		profileInfo,
-		reals, strings, ...}),
+		objectTypes, profileInfo, reals, strings, ...}),
     rest: unit -> unit
     }: unit =
    let
@@ -227,22 +228,27 @@
 		 | Stack =>
 		      (2, 0, 0)
 		 | Weak =>
-		      (3, 1, 1)
+		      (3, 2, 1)
 		 | WeakGone =>
-		      (3, 2, 0)
+		      (3, 3, 0)
 	  in
-	     concat ["{ ", Int.toString tag, ", ",
-		     Int.toString nonPointers, ", ",
-		     Int.toString pointers, " }"]
+	     concat ["{ ", C.int tag, ", ",
+		     C.int nonPointers, ", ",
+		     C.int pointers, " }"]
 	  end)
       fun declareMain () =
 	 let
+	    val align =
+	       case !Control.align of
+		  Control.Align4 => 4
+		| Control.Align8 => 8
 	    val magic = C.word (case Random.useed () of
 				   NONE => String.hash (!Control.inputFile)
 				 | SOME w => w)
 	 in 
 	    C.callNoSemi ("Main",
-			  [C.int (!Control.cardSizeLog2),
+			  [C.int align,
+			   C.int (!Control.cardSizeLog2),
 			   magic,
 			   C.int maxFrameSize,
 			   C.bool (!Control.mayLoadWorld),
@@ -255,13 +261,11 @@
       fun declareProfileInfo () =
 	 let
 	    val ProfileInfo.T {frameSources, labels, sourceSeqs,
-			       sourceSuccessors, sources} =
+			       sourceSuccessors, sources, ...} =
 	       profileInfo
 	 in
 	    Vector.foreach (labels, fn {label, ...} =>
-			    C.call ("DeclareProfileLabel",
-				    [ProfileLabel.toString label],
-				    print))
+			    declareProfileLabel (label, print))
 	    ; declareArray ("struct GC_sourceLabel", "sourceLabels", labels,
 			    fn (_, {label, sourceSeqsIndex}) =>
 			    concat ["{(pointer)", ProfileLabel.toString label,
@@ -381,7 +385,7 @@
 	    else concat [s, " /* ", Label.toString l, " */"]
 	 end
       val handleMisalignedReals =
-	 !Control.alignDoubles = Control.AlignNo
+	 !Control.align = Control.Align4
 	 andalso !Control.hostArch = Control.Sparc
       fun addr z = concat ["&(", z, ")"]
       fun realFetch z = concat ["Real_fetch(", addr z, ")"]
@@ -566,6 +570,14 @@
 	       in
 		  ()
 	       end
+	    fun declareProfileLabels () =
+	       Vector.foreach
+	       (blocks, fn Block.T {statements, ...} =>
+		Vector.foreach
+		(statements, fn s =>
+		 case s of
+		    Statement.ProfileLabel l => declareProfileLabel (l, print)
+		  | _ => ()))
 	    fun labelFrameSize (l: Label.t): int =
 	       Program.frameSize (program, valOf (labelFrameInfo l))
 	    (* Count how many times each label is jumped to. *)
@@ -960,6 +972,7 @@
 	    print (concat ["#define CCODEGEN\n\n"])
 	    ; outputIncludes (includes, print)
 	    ; declareChunks ()
+	    ; declareProfileLabels ()
 	    ; C.callNoSemi ("Chunk", [chunkLabelToString chunkLabel], print)
 	    ; print "\n"
 	    ; declareRegisters ()



1.73      +2 -2      mlton/mlton/control/control.sig

Index: control.sig
===================================================================
RCS file: /cvsroot/mlton/mlton/mlton/control/control.sig,v
retrieving revision 1.72
retrieving revision 1.73
diff -u -r1.72 -r1.73
--- control.sig	11 Apr 2003 04:31:11 -0000	1.72
+++ control.sig	24 Apr 2003 20:50:55 -0000	1.73
@@ -18,8 +18,8 @@
       (*            Begin Flags             *)
       (*------------------------------------*)
 
-      datatype alignDoubles = AlignNo | AlignPad | AlignSkip
-      val alignDoubles: alignDoubles ref
+      datatype align = Align4 | Align8
+      val align: align ref
 	 
       val basisLibs: string list
       val basisLibrary: string ref



1.89      +9 -9      mlton/mlton/control/control.sml

Index: control.sml
===================================================================
RCS file: /cvsroot/mlton/mlton/mlton/control/control.sml,v
retrieving revision 1.88
retrieving revision 1.89
diff -u -r1.88 -r1.89
--- control.sml	11 Apr 2003 04:31:11 -0000	1.88
+++ control.sml	24 Apr 2003 20:50:56 -0000	1.89
@@ -11,23 +11,23 @@
 structure C = Control ()
 open C
 
-structure AlignDoubles =
+structure Align =
    struct
-      datatype t = AlignNo | AlignPad | AlignSkip
+      datatype t = Align4 | Align8
 
       val toString =
-	 fn AlignNo => "no"
-	  | AlignPad => "pad"
-	  | AlignSkip => "skip"
+	 fn Align4 => "4"
+	  | Align8 => "8"
    end
 
-datatype alignDoubles = datatype AlignDoubles.t
+datatype align = datatype Align.t
 
-val alignDoubles = control {name = "align doubles",
-			    default = AlignNo,
-			    toString = AlignDoubles.toString}
+val align = control {name = "align",
+		     default = Align4,
+		     toString = Align.toString}
    
 val basisLibs = ["basis-2002", "basis-2002-strict", "basis-1997", "basis-none"]
+   
 val basisLibrary = control {name = "basis library",
 			    default = "basis-2002",
 			    toString = fn s => s}



1.130     +7 -8      mlton/mlton/main/main.sml

Index: main.sml
===================================================================
RCS file: /cvsroot/mlton/mlton/mlton/main/main.sml,v
retrieving revision 1.129
retrieving revision 1.130
diff -u -r1.129 -r1.130
--- main.sml	11 Apr 2003 04:31:11 -0000	1.129
+++ main.sml	24 Apr 2003 20:50:57 -0000	1.130
@@ -98,15 +98,14 @@
       List.map
       (
        [
-       (Expert, "align-doubles", " {no|pad|skip}",
-	" how to align doubles",
+       (Expert, "align", " {4|8}",
+	" object alignment",
 	(SpaceString (fn s =>
-		      alignDoubles
+		      align
 		      := (case s of
-			     "no" => AlignNo
-			   | "pad" => AlignPad
-			   | "skip" => AlignSkip
-			   | _ => usage (concat ["invalid -align-doubles flag: ",
+			     "4" => Align4
+			   | "8" => Align8
+			   | _ => usage (concat ["invalid -align flag: ",
 						 s]))))),
        (Normal, "basis", " {2002|1997|...}",
 	"select basis library to prefix to the program",
@@ -645,7 +644,7 @@
 				      val (debugSwitches, switches) =
 					 if SOME "c" = extension
 					    then
-					       (gccDebug,
+					       (gccDebug @ ["-DASSERT=1"],
 						List.concat
 						[definesAndIncludes,
 						 [concat



1.129     +302 -196  mlton/runtime/gc.c

Index: gc.c
===================================================================
RCS file: /cvsroot/mlton/mlton/runtime/gc.c,v
retrieving revision 1.128
retrieving revision 1.129
diff -u -r1.128 -r1.129
--- gc.c	18 Apr 2003 22:45:02 -0000	1.128
+++ gc.c	24 Apr 2003 20:51:00 -0000	1.129
@@ -50,8 +50,6 @@
 
 #include "IntInf.h"
 
-#define METER FALSE  /* Displays distribution of object sizes at program exit. */
-
 /* The mutator should maintain the invariants
  *
  *  function entry: stackTop + maxFrameSize <= endOfStack
@@ -65,6 +63,7 @@
 enum {
 	BOGUS_EXN_STACK = 0xFFFFFFFF,
 	COPY_CHUNK_SIZE = 0x800000,
+	CROSS_MAP_EMPTY = 255,
 	CURRENT_SOURCE_UNDEFINED = 0xFFFFFFFF,
 	DEBUG = FALSE,
 	DEBUG_ARRAY = FALSE,
@@ -80,7 +79,6 @@
 	DEBUG_THREADS = FALSE,
 	DEBUG_WEAK = FALSE,
 	FORWARDED = 0xFFFFFFFF,
-	HEADER_SIZE = WORD_SIZE,
 	PROFILE_ALLOC_MISC = 0,
 	STACK_HEADER_SIZE = WORD_SIZE,
 };
@@ -90,6 +88,7 @@
 	UNMARK_MODE,
 } MarkMode;
 
+#define EMPTY_HEADER GC_objectHeader (EMPTY_TYPE_INDEX)
 #define STACK_HEADER GC_objectHeader (STACK_TYPE_INDEX)
 #define STRING_HEADER GC_objectHeader (STRING_TYPE_INDEX)
 #define THREAD_HEADER GC_objectHeader (THREAD_TYPE_INDEX)
@@ -167,10 +166,51 @@
 	return a;	
 }
 
+static inline uint w64align (W64 a, uint b) {
+	assert (a >= 0);
+	assert (b >= 1);
+	a += b - 1;
+	a -= a % b;
+	return a;	
+}
+
 static bool isAligned (uint a, uint b) {
 	return 0 == a % b;
 }
 
+#if ASSERT
+static bool isAlignedFrontier (GC_state s, pointer p) {
+	return isAligned ((uint)p + GC_NORMAL_HEADER_SIZE, s->alignment);
+}
+
+static bool isAlignedReserved (GC_state s, uint r) {
+	return isAligned (STACK_HEADER_SIZE + sizeof (struct GC_stack) + r, 
+				s->alignment);
+}
+#endif
+
+static inline pointer alignFrontier (GC_state s, pointer p) {
+	return (pointer) (align ((uint)p + GC_NORMAL_HEADER_SIZE, s->alignment)
+				- GC_NORMAL_HEADER_SIZE);
+}
+
+pointer GC_alignFrontier (GC_state s, pointer p) {
+	return alignFrontier (s, p);
+}
+
+static inline uint stackReserved (GC_state s, uint r) {
+	uint res;
+
+	res = align (STACK_HEADER_SIZE + sizeof (struct GC_stack) + r, 
+			s->alignment)
+		- (STACK_HEADER_SIZE + sizeof (struct GC_stack));
+	if (DEBUG_STACKS)
+		fprintf (stderr, "%s = stackReserved (%s)\n",
+				uintToCommaString (res),
+				uintToCommaString (r));
+	return res;
+}
+
 #if (defined (__linux__) || defined (__FreeBSD__) || defined (__sun__))
 /* A super-safe mmap.
  *  Allocates a region of memory with dead zones at the high and low ends.
@@ -238,7 +278,7 @@
 	char *protect = "<unset>";
 
 	for (lpAddress = 0; lpAddress < (LPCVOID)0x80000000; ) {
-		VirtualQuery (lpAddress, &buf, sizeof(buf));
+		VirtualQuery (lpAddress, &buf, sizeof (buf));
 
 		switch (buf.Protect) {
 		case PAGE_READONLY:
@@ -401,7 +441,7 @@
 }
 
 static inline void rusageZero (struct rusage *ru) {
-	memset (ru, 0, sizeof(*ru));
+	memset (ru, 0, sizeof (*ru));
 }
 
 static void rusagePlusMax (struct rusage *ru1,
@@ -535,13 +575,13 @@
 			s->currentThread->bytesNeeded,
 			s->currentThread->stack->reserved,
 			s->currentThread->stack->used);
-	if (DEBUG_DETAILED) {
+	if (DEBUG_GENERATIONAL and DEBUG_DETAILED) {
 		int i;
 
-		fprintf (stderr, "crossMap trues");
+		fprintf (stderr, "crossMap trues\n");
 		for (i = 0; i < s->crossMapSize; ++i)
-			if (s->crossMap[i])
-				fprintf (stderr, " %u", i);
+			unless (CROSS_MAP_EMPTY == s->crossMap[i])
+				fprintf (stderr, "\t%u\n", i);
 		fprintf (stderr, "\n");
 	}		
 }
@@ -590,10 +630,11 @@
 	return stackSlop (s);
 }
 
-static inline uint stackBytes (uint size) {
+static inline uint stackBytes (GC_state s, uint size) {
 	uint res;
 
-	res = wordAlign (HEADER_SIZE + sizeof (struct GC_stack) + size);
+	res = align (STACK_HEADER_SIZE + sizeof (struct GC_stack) + size,
+			s->alignment);
 	if (DEBUG_STACKS)
 		fprintf (stderr, "%s = stackBytes (%s)\n",
 				uintToCommaString (res),
@@ -601,18 +642,22 @@
 	return res;
 }
 
-static inline pointer stackBottom (GC_stack stack) {
-	return ((pointer)stack) + sizeof (struct GC_stack);
+static inline pointer stackBottom (GC_state s, GC_stack stack) {
+	pointer res;
+
+	res = ((pointer)stack) + sizeof (struct GC_stack);
+	assert (isAligned ((uint)res, s->alignment));
+	return res;
 }
 
 /* Pointer to the topmost word in use on the stack. */
-static inline pointer stackTop (GC_stack stack) {
-	return stackBottom (stack) + stack->used;
+static inline pointer stackTop (GC_state s, GC_stack stack) {
+	return stackBottom (s, stack) + stack->used;
 }
 
 /* The maximum value stackTop may take on. */
 static inline pointer stackLimit (GC_state s, GC_stack stack) {
-	return stackBottom (stack) + stack->reserved - stackSlop (s);
+	return stackBottom (s, stack) + stack->reserved - stackSlop (s);
 }
 
 static inline bool stackIsEmpty (GC_stack stack) {
@@ -669,7 +714,7 @@
 	GC_frameLayout *layout;
 	
 	assert (not (stackIsEmpty (stack)));
-	layout = getFrameLayout (s, *(word*)(stackTop (stack) - WORD_SIZE));
+	layout = getFrameLayout (s, *(word*)(stackTop (s, stack) - WORD_SIZE));
 	return layout->numBytes;
 }
 
@@ -681,30 +726,33 @@
  * the stackTop is less than the stackLimit.
  */
 static inline bool stackTopIsOk (GC_state s, GC_stack stack) {
-	return stackTop (stack) 
+	return stackTop (s, stack) 
 		       	<= stackLimit (s, stack) 
 			+ (stackIsEmpty (stack) ? 0 : topFrameSize (s, stack));
 }
 
 #if ASSERT
 static bool hasBytesFree (GC_state s, W32 oldGen, W32 nursery) {
+	bool res;
+
+	res = s->oldGenSize + oldGen 
+			+ (s->canMinor ? 2 : 1) 
+				* (s->limitPlusSlop - s->nursery)
+			<= s->heap.size
+		and nursery <= s->limitPlusSlop - s->frontier;
 	if (DEBUG_DETAILED)
-		fprintf (stderr, "hasBytesFree  oldGen = %s  nursery = %s\n",
+		fprintf (stderr, "%s = hasBytesFree (%s, %s)\n",
+				boolToString (res),
 				uintToCommaString (oldGen),
 				uintToCommaString (nursery));
-	return s->oldGenSize + oldGen + (s->canMinor ? 2 : 1) * s->nurserySize
-			<= s->heap.size
-		and nursery <= s->limitPlusSlop - s->frontier;
+	return res;
 }
 #endif
 
-static inline void setFrontier (GC_state s, pointer p) {
-	s->frontier = p;
-}
-
 /* bytesRequested includes the header. */
 static pointer object (GC_state s, uint header, W32 bytesRequested,
-				bool allocInOldGen) {
+				bool allocInOldGen,
+				Bool hasDouble) {
 	pointer frontier;
 	pointer result;
 
@@ -713,7 +761,7 @@
 				header, 
 				(uint)bytesRequested,
 				boolToString (allocInOldGen));
-	assert (isAligned (bytesRequested, WORD_SIZE));
+	assert (isAligned (bytesRequested, s->alignment));
 	assert (allocInOldGen
 			? hasBytesFree (s, bytesRequested, 0)
 			: hasBytesFree (s, 0, bytesRequested));
@@ -731,21 +779,23 @@
 	}
 	GC_profileAllocInc (s, bytesRequested);
 	*(uint*)(frontier) = header;
-	result = frontier + HEADER_SIZE;
+	result = frontier + GC_NORMAL_HEADER_SIZE;
 	return result;
 }
 
-static GC_stack newStack (GC_state s, uint size, bool allocInOldGen) {
+static GC_stack newStack (GC_state s, uint reserved, bool allocInOldGen) {
 	GC_stack stack;
 
-	if (size > s->maxStackSizeSeen)
-		s->maxStackSizeSeen = size;
-	stack = (GC_stack) object (s, STACK_HEADER, stackBytes (size),
-					allocInOldGen);
-	stack->reserved = size;
+	reserved = stackReserved (s, reserved);
+	if (reserved > s->maxStackSizeSeen)
+		s->maxStackSizeSeen = reserved;
+	stack = (GC_stack) object (s, STACK_HEADER, stackBytes (s, reserved),
+					allocInOldGen, TRUE);
+	stack->reserved = reserved;
 	stack->used = 0;
-	if (DEBUG_THREADS)
-		fprintf (stderr, "0x%x = newStack (%u)\n", (uint)stack, size);
+	if (DEBUG_STACKS)
+		fprintf (stderr, "0x%x = newStack (%u)\n", (uint)stack, 
+				reserved);
 	return stack;
 }
 
@@ -753,23 +803,23 @@
 	GC_stack stack;
 
 	stack = s->currentThread->stack;
-	s->stackBottom = stackBottom (stack);
-	s->stackTop = stackTop (stack);
+	s->stackBottom = stackBottom (s, stack);
+	s->stackTop = stackTop (s, stack);
 	s->stackLimit = stackLimit (s, stack);
 	/* We must card mark the stack because it will be updated by the mutator.
 	 */
 	markCard (s, (pointer)stack);
 }
 
-static void stackCopy (GC_stack from, GC_stack to) {
+static void stackCopy (GC_state s, GC_stack from, GC_stack to) {
 	assert (from->used <= to->reserved);
 	to->used = from->used;
 	if (DEBUG_STACKS)
 		fprintf (stderr, "stackCopy from 0x%08x to 0x%08x of length %u\n",
-				(uint) stackBottom (from), 
-				(uint) stackBottom (to),
+				(uint) stackBottom (s, from), 
+				(uint) stackBottom (s, to),
 				from->used);
-	memcpy (stackBottom (to), stackBottom (from), from->used);
+	memcpy (stackBottom (s, to), stackBottom (s, from), from->used);
 }
 
 /* Number of bytes used by the stack. */
@@ -806,19 +856,22 @@
 }
 
 /* The number of bytes in an array, not including the header. */
-static inline uint arrayNumBytes (pointer p, 
-				     uint numPointers,
-				     uint numNonPointers) {
-	uint numElements, bytesPerElement, result;
+static inline uint arrayNumBytes (GC_state s,
+					pointer p, 
+					uint numPointers,
+					uint numNonPointers) {
+	uint bytesPerElement;
+	uint numElements;
+	uint result;
 	
 	numElements = GC_arrayNumElements (p);
 	bytesPerElement = numNonPointers + toBytes (numPointers);
-	result = wordAlign (numElements * bytesPerElement);
+	result = numElements * bytesPerElement;
 	/* Empty arrays have POINTER_SIZE bytes for the forwarding pointer */
 	if (0 == result) 
 		result = POINTER_SIZE;
-	
-	return result;
+	return align (result + GC_ARRAY_HEADER_SIZE, s->alignment) 
+		- GC_ARRAY_HEADER_SIZE;
 }
 
 /* ---------------------------------------------------------------- */
@@ -860,13 +913,13 @@
 	} else if (WEAK_TAG == tag) {
 		if (not skipWeaks and 1 == numPointers)
 			maybeCall (f, s, (pointer*)&(((GC_weak)p)->object));
-		p += 2 * WORD_SIZE;
+		p += sizeof (struct GC_weak);
 	} else if (ARRAY_TAG == tag) {
 		uint numBytes;
 		pointer max;
 
 		assert (ARRAY_TAG == tag);
-		numBytes = arrayNumBytes (p, numPointers, numNonPointers);
+		numBytes = arrayNumBytes (s, p, numPointers, numNonPointers);
 		max = p + numBytes;
 		if (numPointers == 0) {
 			/* There are no pointers, just update p. */
@@ -904,9 +957,9 @@
 
 		assert (STACK_TAG == tag);
 		stack = (GC_stack)p;
-		bottom = stackBottom (stack);
-		top = stackTop (stack);
-		assert(stack->used <= stack->reserved);
+		bottom = stackBottom (s, stack);
+		top = stackTop (s, stack);
+		assert (stack->used <= stack->reserved);
 		while (top > bottom) {
 			/* Invariant: top points just past a "return address". */
 			returnAddress = *(word*) (top - WORD_SIZE);
@@ -934,7 +987,7 @@
 			}
 		}
 		assert(top == bottom);
-		p += sizeof(struct GC_stack) + stack->reserved;
+		p += sizeof (struct GC_stack) + stack->reserved;
 	}
 	return p;
 }
@@ -946,16 +999,20 @@
 /* If p points at the beginning of an object, then toData p returns a pointer 
  * to the start of the object data.
  */
-static inline pointer toData (pointer p) {
-	word header;	
+static inline pointer toData (GC_state s, pointer p) {
+	word header;
+	pointer res;
 
+	assert (isAlignedFrontier (s, p));
 	header = *(word*)p;
 	if (0 == header)
 		/* Looking at the counter word in an array. */
-		return p + GC_ARRAY_HEADER_SIZE;
+		res = p + GC_ARRAY_HEADER_SIZE;
 	else
 		/* Looking at a header word. */
-		return p + GC_NORMAL_HEADER_SIZE;
+		res = p + GC_NORMAL_HEADER_SIZE;
+	assert (isAligned ((uint)res, s->alignment));
+	return res;
 }
 
 /* ---------------------------------------------------------------- */
@@ -980,6 +1037,7 @@
 						GC_pointerFun f) {
 	pointer b;
 
+	assert (isAlignedFrontier (s, front));
 	if (DEBUG_DETAILED)
 		fprintf (stderr, "foreachPointerInRange  front = 0x%08x  *back = 0x%08x\n",
 				(uint)front, *(uint*)back);
@@ -992,7 +1050,7 @@
 				fprintf (stderr, "front = 0x%08x  *back = 0x%08x\n",
 						(uint)front, *(uint*)back);
 			front = foreachPointerInObject 
-					(s, toData (front), skipWeaks, f);
+					(s, toData (s, front), skipWeaks, f);
 		}
 		b = *back;
 	}
@@ -1085,15 +1143,13 @@
 	/* Heap */
 	assert (isAligned (s->heap.size, s->pageSize));
 	assert (isAligned ((uint)s->heap.start, s->cardSize));
-	assert (isAligned (s->oldGenSize, WORD_SIZE));
-	assert (isAligned ((uint)s->nursery, WORD_SIZE));
-	assert (isAligned (s->nurserySize, WORD_SIZE));
-	assert (isAligned ((uint)s->frontier, WORD_SIZE));
+	assert (isAlignedFrontier (s, s->heap.start + s->oldGenSize));
+	assert (isAlignedFrontier (s, s->nursery));
+	assert (isAlignedFrontier (s, s->frontier));
 	assert (s->nursery <= s->frontier);
 	unless (0 == s->heap.size) {
 		assert (s->nursery <= s->frontier);
 		assert (s->frontier <= s->limitPlusSlop);
-		assert (s->limitPlusSlop == s->nursery + s->nurserySize);
 		assert (s->limit == s->limitPlusSlop - LIMIT_SLOP);
 		assert (hasBytesFree (s, 0, 0));
 	}
@@ -1103,7 +1159,7 @@
 	back = s->heap.start + s->oldGenSize;
 	if (DEBUG_DETAILED)
 		fprintf (stderr, "Checking old generation.\n");
-	foreachPointerInRange (s, s->heap.start, &back, FALSE,
+	foreachPointerInRange (s, alignFrontier (s, s->heap.start), &back, FALSE,
 				assertIsInFromSpace);
 	if (DEBUG_DETAILED)
 		fprintf (stderr, "Checking nursery.\n");
@@ -1111,9 +1167,9 @@
 				assertIsInFromSpace);
 	/* Current thread. */
 	stack = s->currentThread->stack;
-	assert (isAligned (stack->reserved, WORD_SIZE));
-	assert (s->stackBottom == stackBottom (stack));
-	assert (s->stackTop == stackTop (stack));
+	assert (isAlignedReserved (s, stack->reserved));
+	assert (s->stackBottom == stackBottom (s, stack));
+	assert (s->stackTop == stackTop (s, stack));
  	assert (s->stackLimit == stackLimit (s, stack));
 	assert (stack->used == currentStackUsed (s));
 	assert (stack->used <= stack->reserved);
@@ -1288,11 +1344,6 @@
 	}
 }
 
-static inline void setLimit (GC_state s) {
-	s->limitPlusSlop = s->nursery + s->nurserySize;
-	s->limit = s->limitPlusSlop - LIMIT_SLOP;
-}
-
 static void clearCardMap (GC_state s) {
 	memset (s->cardMap, 0, s->cardMapSize);
 }
@@ -1300,14 +1351,19 @@
 static void setNursery (GC_state s, W32 oldGenBytesRequested,
 				W32 nurseryBytesRequested) {
 	GC_heap h;
+	uint nurserySize;
 
 	if (DEBUG_DETAILED)
 		fprintf (stderr, "setNursery.  oldGenBytesRequested = %s  frontier = 0x%08x\n",  
 				uintToCommaString (oldGenBytesRequested),
 				(uint)s->frontier);
 	h = &s->heap;
-	s->nurserySize = h->size - s->oldGenSize - oldGenBytesRequested;
-	assert (isAligned (s->nurserySize, WORD_SIZE));
+	assert (isAlignedFrontier (s, h->start + s->oldGenSize 
+					+ oldGenBytesRequested));
+	nurserySize = h->size - s->oldGenSize - oldGenBytesRequested;
+	s->limitPlusSlop = h->start + h->size;
+	s->limit = s->limitPlusSlop - LIMIT_SLOP;
+	assert (isAligned (nurserySize, WORD_SIZE));
 	if (	/* The mutator marks cards. */
 		s->mutatorMarksCards
 		/* The live ratio is low enough to make generational GC
@@ -1319,30 +1375,41 @@
 				: s->markCompactGenerationalRatio)
 		/* The nursery is large enough to be worth it. */
 		and ((float)(h->size - s->bytesLive) 
-			/ (float)s->nurserySize) <= s->nurseryRatio
+			/ (float)nurserySize) <= s->nurseryRatio
 		/* There is enough space in the nursery. */
-		and s->nurserySize >= 2 * nurseryBytesRequested
+		and nurseryBytesRequested 
+			<= s->limitPlusSlop
+				- alignFrontier (s, s->limitPlusSlop
+							- nurserySize/2 + 2)
 		) {
 		s->canMinor = TRUE;
-		s->nurserySize /= 2;
-		unless (isAligned (s->nurserySize, WORD_SIZE))
-			s->nurserySize -= 2;
+		nurserySize /= 2;
+		unless (isAligned (nurserySize, WORD_SIZE))
+			nurserySize -= 2;
 		clearCardMap (s);
 	} else {
-		if (s->nurserySize < nurseryBytesRequested)
+		unless (nurseryBytesRequested 
+				<= s->limitPlusSlop
+					- alignFrontier (s, s->limitPlusSlop
+								- nurserySize))
 			die ("Out of memory.  Insufficient space in nursery.");
 		s->canMinor = FALSE;
 	}
-	s->nursery = h->start + h->size - s->nurserySize;
-	setFrontier (s, s->nursery);
-	setLimit (s);
-	assert (isAligned (s->nurserySize, WORD_SIZE));
-	assert (isAligned ((uint)s->nursery, WORD_SIZE));
+	assert (nurseryBytesRequested 
+			<= s->limitPlusSlop
+				- alignFrontier (s, s->limitPlusSlop 
+							- nurserySize));
+	s->nursery = alignFrontier (s, s->limitPlusSlop - nurserySize);
+	s->frontier = s->nursery;
+	assert (nurseryBytesRequested <= s->limitPlusSlop - s->frontier);
+	assert (isAlignedFrontier (s, s->nursery));
 	assert (hasBytesFree (s, oldGenBytesRequested, nurseryBytesRequested));
 }
 
 static inline void clearCrossMap (GC_state s) {
-	memset (s->crossMap, 0, s->crossMapSize);
+	if (DEBUG_GENERATIONAL and DEBUG_DETAILED)
+		fprintf (stderr, "clearCrossMap ()\n");
+	memset (s->crossMap, CROSS_MAP_EMPTY, s->crossMapSize);
 }
 
 static void setCardMapForMutator (GC_state s) {
@@ -1380,6 +1447,7 @@
 				uintToCommaString (s->cardMapSize));
 	s->crossMapSize = s->cardMapSize;
 	s->crossMap = smmap (s->crossMapSize);
+	clearCrossMap (s);
 }
 
 /* heapCreate (s, h, need, minSize) allocates a heap of the size necessary to
@@ -1460,14 +1528,27 @@
 }
 
 static inline void setCrossMap (GC_state s, pointer p) {
-	if (s->mutatorMarksCards and isAligned ((uint)p, s->cardSize)) {
+	if (s->mutatorMarksCards) {
 		GC_heap h;
+		uint cardIndex;
+		pointer cardStart;
+		uint offset;
 
 		h = s->crossMapHeap;
+		/* The p - 1 is so that a pointer to the beginning of a card
+		 * falls into the index for the previous crossMap entry.
+		 */
+		cardStart =
+			(p == h->start)
+			? h->start
+			: (p - 1) - ((uint)(p - 1) % s->cardSize);
+		cardIndex = divCardSize (s, cardStart - h->start);
+		offset = (p - cardStart) / WORD_SIZE;
+		assert (offset < CROSS_MAP_EMPTY);
 		if (DEBUG_GENERATIONAL)
-			fprintf (stderr, "crossMap[%u] = TRUE\n",
-					divCardSize (s, p - h->start));
-		s->crossMap[divCardSize (s, p - h->start)] = '\001';
+			fprintf (stderr, "crossMap[%u] = %u\n", 
+					cardIndex, offset);
+		s->crossMap[cardIndex] = offset;
 	}
 }
 
@@ -1483,14 +1564,14 @@
 		objectBytes = toBytes (numPointers + numNonPointers);
 	} else if (ARRAY_TAG == tag) {
 		headerBytes = GC_ARRAY_HEADER_SIZE;
-		objectBytes = arrayNumBytes (p, numPointers, numNonPointers);
+		objectBytes = arrayNumBytes (s, p, numPointers, numNonPointers);
 	} else if (WEAK_TAG == tag) {
 		headerBytes = GC_NORMAL_HEADER_SIZE;
-		objectBytes = 2 * WORD_SIZE;
+		objectBytes = sizeof (struct GC_weak);
 	} else { /* Stack. */
 		assert (STACK_TAG == tag);
 		headerBytes = STACK_HEADER_SIZE;
-		objectBytes = sizeof(struct GC_stack) + ((GC_stack)p)->reserved;
+		objectBytes = sizeof (struct GC_stack) + ((GC_stack)p)->reserved;
 	}
 	return headerBytes + objectBytes;
 }
@@ -1499,13 +1580,9 @@
 /*                    Cheney Copying Collection                     */
 /* ---------------------------------------------------------------- */
 
-#if METER
-int sizes[25600];
-#endif
-
 /* forward (s, pp) forwards the object pointed to by *pp and updates *pp to 
  * point to the new object. 
- * It also updates the crossMap if the object starts a card boundary.
+ * It also updates the crossMap.
  */
 static inline void forward (GC_state s, pointer *pp) {
 	pointer p;
@@ -1517,6 +1594,8 @@
 	assert (isInFromSpace (s, *pp));
 	p = *pp;
 	header = GC_getHeader (p);
+	if (DEBUG_DETAILED and FORWARDED == header)
+		fprintf (stderr, "already FORWARDED\n");
 	if (header != FORWARDED) { /* forward the object */
 		uint headerBytes, objectBytes, size, skip;
 		uint numPointers, numNonPointers;
@@ -1529,12 +1608,12 @@
 			skip = 0;
 		} else if (ARRAY_TAG == tag) {
 			headerBytes = GC_ARRAY_HEADER_SIZE;
-			objectBytes = arrayNumBytes (p, numPointers,
-								numNonPointers);
+			objectBytes = arrayNumBytes (s, p, numPointers,
+							numNonPointers);
 			skip = 0;
 		} else if (WEAK_TAG == tag) {
 			headerBytes = GC_NORMAL_HEADER_SIZE;
-			objectBytes = 2 * WORD_SIZE;
+			objectBytes = sizeof (struct GC_weak);
 			skip = 0;
 		} else { /* Stack. */
 			GC_stack stack;
@@ -1548,7 +1627,8 @@
 			if (stack->used <= stack->reserved / 4) {
 				W32 new;
 
-				new = wordAlign (max (stack->reserved / 2, 
+				new = stackReserved
+					 (s, max (stack->reserved / 2, 
 							stackNeedsReserved (s, stack)));
 				/* It's possible that new > stack->reserved if
 				 * the stack is the current one and the stack
@@ -1567,10 +1647,10 @@
 		size = headerBytes + objectBytes;
 		assert (s->back + size + skip <= s->toLimit);
   		/* Copy the object. */
-		if (DEBUG_DETAILED)
-			fprintf (stderr, "copying from 0x%08x to 0x%08x of size %u\n",
-					(uint)p, (uint)s->back, size);
 		copy (p - headerBytes, s->back, size);
+		/* If the object has a valid weak pointer, link it into the weaks
+		 * for update after the copying GC is done.
+		 */
 		if (WEAK_TAG == tag and 1 == numPointers) {
 			GC_weak w;
 
@@ -1590,16 +1670,14 @@
 					fprintf (stderr, "not linking\n");
 			}
 		}
-#if METER
-		if (size < sizeof(sizes)/sizeof(sizes[0])) sizes[size]++;
-#endif
  		/* Store the forwarding pointer in the old object. */
 		*(word*)(p - WORD_SIZE) = FORWARDED;
 		*(pointer*)p = s->back + headerBytes;
 		setCrossMap (s, s->back);
 		/* Update the back of the queue. */
 		s->back += size + skip;
-		assert (isAligned ((uint)s->back, WORD_SIZE));
+		assert (isAligned ((uint)s->back + GC_NORMAL_HEADER_SIZE,
+					s->alignment));
 	}
 	*pp = *(pointer*)p;
 	assert (isInToSpace (s, *pp));
@@ -1633,12 +1711,12 @@
 	h = s->heap2;
 	s->heap2 = s->heap;
 	s->heap = h;
-	setLimit (s);
 	setCardMapForMutator (s);
 }
 
 static void cheneyCopy (GC_state s) {
 	struct rusage ru_start;
+	pointer toStart;
 
 	assert (s->heap2.size >= s->oldGenSize);
 	startTiming (&ru_start);
@@ -1657,14 +1735,15 @@
 	}
 	assert (s->heap2.start != (void*)NULL);
 	/* The next assert ensures there is enough space for the copy to succeed.
-	 * It does not say  assert (s->heap2.size >= s->heap.size) because that
+	 * It does not assert (s->heap2.size >= s->heap.size) because that
          * is too strong.
 	 */
 	assert (s->heap2.size >= s->oldGenSize);
 	clearCrossMap (s);
-	s->back = s->heap2.start;
+	toStart = alignFrontier (s, s->heap2.start);
+	s->back = toStart;
 	foreachGlobal (s, forward);
-	foreachPointerInRange (s, s->heap2.start, &s->back, TRUE, forward);
+	foreachPointerInRange (s, toStart, &s->back, TRUE, forward);
 	updateWeaks (s);
 	s->oldGenSize = s->back - s->heap2.start;
 	s->bytesCopied += s->oldGenSize;
@@ -1690,7 +1769,7 @@
 	if (DEBUG_GENERATIONAL)
 		fprintf (stderr, "intergenerational pointer from 0x%08x to 0x%08x\n",
 			(uint)pp, *(uint*)pp);
-	assert (s->nursery <= p and p < s->nursery + s->nurserySize);
+	assert (s->nursery <= p and p < s->limitPlusSlop);
 	forward (s, pp);
 }
 
@@ -1698,7 +1777,8 @@
 static void forwardInterGenerationalPointers (GC_state s) {
 	pointer cardMap;
 	uint cardNum;
-	pointer crossMap;
+	pointer cardStart;
+	uchar *crossMap;
 	GC_heap h;
 	uint numCards;
 	pointer objectStart;
@@ -1715,10 +1795,12 @@
 	oldGenStart = s->heap.start;
 	oldGenEnd = oldGenStart + s->oldGenSize;
 	/* Loop variables*/
-	objectStart = s->heap.start;
+	objectStart = alignFrontier (s, s->heap.start);
 	cardNum = 0;
+	cardStart = oldGenStart;
 checkAll:
 	assert (cardNum <= numCards);
+	assert (isAlignedFrontier (s, objectStart));
 	if (cardNum == numCards)
 		goto done;
 checkCard:
@@ -1729,7 +1811,6 @@
 				(uint)oldGenStart + cardNumToSize (s, cardNum + 1));
 	assert (objectStart < oldGenStart + cardNumToSize (s, cardNum + 1));
 	if (cardMap[cardNum]) {
-		pointer cardStart;
 		pointer cardEnd;
 		pointer orig;
 		uint size;
@@ -1738,10 +1819,10 @@
 		if (DEBUG_GENERATIONAL)
 			fprintf (stderr, "card %u is marked  objectStart = 0x%08x\n", 
 					cardNum, (uint)objectStart);
-		cardStart = oldGenStart + cardNumToSize (s, cardNum);
 		orig = objectStart;
 skipObjects:
-		size = objectSize (s, toData (objectStart));
+		assert (isAlignedFrontier (s, objectStart));
+		size = objectSize (s, toData (s, objectStart));
 		if (objectStart + size < cardStart) {
 			objectStart += size;
 			goto skipObjects;
@@ -1766,15 +1847,19 @@
 		if (objectStart == oldGenEnd)
 			goto done;
 		cardNum = divCardSize (s, objectStart - oldGenStart);
+		cardStart = oldGenStart + cardNumToSize (s, cardNum);
 		goto checkCard;
 	} else {
+		unless (CROSS_MAP_EMPTY == crossMap[cardNum])
+			objectStart = cardStart + crossMap[cardNum] * WORD_SIZE;
+		if (DEBUG_GENERATIONAL)
+			fprintf (stderr, "card %u is not marked  crossMap[%u] == %u  objectStart = 0x%08x\n", 
+					cardNum,
+					cardNum, 
+					crossMap[cardNum] * WORD_SIZE,
+					(uint)objectStart);
 		cardNum++;
-		if (crossMap[cardNum]) {
-			objectStart = oldGenStart + cardNumToSize (s, cardNum);
-			if (DEBUG_GENERATIONAL)
-				fprintf (stderr, "crossMap[%u] == TRUE   objectStart = 0x%08x\n", 
-						cardNum, (uint)objectStart);
-		}
+		cardStart += s->cardSize;
 		goto checkAll;
 	}
 	assert (FALSE);
@@ -1789,7 +1874,8 @@
 	struct rusage ru_start;
 
 	if (DEBUG_GENERATIONAL)
-		fprintf (stderr, "minorGC  frontier = 0x%08x\n", 
+		fprintf (stderr, "minorGC  nursery = 0x%08x  frontier = 0x%08x\n", 
+				(uint)s->nursery,
 				(uint)s->frontier);
 	assert (invariant (s));
 	bytesAllocated = s->frontier - s->nursery;
@@ -1805,7 +1891,11 @@
 		startTiming (&ru_start);
 		s->amInMinorGC = TRUE;
 		s->toSpace = s->heap.start + s->oldGenSize;
-		s->toLimit = s->toSpace + s->nurserySize;
+		if (DEBUG_GENERATIONAL)
+			fprintf (stderr, "toSpace = 0x%08x\n",
+					(uint)s->toSpace);
+		assert (isAlignedFrontier (s, s->toSpace));
+		s->toLimit = s->toSpace + bytesAllocated;
 		assert (invariant (s));
 		s->numMinorGCs++;
 		s->numMinorsSinceLastMajor++;
@@ -1854,7 +1944,8 @@
 /* mark (s, p) sets all the mark bits in the object graph pointed to by p. 
  * If the mode is MARK, it sets the bits to 1.
  * If the mode is UNMARK, it sets the bits to 0.
- * It returns the amount marked.
+ *
+ * It returns the total size in bytes of the objects marked.
  */
 W32 mark (GC_state s, pointer root, MarkMode mode) {
 	pointer cur;  /* The current object being marked. */
@@ -1955,7 +2046,7 @@
 		*headerp = header;
 		goto ret;
 	} else if (ARRAY_TAG == tag) {
-		numBytes = arrayNumBytes (cur, numPointers, numNonPointers);
+		numBytes = arrayNumBytes (s, cur, numPointers, numNonPointers);
 		size += GC_ARRAY_HEADER_SIZE + numBytes;
 		*headerp = header;
 		if (0 == numPointers or 0 == GC_arrayNumElements (cur))
@@ -1990,19 +2081,19 @@
 	} else {
 		assert (STACK_TAG == tag);
 		*headerp = header;
-		size += stackBytes (((GC_stack)cur)->reserved);
-		top = stackTop ((GC_stack)cur);
+		size += stackBytes (s, ((GC_stack)cur)->reserved);
+		top = stackTop (s, (GC_stack)cur);
 		assert (((GC_stack)cur)->used <= ((GC_stack)cur)->reserved);
 markInStack:
 		/* Invariant: top points just past the return address of the
 		 * frame to be marked.
 		 */
-		assert (stackBottom ((GC_stack)cur) <= top);
+		assert (stackBottom (s, (GC_stack)cur) <= top);
 		if (DEBUG_MARK_COMPACT)
 			fprintf (stderr, "markInStack  top = %d\n",
-					top - stackBottom ((GC_stack)cur));
+					top - stackBottom (s, (GC_stack)cur));
 					
-		if (top == stackBottom ((GC_stack)(cur)))
+		if (top == stackBottom (s, (GC_stack)(cur)))
 			goto ret;
 		index = 0;
 		layout = getFrameLayout (s, *(word*) (top - WORD_SIZE));
@@ -2068,7 +2159,8 @@
 		index++;
 		goto markInNormal;
 	} else if (ARRAY_TAG == tag) {
-		max = prev + arrayNumBytes (prev, numPointers, numNonPointers);
+		max = prev + arrayNumBytes (s, prev, numPointers,
+						numNonPointers);
 		index = arrayCounter (prev);
 		todo = prev + index * POINTER_SIZE;
 		next = cur;
@@ -2162,8 +2254,8 @@
 
 	if (DEBUG_MARK_COMPACT)
 		fprintf (stderr, "Update forward pointers.\n");
-	front = s->heap.start;
-	back = front + s->oldGenSize;
+	front = alignFrontier (s, s->heap.start);
+	back = s->heap.start + s->oldGenSize;
 	endOfLastMarked = front;
 	gap = 0;
 updateObject:
@@ -2260,8 +2352,8 @@
 
 	if (DEBUG_MARK_COMPACT)
 		fprintf (stderr, "Update backward pointers and slide.\n");
-	front = s->heap.start;
-	back = front + s->oldGenSize;
+	front = alignFrontier (s, s->heap.start);
+	back = s->heap.start + s->oldGenSize;
 	gap = 0;
 updateObject:
 	if (DEBUG_MARK_COMPACT)
@@ -2384,7 +2476,8 @@
 	/* Translate globals and heap. */
 	foreachGlobal (s, translatePointer);
 	limit = to + size;
-	foreachPointerInRange (s, to, &limit, FALSE, translatePointer);
+	foreachPointerInRange (s, alignFrontier (s, to), &limit, FALSE,
+				translatePointer);
 }
 
 /* ---------------------------------------------------------------- */
@@ -2589,10 +2682,10 @@
 	size = 2 * s->currentThread->stack->reserved;
 	if (DEBUG_STACKS or s->messages)
 		fprintf (stderr, "Growing stack to size %s.\n",
-				uintToCommaString (stackBytes (size)));
-	assert (hasBytesFree (s, stackBytes (size), 0));
+				uintToCommaString (stackBytes (s, size)));
+	assert (hasBytesFree (s, stackBytes (s, size), 0));
 	stack = newStack (s, size, TRUE);
-	stackCopy (s->currentThread->stack, stack);
+	stackCopy (s, s->currentThread->stack, stack);
 	s->currentThread->stack = stack;
 	markCard (s, (pointer)s->currentThread);
 }
@@ -2664,7 +2757,7 @@
 	stackBytesRequested =
 		stackTopOk
 		? 0 
-		: stackBytes (2 * s->currentThread->stack->reserved);
+		: stackBytes (s, 2 * s->currentThread->stack->reserved);
 	totalBytesRequested = 
 		(W64)oldGenBytesRequested 
 		+ stackBytesRequested
@@ -2806,10 +2899,6 @@
 /*                         GC_arrayAllocate                         */
 /* ---------------------------------------------------------------- */
 
-static inline W64 w64align (W64 w) {
- 	return ((w + 3) & ~ 3);
-}
-
 pointer GC_arrayAllocate (GC_state s, W32 ensureBytesFree, W32 numElts, 
 				W32 header) {
 	uint numPointers;
@@ -2827,14 +2916,15 @@
 			or (numPointers == 0 and numNonPointers > 0));
 	eltSize = numPointers * POINTER_SIZE + numNonPointers;
 	arraySize64 = 
-		w64align((W64)eltSize * (W64)numElts + GC_ARRAY_HEADER_SIZE);
+		w64align ((W64)eltSize * (W64)numElts + GC_ARRAY_HEADER_SIZE,
+				s->alignment);
 	if (arraySize64 >= 0x100000000llu)
 		die ("Out of memory: cannot allocate array with %s bytes.\n",
 			ullongToCommaString (arraySize64));
 	arraySize = (W32)arraySize64;
-	if (3 * WORD_SIZE == arraySize)
-		/* array is empty -- create space for forwarding pointer. */
- 		arraySize = 4 * WORD_SIZE;
+	if (arraySize < GC_ARRAY_HEADER_SIZE + WORD_SIZE)
+		/* Create space for forwarding pointer. */
+ 		arraySize = GC_ARRAY_HEADER_SIZE + WORD_SIZE;
 	if (DEBUG_ARRAY)
 		fprintf (stderr, "array with %s elts of size %u and total size %s.  ensure %s bytes free.\n",
 			uintToCommaString (numElts), 
@@ -2860,6 +2950,7 @@
 		}
 		frontier = (W32*)s->frontier;
 		last = (W32*)((pointer)frontier + arraySize);
+		assert (isAlignedFrontier (s, (pointer)last));
 		s->frontier = (pointer)last;
 	}
 	*frontier++ = 0; /* counter word */
@@ -2887,21 +2978,31 @@
 /*                             Threads                              */
 /* ---------------------------------------------------------------- */
 
-static inline uint threadBytes () {
-	return wordAlign(HEADER_SIZE + sizeof(struct GC_thread));
+static inline uint threadBytes (GC_state s) {
+	uint res;
+
+	res = GC_NORMAL_HEADER_SIZE + sizeof (struct GC_thread);
+	/* The following assert depends on struct GC_thread being the right
+ 	 * size.  Right now, it happens that res = 16, which is aligned mod 4
+	 * and mod 8, which is all that we need.  If the struct every changes
+	 * (possible) or we need more alignment (doubtful), we may need to put
+	 * some padding at the beginning.
+	 */
+	assert (isAligned (res, s->alignment));
+	return res;
 }
 
 static inline uint initialThreadBytes (GC_state s) {
-	return threadBytes () + stackBytes (initialStackSize (s));
+	return threadBytes (s) + stackBytes (s, initialStackSize (s));
 }
 
 static GC_thread newThreadOfSize (GC_state s, uint stackSize) {
 	GC_stack stack;
 	GC_thread t;
 
-	ensureFree (s, stackBytes (stackSize) + threadBytes ());
+	ensureFree (s, stackBytes (s, stackSize) + threadBytes (s));
 	stack = newStack (s, stackSize, FALSE);
-	t = (GC_thread) object (s, THREAD_HEADER, threadBytes (), FALSE);
+	t = (GC_thread) object (s, THREAD_HEADER, threadBytes (s), FALSE, FALSE);
 	t->bytesNeeded = 0;
 	t->exnStack = BOGUS_EXN_STACK;
 	t->stack = stack;
@@ -2929,7 +3030,7 @@
 		fprintf (stderr, "0x%08x = copyThread (0x%08x)\n", 
 				(uint)to, (uint)from);
 	}
-	stackCopy (from->stack, to->stack);
+	stackCopy (s, from->stack, to->stack);
 	to->exnStack = from->exnStack;
 	return to;
 }
@@ -2943,7 +3044,10 @@
 	enter (s);
 	t = s->currentThread;
 	res = copyThread (s, t, t->stack->used);
-	assert (res->stack->reserved == res->stack->used);
+/* The following assert is no longer true, since alignment restrictions can force
+ * the reserved to be slightly larger than the used.
+ */
+/*	assert (res->stack->reserved == res->stack->used); */
 	leave (s);
 	if (DEBUG_THREADS)
 		fprintf (stderr, "0x%08x = GC_copyCurrentThread\n", (uint)res);
@@ -2958,7 +3062,7 @@
 	if (DEBUG_THREADS)
 		fprintf (stderr, "GC_copyThread (0x%08x)\n", (uint)t);
 	enter (s);
-	assert (t->stack->reserved == t->stack->used);
+/*	assert (t->stack->reserved == t->stack->used); */
 	res = copyThread (s, t, stackNeedsReserved (s, t->stack));
 	assert (stackTopIsOk (s, res->stack));
 	leave (s);
@@ -2980,7 +3084,7 @@
 	if (DEBUG_PROFILE)
 		fprintf (stderr, "walking stack");
 	assert (s->native);
-	bottom = stackBottom (s->currentThread->stack);
+	bottom = stackBottom (s, s->currentThread->stack);
 	if (DEBUG_PROFILE)
 		fprintf (stderr, "  bottom = 0x%08x  top = 0x%08x.\n",
 				(uint)bottom, (uint)s->stackTop);
@@ -3323,7 +3427,7 @@
 
 	s->profile = GC_profileNew (s);
 	/* Sort sourceLabels by address. */
-	qsort (s->sourceLabels, s->sourceLabelsSize, sizeof(*s->sourceLabels),
+	qsort (s->sourceLabels, s->sourceLabelsSize, sizeof (*s->sourceLabels),
 		compareProfileLabels);
 	if (DEBUG_PROFILE)
 		for (i = 0; i < s->sourceLabelsSize; ++i)
@@ -3605,18 +3709,19 @@
 	for (i = 0; i < s->intInfInitsSize; ++i) {
 		numElements = strlen (s->intInfInits[i].mlstr);
 		s->bytesLive +=
-			GC_ARRAY_HEADER_SIZE + WORD_SIZE // for the sign
-			+ ((0 == numElements) 
-				? POINTER_SIZE 
-				: wordAlign (numElements));
+			align (GC_ARRAY_HEADER_SIZE 
+				+ WORD_SIZE // for the sign
+				+ numElements,
+				s->alignment);
 	}
 	for (i = 0; i < s->stringInitsSize; ++i) {
 		numElements = s->stringInits[i].size;
 		s->bytesLive +=
-			GC_ARRAY_HEADER_SIZE
-			+ ((0 == numElements) 
-				? POINTER_SIZE 
-				: wordAlign (numElements));
+			align (GC_ARRAY_HEADER_SIZE
+				+ ((0 == numElements) 
+					? POINTER_SIZE
+					: numElements),
+				s->alignment);
 	}
 }
 
@@ -3642,6 +3747,7 @@
 	bignum	*bp;
 	char	*cp;
 
+	assert (isAlignedFrontier (s, s->frontier));
 	frontier = s->frontier;
 	for (index = 0; index < s->intInfInitsSize; ++index) {
 		inits = &s->intInfInits[index];
@@ -3658,7 +3764,7 @@
 			llen = (slen + 7) / 8;
 		} else
 			llen = (slen + 8) / 9;
-		assert(slen > 0);
+		assert (slen > 0);
 		bp = (bignum *)frontier;
 		cp = (char *)&bp->limbs[llen];
 		for (i = 0; i != slen; ++i)
@@ -3671,7 +3777,7 @@
 				cp[i] = str[i] - 'A' + 0xA;
 			}
 		alen = mpn_set_str (bp->limbs, cp, slen, hex ? 0x10 : 10);
-		assert(alen <= llen);
+		assert (alen <= llen);
 		if (alen <= 1) {
 			uint	val,
 				ans;
@@ -3702,8 +3808,9 @@
 		bp->card = alen + 1;
 		bp->magic = BIGMAGIC;
 		bp->isneg = neg;
-		frontier = (pointer)&bp->limbs[alen];
+		frontier = alignFrontier (s, (pointer)&bp->limbs[alen]);
 	}
+	assert (isAlignedFrontier (s, frontier));
 	s->frontier = frontier;
 	GC_profileAllocInc (s, frontier - s->frontier);
 	s->bytesAllocated += frontier - s->frontier;
@@ -3714,16 +3821,18 @@
 	pointer frontier;
 	int i;
 
+	assert (isAlignedFrontier (s, s->frontier));
 	inits = s->stringInits;
 	frontier = s->frontier;
 	for (i = 0; i < s->stringInitsSize; ++i) {
 		uint numElements, numBytes;
 
 		numElements = inits[i].size;
-		numBytes = GC_ARRAY_HEADER_SIZE
-			+ ((0 == numElements) 
-				? POINTER_SIZE 
-				: wordAlign(numElements));
+		numBytes = align (GC_ARRAY_HEADER_SIZE
+					+ ((0 == numElements) 
+						? POINTER_SIZE
+						: numElements),
+					s->alignment);
 		assert (numBytes <= s->heap.start + s->heap.size - frontier);
 		*(word*)frontier = 0; /* counter word */
 		*(word*)(frontier + WORD_SIZE) = numElements;
@@ -3747,23 +3856,25 @@
 				(uint)frontier);
 	GC_profileAllocInc (s, frontier - s->frontier);
 	s->bytesAllocated += frontier - s->frontier;
+	assert (isAlignedFrontier (s, frontier));
 	s->frontier = frontier;
 }
 
 static void newWorld (GC_state s) {
 	int i;
+	pointer start;
 
-	assert (isAligned (sizeof (struct GC_thread), WORD_SIZE));
 	for (i = 0; i < s->globalsSize; ++i)
 		s->globals[i] = (pointer)BOGUS_POINTER;
 	setInitialBytesLive (s);
 	heapCreate (s, &s->heap, heapDesiredSize (s, s->bytesLive, 0),
 			s->bytesLive);
 	createCardMapAndCrossMap (s);
-	setFrontier (s, s->heap.start);
+	start = alignFrontier (s, s->heap.start);
+	s->frontier = start;
 	initIntInfs (s);
 	initStrings (s);
-	assert (s->frontier - s->heap.start <= s->bytesLive);
+	assert (s->frontier - start <= s->bytesLive);
 	s->oldGenSize = s->frontier - s->heap.start;
 	setNursery (s, 0, 0);
 	switchToThread (s, newThreadOfSize (s, initialStackSize (s)));
@@ -3815,6 +3926,11 @@
 	char *worldFile;
 	int i;
 
+	assert (isAligned (sizeof (struct GC_stack), s->alignment));
+	assert (isAligned (GC_NORMAL_HEADER_SIZE + sizeof (struct GC_thread),
+				s->alignment));
+	assert (isAligned (GC_NORMAL_HEADER_SIZE + sizeof (struct GC_weak),
+				s->alignment));
 	s->amInGC = TRUE;
 	s->amInMinorGC = FALSE;
 	s->bytesAllocated = 0;
@@ -3995,7 +4111,7 @@
          */
 	s->ram = align (s->ramSlop * s->totalRam, s->pageSize);
 	if (DEBUG or DEBUG_RESIZING or s->messages)
-		fprintf (stderr, "totalRam = %s  totalSwap = %s  ram = %s\n",
+		fprintf (stderr, "total RAM = %s  total swap = %s  RAM = %s\n",
 				uintToCommaString (s->totalRam), 
 				uintToCommaString (s->totalSwap),
 				uintToCommaString (s->ram));
@@ -4083,15 +4199,6 @@
 				uintToCommaString (s->minorBytesScanned));
 		fprintf (out, "minor skipped: %s bytes\n", 
 				uintToCommaString (s->minorBytesSkipped));
-#if METER
-		{
-			int i;
-			for (i = 0; i < cardof(sizes); ++i) {
-				if (0 != sizes[i])
-					fprintf (out, "COUNT[%d]=%d\n", i, sizes[i]);
-		  	}
-		}
-#endif
 	}
 	heapRelease (s, &s->heap);
 	heapRelease (s, &s->heap2);
@@ -4244,9 +4351,8 @@
 pointer GC_weakNew (GC_state s, W32 header, pointer p) {
 	pointer res;
 
-	res = object (s, header,
-			HEADER_SIZE + WORD_SIZE + WORD_SIZE,
-			FALSE);
+	res = object (s, header, GC_NORMAL_HEADER_SIZE + 3 * WORD_SIZE, 
+			FALSE, FALSE);
 	((GC_weak)res)->object = p;
 	if (DEBUG_WEAK)
 		fprintf (stderr, "0x%08x = GC_weakNew (0x%08x, 0x%08x)\n",



1.59      +11 -8     mlton/runtime/gc.h

Index: gc.h
===================================================================
RCS file: /cvsroot/mlton/mlton/runtime/gc.h,v
retrieving revision 1.58
retrieving revision 1.59
diff -u -r1.58 -r1.59
--- gc.h	18 Apr 2003 22:45:04 -0000	1.58
+++ gc.h	24 Apr 2003 20:51:02 -0000	1.59
@@ -105,6 +105,7 @@
 } GC_ObjectTypeTag;
 
 typedef struct {
+	/* Keep tag first, at zero offset, since it is referenced most often. */
 	GC_ObjectTypeTag tag;
 	ushort numNonPointers;
 	ushort numPointers;
@@ -208,6 +209,7 @@
 /* ------------------------------------------------- */
 
 typedef struct GC_weak {
+	uint unused;
 	struct GC_weak *link;
 	pointer object;
 } *GC_weak;
@@ -309,6 +311,7 @@
 	pointer stackTop;
 	pointer stackLimit;	/* stackBottom + stackSize - maxFrameSize */
 
+	uint alignment;		/* Either WORD_SIZE or 2 * WORD_SIZE. */
 	bool amInGC;
 	bool amInMinorGC;
 	pointer back;     	/* Points at next available word in toSpace. */
@@ -330,7 +333,7 @@
 	float copyGenerationalRatio;
 	float copyRatio;	/* Minimum live ratio to use copying GC. */
 	GC_heap crossMapHeap;	/* only used during GC. */
-	pointer crossMap;
+	uchar *crossMap;
 	uint crossMapSize;
 	GC_thread currentThread; /* This points to a thread in the heap. */
 	uint fixedHeapSize; 	/* Only meaningful if useFixedHeap. */
@@ -348,11 +351,11 @@
 	struct GC_heap heap;
 	struct GC_heap heap2;	/* Used for major copying collection. */
 	bool inSignalHandler; 	/* TRUE iff a signal handler is running. */
+	struct GC_intInfInit *intInfInits;
+	uint intInfInitsSize;
 	/* canHandle == 0 iff GC may switch to the signal handler
  	 * thread.  This is used to implement critical sections.
 	 */
-	struct GC_intInfInit *intInfInits;
-	uint intInfInitsSize;
 	volatile int canHandle;
 	bool isOriginal;
 	pointer limitPlusSlop; /* limit + LIMIT_SLOP */
@@ -393,7 +396,6 @@
 	 */
 	float nurseryRatio;
 	pointer nursery;
-	uint nurserySize;
 	GC_ObjectType *objectTypes; /* Array of object types. */
 	uint objectTypesSize;
 	/* Arrays larger than oldGenArraySize are allocated in the old generation
@@ -420,10 +422,6 @@
 	GC_thread savedThread;
 	/* saveGlobals writes out the values of all of the globals to fd. */
 	void (*saveGlobals)(int fd);
-	/* serializeStart holds the frontier at the start of the serialized
-         * object during object serialization.
-         */
-	pointer serializeStart;
 	GC_thread signalHandler; /* The mutator signal handler thread. */
 	sigset_t signalsHandled; /* The signals handler expects to be handled. */
 	/* signalIsPending is TRUE iff a signal has been received but not
@@ -491,6 +489,11 @@
 /* ---------------------------------------------------------------- */
 /*                           GC functions                           */
 /* ---------------------------------------------------------------- */
+
+/* GC_alignFrontier (s, p) returns the next properly aligned object start after
+ * p, possibly p itself.
+ */
+pointer GC_alignFrontier (GC_state s, pointer p);
 
 /* Allocate an array with the specified header and number of elements.
  * Also ensure that frontier + bytesNeeded < limit after the array is allocated.



1.20      +2 -3      mlton/runtime/my-lib.c

Index: my-lib.c
===================================================================
RCS file: /cvsroot/mlton/mlton/runtime/my-lib.c,v
retrieving revision 1.19
retrieving revision 1.20
diff -u -r1.19 -r1.20
--- my-lib.c	10 Apr 2003 02:03:10 -0000	1.19
+++ my-lib.c	24 Apr 2003 20:51:03 -0000	1.20
@@ -39,10 +39,9 @@
 	exit(1);
 }
 
-void asfail(char *file, int line, char *prop)
-{
+void asfail(char *file, int line, char *prop) {
 	fflush(stdout);
-	fprintf(stderr, "%s %d: assert(%s) failed.\n", file, line, prop);
+	fprintf(stderr, "%s:%d: assert(%s) failed.\n", file, line, prop);
 	abort();
 }
 



1.14      +1 -0      mlton/runtime/basis/IntInf.c

Index: IntInf.c
===================================================================
RCS file: /cvsroot/mlton/mlton/runtime/basis/IntInf.c,v
retrieving revision 1.13
retrieving revision 1.14
diff -u -r1.13 -r1.14
--- IntInf.c	7 Jan 2003 17:56:08 -0000	1.13
+++ IntInf.c	24 Apr 2003 20:51:04 -0000	1.14
@@ -121,6 +121,7 @@
 }
 
 static inline void setFrontier (pointer p) {
+	p = GC_alignFrontier (&gcState, p);
 	GC_profileAllocInc (&gcState, p - gcState.frontier);
 	gcState.frontier = p;
 	assert (gcState.frontier <= gcState.limitPlusSlop);





-------------------------------------------------------
This sf.net email is sponsored by:ThinkGeek
Welcome to geek heaven.
http://thinkgeek.com/sf
_______________________________________________
MLton-devel mailing list
MLton-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/mlton-devel