[MLton-commit] r4064

Matthew Fluet MLton@mlton.org
Sun, 4 Sep 2005 20:19:14 -0700


More progress on refactoring GC.
----------------------------------------------------------------------

U   mlton/branches/on-20050822-x86_64-branch/runtime/gc/Makefile
U   mlton/branches/on-20050822-x86_64-branch/runtime/gc/TODO
A   mlton/branches/on-20050822-x86_64-branch/runtime/gc/align.c
A   mlton/branches/on-20050822-x86_64-branch/runtime/gc/array.c
A   mlton/branches/on-20050822-x86_64-branch/runtime/gc/array.h
A   mlton/branches/on-20050822-x86_64-branch/runtime/gc/assumptions.c
A   mlton/branches/on-20050822-x86_64-branch/runtime/gc/foreach.c
U   mlton/branches/on-20050822-x86_64-branch/runtime/gc/gc_state.h
U   mlton/branches/on-20050822-x86_64-branch/runtime/gc/heap.h
U   mlton/branches/on-20050822-x86_64-branch/runtime/gc/mltongc.txt
U   mlton/branches/on-20050822-x86_64-branch/runtime/gc/model.c
U   mlton/branches/on-20050822-x86_64-branch/runtime/gc/model.h
U   mlton/branches/on-20050822-x86_64-branch/runtime/gc/object.c
U   mlton/branches/on-20050822-x86_64-branch/runtime/gc/object.h
A   mlton/branches/on-20050822-x86_64-branch/runtime/gc/pointer.c
A   mlton/branches/on-20050822-x86_64-branch/runtime/gc/pointer.h
U   mlton/branches/on-20050822-x86_64-branch/runtime/gc/stack.h
U   mlton/branches/on-20050822-x86_64-branch/runtime/gc/thread.h
U   mlton/branches/on-20050822-x86_64-branch/runtime/gc/util.h

----------------------------------------------------------------------

Modified: mlton/branches/on-20050822-x86_64-branch/runtime/gc/Makefile
===================================================================
--- mlton/branches/on-20050822-x86_64-branch/runtime/gc/Makefile	2005-09-04 11:56:31 UTC (rev 4063)
+++ mlton/branches/on-20050822-x86_64-branch/runtime/gc/Makefile	2005-09-05 03:19:05 UTC (rev 4064)
@@ -48,18 +48,27 @@
 CFLAGS = -O2 -Wall -I. -D_FILE_OFFSET_BITS=64 $(FLAGS)
 DEBUGFLAGS = $(CFLAGS) -gstabs+ -g2
 
+## Order matters, as these are concatenated together to form "gc.c".
 CFILES = 								\
 	gc_prefix.c							\
 	debug.c								\
+	pointer.c							\
+	align.c								\
+	model.c								\
 	object.c							\
-	model.c								\
+	array.c								\
+	foreach.c							\
+	assumptions.c							\
 	gc_suffix.c
 
+## Order matters, as these are concatenated together to form "gc.h".
 HFILES = 								\
 	gc_prefix.h							\
 	util.h								\
+	pointer.h							\
 	model.h								\
 	object.h							\
+	array.h								\
 	stack.h								\
 	frame.h								\
 	thread.h							\

Modified: mlton/branches/on-20050822-x86_64-branch/runtime/gc/TODO
===================================================================
--- mlton/branches/on-20050822-x86_64-branch/runtime/gc/TODO	2005-09-04 11:56:31 UTC (rev 4063)
+++ mlton/branches/on-20050822-x86_64-branch/runtime/gc/TODO	2005-09-05 03:19:05 UTC (rev 4064)
@@ -9,3 +9,5 @@
         choosing the representation for Weaks based on the model and
         the alignment; also, the GC will need to bump the pointer to
         the word after the header to get GC_weak to overlay properly.
+* what type should be used for the size field in GC_heap?  I'm using
+        size_t currently, since that is the type needed by malloc.

Copied: mlton/branches/on-20050822-x86_64-branch/runtime/gc/align.c (from rev 4025, mlton/branches/on-20050822-x86_64-branch/runtime/gc.c)
===================================================================
--- mlton/branches/on-20050822-x86_64-branch/runtime/gc.c	2005-08-22 22:48:34 UTC (rev 4025)
+++ mlton/branches/on-20050822-x86_64-branch/runtime/gc/align.c	2005-09-05 03:19:05 UTC (rev 4064)
@@ -0,0 +1,71 @@
+/* Copyright (C) 1999-2005 Henry Cejtin, Matthew Fluet, Suresh
+ *    Jagannathan, and Stephen Weeks.
+ * Copyright (C) 1997-2000 NEC Research Institute.
+ *
+ * MLton is released under a BSD-style license.
+ * See the file MLton-LICENSE for details.
+ */
+
+static inline uintptr_t align (uintptr_t a, uintptr_t b) {
+  assert (a >= 0);
+  assert (b >= 1);
+  a += b - 1;
+  a -= a % b;
+  return a;       
+}
+
+/*
+static inline W64 w64align (W64 a, uint b) {
+        W64 res;
+
+        assert (a >= 0);
+        assert (b >= 1);
+        res = a + b - 1;
+        res = res - res % b;
+        if (FALSE)
+                fprintf (stderr, "%llu = w64Align (%llu, %u)\n", res, a, b);
+        return res;
+}
+*/
+
+static bool isAligned (uintptr_t a, size_t b) {
+  return 0 == a % b;
+}
+
+#if ASSERT
+static bool isAlignedFrontier (GC_state s, pointer p) {
+  return isAligned ((uintptr_t)p + GC_NORMAL_HEADER_SIZE, s->alignment);
+}
+
+/*
+static bool isAlignedReserved (GC_state s, uint r) {
+  return isAligned (STACK_HEADER_SIZE + sizeof (struct GC_stack) + r, 
+                    s->alignment);
+}
+*/
+#endif
+
+static inline size_t pad (GC_state s, size_t bytes, size_t extra) {
+  return align (bytes + extra, s->alignment) - extra;
+}
+
+/*
+static inline pointer alignFrontier (GC_state s, pointer p) {
+  return (pointer) pad (s, (uintptr_t)p, GC_NORMAL_HEADER_SIZE);
+}
+
+pointer GC_alignFrontier (GC_state s, pointer p) {
+  return alignFrontier (s, p);
+}
+
+static inline uint stackReserved (GC_state s, uint r) {
+  uint res;
+  
+  res = pad (s, r, STACK_HEADER_SIZE + sizeof (struct GC_stack));
+  if (DEBUG_STACKS)
+    fprintf (stderr, "%s = stackReserved (%s)\n",
+             uintToCommaString (res),
+             uintToCommaString (r));
+  return res;
+}
+*/

Copied: mlton/branches/on-20050822-x86_64-branch/runtime/gc/array.c (from rev 4025, mlton/branches/on-20050822-x86_64-branch/runtime/gc.c)
===================================================================
--- mlton/branches/on-20050822-x86_64-branch/runtime/gc.c	2005-08-22 22:48:34 UTC (rev 4025)
+++ mlton/branches/on-20050822-x86_64-branch/runtime/gc/array.c	2005-09-05 03:19:05 UTC (rev 4064)
@@ -0,0 +1,53 @@
+/* Copyright (C) 1999-2005 Henry Cejtin, Matthew Fluet, Suresh
+ *    Jagannathan, and Stephen Weeks.
+ * Copyright (C) 1997-2000 NEC Research Institute.
+ *
+ * MLton is released under a BSD-style license.
+ * See the file MLton-LICENSE for details.
+ */
+
+#if ASSERT
+static pointer arrayPointer (GC_state s, 
+                             pointer a, 
+                             uint32_t arrayIndex, 
+                             uint32_t pointerIndex) {
+  bool hasIdentity;
+  GC_header header;
+  uint16_t numNonObjptrs;
+  uint16_t numObjptrs;
+  GC_objectTypeTag tag;
+  
+  header = GC_getHeader (a);
+  SPLIT_HEADER();
+  assert (tag == ARRAY_TAG);
+
+  size_t bytesPerElement = 
+    numNonObjptrsToBytes(numNonObjptrs, ARRAY_TAG) 
+    + (numObjptrs * OBJPTR_SIZE);
+
+  return a
+    + arrayIndex * bytesPerElement
+    + numNonObjptrsToBytes(numNonObjptrs, tag) 
+    + pointerIndex * OBJPTR_SIZE;
+}
+#endif
+
+/* The number of bytes in an array, not including the header. */
+static inline size_t arrayNumBytes (GC_state s,
+                                    pointer p, 
+                                    uint16_t numObjptrs,
+                                    uint16_t numNonObjptrs) {
+  size_t bytesPerElement;
+  GC_arrayLength numElements;
+  size_t result;
+        
+  numElements = GC_arrayNumElements (p);
+  bytesPerElement = 
+    numNonObjptrsToBytes(numNonObjptrs, ARRAY_TAG) 
+    + (numObjptrs * OBJPTR_SIZE);
+  result = numElements * bytesPerElement;
+  /* Empty arrays have OBJPTR_SIZE bytes for the forwarding pointer. */
+  if (0 == result) 
+    result = OBJPTR_SIZE;
+  return pad (s, result, GC_ARRAY_HEADER_SIZE);
+}

Copied: mlton/branches/on-20050822-x86_64-branch/runtime/gc/array.h (from rev 4025, mlton/branches/on-20050822-x86_64-branch/runtime/gc.h)
===================================================================
--- mlton/branches/on-20050822-x86_64-branch/runtime/gc.h	2005-08-22 22:48:34 UTC (rev 4025)
+++ mlton/branches/on-20050822-x86_64-branch/runtime/gc/array.h	2005-09-05 03:19:05 UTC (rev 4064)
@@ -0,0 +1,34 @@
+/* Copyright (C) 1999-2005 Henry Cejtin, Matthew Fluet, Suresh
+ *    Jagannathan, and Stephen Weeks.
+ * Copyright (C) 1997-2000 NEC Research Institute.
+ *
+ * MLton is released under a BSD-style license.
+ * See the file MLton-LICENSE for details.
+ */
+
+/*
+ * Array objects have the following layout:
+ * 
+ * counter word32 :: 
+ * length word32 :: 
+ * header word32 :: 
+ * ( (non heap-pointers)* :: (heap pointers)* )*
+ *
+ * The counter word is used by mark compact GC.  The length word is
+ * the number of elements in the array.  Array elements have the same
+ * individual layout as normal objects, omitting the header word.
+ */
+typedef uint32_t GC_arrayLength;
+enum {
+  GC_ARRAY_LENGTH_SIZE =  sizeof(GC_arrayLength),
+  GC_ARRAY_COUNTER_SIZE = GC_ARRAY_LENGTH_SIZE,
+  GC_ARRAY_HEADER_SIZE =  GC_ARRAY_COUNTER_SIZE + GC_ARRAY_LENGTH_SIZE + GC_HEADER_SIZE,
+};
+
+static inline GC_arrayLength* GC_arrayNumElementsp (pointer a) {
+  return (GC_arrayLength*)(a - GC_HEADER_SIZE - GC_ARRAY_LENGTH_SIZE);
+}
+
+static inline GC_arrayLength GC_arrayNumElements (pointer a) {
+  return *(GC_arrayNumElementsp (a));
+}

Added: mlton/branches/on-20050822-x86_64-branch/runtime/gc/assumptions.c
===================================================================
--- mlton/branches/on-20050822-x86_64-branch/runtime/gc/assumptions.c	2005-09-04 11:56:31 UTC (rev 4063)
+++ mlton/branches/on-20050822-x86_64-branch/runtime/gc/assumptions.c	2005-09-05 03:19:05 UTC (rev 4064)
@@ -0,0 +1,17 @@
+/* Copyright (C) 2005-2005 Henry Cejtin, Matthew Fluet, Suresh
+ *    Jagannathan, and Stephen Weeks.
+ *
+ * MLton is released under a BSD-style license.
+ * See the file MLton-LICENSE for details.
+ */
+
+/*
+ * Various assumptions about the underlying C translator.  This is the
+ * place for characteristics that are not dictated by the C standard,
+ * but which are reasonable to assume on a wide variety of target
+ * platforms.  Working around these assumptions would be difficult.
+ */
+void checkAssumptions () {
+  assert(CHAR_BIT == 8);
+  /* assert(repof(uintptr_t) == TWOS); */
+}

Copied: mlton/branches/on-20050822-x86_64-branch/runtime/gc/foreach.c (from rev 4025, mlton/branches/on-20050822-x86_64-branch/runtime/gc.c)
===================================================================
--- mlton/branches/on-20050822-x86_64-branch/runtime/gc.c	2005-08-22 22:48:34 UTC (rev 4025)
+++ mlton/branches/on-20050822-x86_64-branch/runtime/gc/foreach.c	2005-09-05 03:19:05 UTC (rev 4064)
@@ -0,0 +1,209 @@
+/* Copyright (C) 1999-2005 Henry Cejtin, Matthew Fluet, Suresh
+ *    Jagannathan, and Stephen Weeks.
+ * Copyright (C) 1997-2000 NEC Research Institute.
+ *
+ * MLton is released under a BSD-style license.
+ * See the file MLton-LICENSE for details.
+ */
+
+typedef void (*GC_pointerFun) (GC_state s, objptr *pp);
+
+static inline void maybeCall (GC_pointerFun f, GC_state s, objptr *pp) {
+  if (GC_isObjptr (*pp))
+    f (s, pp);
+}
+
+/* foreachGlobal (s, f)
+ * 
+ * Apply f to each global object pointer into the heap. 
+ */
+static inline void foreachGlobal (GC_state s, GC_pointerFun f) {
+  for (int i = 0; i < s->globalsSize; ++i) {
+    if (DEBUG_DETAILED)
+      fprintf (stderr, "foreachGlobal %u\n", i);
+    maybeCall (f, s, &s->globals [i]);
+  }
+  if (DEBUG_DETAILED)
+    fprintf (stderr, "foreachGlobal threads\n");
+  maybeCall (f, s, &s->callFromCHandler);
+  maybeCall (f, s, &s->currentThread);
+  maybeCall (f, s, &s->savedThread);
+  maybeCall (f, s, &s->signalHandler);
+}
+
+
+/* foreachPointerInObject (s, p, skipWeaks, f) 
+ * 
+ * Applies f to each object pointer in the object pointed to by p.
+ * Returns pointer to the end of object, i.e. just past object.
+ *
+ * If skipWeaks, then the object pointer in weak objects is skipped.
+ */
+static inline pointer foreachPointerInObject (GC_state s, 
+                                              pointer p,
+                                              bool skipWeaks,
+                                              GC_pointerFun f) {
+  bool hasIdentity;
+  GC_header header;
+  uint16_t numNonObjptrs;
+  uint16_t numObjptrs;
+  GC_objectTypeTag tag;
+
+  header = GC_getHeader (p);
+  SPLIT_HEADER();
+  if (DEBUG_DETAILED)
+    fprintf (stderr, 
+             "foreachPointerInObject ("FMTPTR")"
+             "  header = "FMTHDR
+             "  tag = %s"
+             "  numNonObjptrs = %d"
+             "  numObjptrs = %d\n", 
+             (intptr_t)p, header, tagToString (tag), 
+             numNonObjptrs, numObjptrs);
+  if (NORMAL_TAG == tag) {
+    p += numNonObjptrsToBytes(numNonObjptrs, NORMAL_TAG);
+    pointer max = p + (numObjptrs * OBJPTR_SIZE);
+    /* Apply f to all internal pointers. */
+    for ( ; p < max; p += OBJPTR_SIZE) {
+      if (DEBUG_DETAILED)
+        fprintf (stderr, 
+                 "p = "FMTPTR"  *p = "FMTOBJPTR"\n",
+                 (intptr_t)p, *(objptr*)p);
+      maybeCall (f, s, (objptr*)p);
+    }
+  } else if (WEAK_TAG == tag) {
+    p += numNonObjptrsToBytes(numNonObjptrs, NORMAL_TAG);
+    if (not skipWeaks and 1 == numObjptrs) {
+      maybeCall (f, s, (objptr*)p);
+      p += OBJPTR_SIZE;
+    }
+  } else if (ARRAY_TAG == tag) {
+    size_t bytesPerElement;
+    size_t dataBytes;
+    pointer max;
+    GC_arrayLength numElements;
+    
+    numElements = GC_arrayNumElements (p);
+    bytesPerElement = 
+      numNonObjptrsToBytes(numNonObjptrs, ARRAY_TAG) 
+      + (numObjptrs * OBJPTR_SIZE);
+    dataBytes = numElements * bytesPerElement;
+    /* Must check 0 == dataBytes before 0 == numPointers to correctly
+     * handle arrays when both are true.
+     */
+    if (0 == dataBytes)
+      /* Empty arrays have space for forwarding pointer. */
+      dataBytes = OBJPTR_SIZE;
+    else if (0 == numObjptrs)
+      /* No pointers to process. */
+      ;
+    else {
+      max = p + dataBytes;
+      if (0 == numNonObjptrs)
+        /* Array with only pointers. */
+        for (; p < max; p += OBJPTR_SIZE)
+          maybeCall (f, s, (objptr*)p);
+      else {
+        /* Array with a mix of pointers and non-pointers. */
+        size_t nonObjptrBytes;
+        size_t objptrBytes;
+        
+        nonObjptrBytes = numNonObjptrsToBytes(numNonObjptrs, ARRAY_TAG);
+        objptrBytes = numObjptrs * OBJPTR_SIZE;
+
+        /* For each array element. */
+        while (p < max) {
+          pointer max2;
+          
+          /* Skip the non-pointers. */
+          p += nonObjptrBytes;
+          max2 = p + objptrBytes;
+          /* For each internal pointer. */
+          for ( ; p < max2; p += OBJPTR_SIZE) 
+            maybeCall (f, s, (objptr*)p);
+        }
+      }
+      assert (p == max);
+      p -= dataBytes;
+    }
+    p += pad (s, dataBytes, GC_ARRAY_HEADER_SIZE);
+  } else { /* stack */
+/*     GC_stack stack; */
+/*     pointer top, bottom; */
+/*     int i; */
+/*     word returnAddress; */
+/*     GC_frameLayout *layout; */
+/*     GC_offsets frameOffsets; */
+    
+/*     assert (STACK_TAG == tag); */
+/*     stack = (GC_stack)p; */
+/*     bottom = stackBottom (s, stack); */
+/*     top = stackTop (s, stack); */
+/*     assert (stack->used <= stack->reserved); */
+/*     while (top > bottom) { */
+/*       /\* Invariant: top points just past a "return address". *\/ */
+/*       returnAddress = *(word*) (top - WORD_SIZE); */
+/*       if (DEBUG) { */
+/*         fprintf (stderr, "  top = %d  return address = ", */
+/*                  top - bottom); */
+/*         fprintf (stderr, "0x%08x.\n", returnAddress); */
+/*       } */
+/*       layout = getFrameLayout (s, returnAddress);  */
+/*       frameOffsets = layout->offsets; */
+/*       top -= layout->numBytes; */
+/*       for (i = 0 ; i < frameOffsets[0] ; ++i) { */
+/*         if (DEBUG) */
+/*           fprintf(stderr,  */
+/*                   "    offset %u  address 0x%08x\n",  */
+/*                   frameOffsets[i + 1], */
+/*                   (uint)(*(pointer*)(top + frameOffsets[i + 1]))); */
+/*         maybeCall(f, s,  */
+/*                   (pointer*) */
+/*                   (top + frameOffsets[i + 1])); */
+/*       } */
+/*     } */
+/*     assert(top == bottom); */
+/*     p += sizeof (struct GC_stack) + stack->reserved; */
+  }
+  return p;
+}
+
+/* foreachPointerInRange (s, front, back, skipWeaks, f)
+ *
+ * Apply f to each pointer between front and *back, which should be a
+ * contiguous sequence of objects, where front points at the beginning
+ * of the first object and *back points just past the end of the last
+ * object.  f may increase *back (for example, this is done by
+ * forward).  foreachPointerInRange returns a pointer to the end of
+ * the last object it visits.
+ *
+ * If skipWeaks, then the object pointer in weak objects is skipped.
+ */
+
+static inline pointer foreachPointerInRange (GC_state s, 
+                                             pointer front, 
+                                             pointer *back,
+                                             bool skipWeaks,
+                                             GC_pointerFun f) {
+  pointer b;
+
+  assert (isAlignedFrontier (s, front));
+  if (DEBUG_DETAILED)
+    fprintf (stderr, 
+             "foreachPointerInRange  front = "FMTPTR"  *back = "FMTPTR"\n",
+             (intptr_t)front, (intptr_t)(*back));
+  b = *back;
+  assert (front <= b);
+  while (front < b) {
+    while (front < b) {
+      assert (isAligned ((uintptr_t)front, GC_MODEL_MINALIGN));
+      if (DEBUG_DETAILED)
+        fprintf (stderr, 
+                 "front = "FMTPTR"  *back = "FMTPTR"\n",
+                 (intptr_t)front, (intptr_t)(*back));
+      front = foreachPointerInObject (s, toData (s, front), skipWeaks, f);
+    }
+    b = *back;
+  }
+  return front;
+}

Modified: mlton/branches/on-20050822-x86_64-branch/runtime/gc/gc_state.h
===================================================================
--- mlton/branches/on-20050822-x86_64-branch/runtime/gc/gc_state.h	2005-09-04 11:56:31 UTC (rev 4063)
+++ mlton/branches/on-20050822-x86_64-branch/runtime/gc/gc_state.h	2005-09-05 03:19:05 UTC (rev 4064)
@@ -1,8 +1,18 @@
 
 typedef struct GC_state {
+  size_t alignment; /* */
+  objptr callFromCHandler; /* Handler for exported C calls (in heap). */
+  objptr currentThread; /* Currently executing thread (in heap). */
+  objptr *globals;
+  uint32_t globalsSize;
   struct GC_heap heap;
-  struct GC_heap secondaryHeap; /* Used for major copying collection. */
   GC_objectType *objectTypes; /* Array of object types. */
   uint32_t objectTypesSize; /* Cardinality of objectTypes array. */
+  objptr savedThread; /* Result of GC_copyCurrentThread.
+                       * Thread interrupted by arrival of signal.
+                       */
+  struct GC_heap secondaryHeap; /* Used for major copying collection. */
+  objptr signalHandler; /* Handler for signals (in heap). */
+  /*Bool*/bool summary; /* Print a summary of gc info when program exits. */
   GC_weak weaks; /* Linked list of (live) weak pointers */
 } *GC_state;

Modified: mlton/branches/on-20050822-x86_64-branch/runtime/gc/heap.h
===================================================================
--- mlton/branches/on-20050822-x86_64-branch/runtime/gc/heap.h	2005-09-04 11:56:31 UTC (rev 4063)
+++ mlton/branches/on-20050822-x86_64-branch/runtime/gc/heap.h	2005-09-05 03:19:05 UTC (rev 4064)
@@ -18,6 +18,6 @@
 */
 
 typedef struct GC_heap {
-  uint32_t size;
+  size_t size;
   pointer start;          /* start of memory area */
 } *GC_heap;

Modified: mlton/branches/on-20050822-x86_64-branch/runtime/gc/mltongc.txt
===================================================================
--- mlton/branches/on-20050822-x86_64-branch/runtime/gc/mltongc.txt	2005-09-04 11:56:31 UTC (rev 4063)
+++ mlton/branches/on-20050822-x86_64-branch/runtime/gc/mltongc.txt	2005-09-05 03:19:05 UTC (rev 4064)
@@ -284,7 +284,7 @@
    programs compiled on 64-bit architectures are essentially the same
    as those compiled on 32-bit architectures.  In particular, 2^19
    object types should remain viable for some time to come.  Likewise,
-   the 20 counter bits in the header word (used to implement the mark
+   the 10 counter bits in the header word (used to implement the mark
    stack) should continue to be sufficient for the number of heap
    pointers in a normal heap object.  Finally, 16-bits for the
    numNonPointers and numPointers fields of a GC_ObjectType will

Modified: mlton/branches/on-20050822-x86_64-branch/runtime/gc/model.c
===================================================================
--- mlton/branches/on-20050822-x86_64-branch/runtime/gc/model.c	2005-09-04 11:56:31 UTC (rev 4063)
+++ mlton/branches/on-20050822-x86_64-branch/runtime/gc/model.c	2005-09-05 03:19:05 UTC (rev 4064)
@@ -6,17 +6,20 @@
  */
 
 static inline pointer objptrToPointer (objptr O, pointer B) {
-  intptr_t O_ = (intptr_t)O;
-  intptr_t B_;
+  uintptr_t O_ = (uintptr_t)O;
+  uintptr_t B_;
+  unsigned int S_ = GC_MODEL_SHIFT;
+  uintptr_t P_;
   pointer P;
 
   if GC_MODEL_USEBASE {
-    B_ = (intptr_t)B;
+    B_ = (uintptr_t)B;
   } else {
     B_ = 0;
   }
 
-  P = (pointer)((O_ << GC_MODEL_SHIFT) + B_);
+  P_ = ((O_ << S_) + B_);
+  P = (pointer)P_;
   if (DEBUG_DETAILED) 
     fprintf (stderr, "objptrToPointer ("FMTOBJPTR") = "FMTPTR"\n", O, (intptr_t)P);
   
@@ -24,19 +27,52 @@
 }
 
 static inline objptr pointerToObjptr (pointer P, pointer B) {
-  intptr_t P_ = (intptr_t)P;
-  intptr_t B_;
+  uintptr_t P_ = (uintptr_t)P;
+  uintptr_t B_;
+  unsigned int S_ = GC_MODEL_SHIFT;
+  uintptr_t O_;
   objptr O;
 
   if GC_MODEL_USEBASE {
-    B_ = (intptr_t)B;
+    B_ = (uintptr_t)B;
   } else {
     B_ = 0;
   }
 
-  O = (objptr)((P_ - B_) >> GC_MODEL_SHIFT);
+  O_ = ((P_ - B_) >> S_);
+  O = (objptr)O_;
   if (DEBUG_DETAILED) 
     fprintf (stderr, "pointerToObjptr ("FMTPTR") = "FMTOBJPTR"\n", (intptr_t)P, O);
 
   return O;
 }
+
+/* GC_isObjptr returns true if p looks like an object pointer. */
+static inline bool GC_isObjptr (objptr p) {
+  if GC_MODEL_NONPTR {
+    unsigned int shift = GC_MODEL_MINALIGN_SHIFT - GC_MODEL_SHIFT;
+    objptr mask = ~((~((objptr)0)) << shift);
+    return (0 == (p & mask));
+  } else {
+    return TRUE;
+  }
+}
+
+/*
+ * Note that by indirectly fetching and storing object pointers, the
+ * following functions admit implementations that behave according to
+ * model characteristics determined at runtime.  Hence, by making
+ * exclusive use of these functions (and adding a GC_state->model
+ * field set by the compiled program), we may be able to implement the
+ * runtime in a manner which is agnostic to the actual objptr
+ * representation.
+ */
+static inline pointer fetchObjptrToPointer (pointer OP, pointer B) {
+  return objptrToPointer (*((objptr*)OP), B);
+}
+static inline void storeObjptrFromPointer (pointer OP, pointer P, pointer B) {
+  *((objptr*)OP) = pointerToObjptr (P, B);
+}
+static inline size_t objptrSize () {
+  return OBJPTR_SIZE;
+}

Modified: mlton/branches/on-20050822-x86_64-branch/runtime/gc/model.h
===================================================================
--- mlton/branches/on-20050822-x86_64-branch/runtime/gc/model.h	2005-09-04 11:56:31 UTC (rev 4063)
+++ mlton/branches/on-20050822-x86_64-branch/runtime/gc/model.h	2005-09-05 03:19:05 UTC (rev 4064)
@@ -28,7 +28,7 @@
 D       32      slow     16     4
 E       32      slow     32     8
 F       40      slow    256     4
-G       64      fast     4G     8
+G       64      fast     4G     4
 
 Each of the (A-F) has a variant (AX-FX) in which pointers are added to
 some constant base address.  This gives access to any region in the
@@ -139,35 +139,74 @@
 
 #if (defined (GC_MODEL_A))
 #define GC_MODEL_BITSIZE  32
-#define GC_MODEL_SHIFT  0
+#define GC_MODEL_SHIFT    0
 #define GC_MODEL_USEBASE  FALSE
+#define GC_MODEL_MINALIGN_SHIFT 2
 #elif (defined (GC_MODEL_AX))
-#define GC_MODEL_BITSIZE  32
-#define GC_MODEL_SHIFT  0
-#define GC_MODEL_USEBASE  TRUE
+#define GC_MODEL_BITSIZE 32
+#define GC_MODEL_SHIFT   0
+#define GC_MODEL_USEBASE TRUE
+#define GC_MODEL_MINALIGN_SHIFT 2
 #elif (defined (GC_MODEL_B))
-#define GC_MODEL_BITSIZE  32
-#define GC_MODEL_SHIFT  1
-#define GC_MODEL_USEBASE  FALSE
+#define GC_MODEL_BITSIZE 32
+#define GC_MODEL_SHIFT   1
+#define GC_MODEL_USEBASE FALSE
+#define GC_MODEL_MINALIGN_SHIFT 2
 #elif (defined (GC_MODEL_BX))
-#define GC_MODEL_BITSIZE  32
-#define GC_MODEL_SHIFT  1
-#define GC_MODEL_USEBASE  TRUE
+#define GC_MODEL_BITSIZE 32
+#define GC_MODEL_SHIFT   1
+#define GC_MODEL_USEBASE TRUE
+#define GC_MODEL_MINALIGN_SHIFT 2
 #elif (defined (GC_MODEL_C))
-#define GC_MODEL_BITSIZE  32
-#define GC_MODEL_SHIFT  2
-#define GC_MODEL_USEBASE  FALSE
+#define GC_MODEL_BITSIZE 32
+#define GC_MODEL_SHIFT   2
+#define GC_MODEL_USEBASE FALSE
+#define GC_MODEL_MINALIGN_SHIFT 3
 #elif (defined (GC_MODEL_CX))
+#define GC_MODEL_BITSIZE 32
+#define GC_MODEL_SHIFT   2
+#define GC_MODEL_USEBASE TRUE
+#define GC_MODEL_MINALIGN_SHIFT 3
+#elif (defined (GC_MODEL_D))
+#define GC_MODEL_BITSIZE 32
+#define GC_MODEL_SHIFT   2
+#define GC_MODEL_USEBASE FALSE
+#define GC_MODEL_MINALIGN_SHIFT 2
+#elif (defined (GC_MODEL_DX))
+#define GC_MODEL_BITSIZE 32
+#define GC_MODEL_SHIFT   2
+#define GC_MODEL_USEBASE TRUE
+#define GC_MODEL_MINALIGN_SHIFT 2
+#elif (defined (GC_MODEL_E))
+#define GC_MODEL_BITSIZE 32
+#define GC_MODEL_SHIFT   3
+#define GC_MODEL_USEBASE FALSE
+#define GC_MODEL_MINALIGN_SHIFT 3
+#elif (defined (GC_MODEL_EX))
 #define GC_MODEL_BITSIZE  32
-#define GC_MODEL_SHIFT  2
+#define GC_MODEL_SHIFT    3
 #define GC_MODEL_USEBASE  TRUE
+#define GC_MODEL_MINALIGN_SHIFT 3
+#elif (defined (GC_MODEL_F))
+#define GC_MODEL_BITSIZE  40
+#define GC_MODEL_SHIFT    0
+#define GC_MODEL_USEBASE  FALSE
+#define GC_MODEL_MINALIGN_SHIFT 2
+#elif (defined (GC_MODEL_EX))
+#define GC_MODEL_BITSIZE  40
+#define GC_MODEL_SHIFT    0
+#define GC_MODEL_USEBASE  TRUE
+#define GC_MODEL_MINALIGN_SHIFT 2
 #elif (defined (GC_MODEL_G))
 #define GC_MODEL_BITSIZE  64
-#define GC_MODEL_SHIFT  0
+#define GC_MODEL_SHIFT    0
 #define GC_MODEL_USEBASE  FALSE
+#define GC_MODEL_MINALIGN_SHIFT 2
 #else 
-#error gc model undefined
+#error gc model unknown
 #endif
+#define GC_MODEL_NONPTR ((GC_MODEL_MINALIGN_SHIFT - GC_MODEL_SHIFT) > 0)
+#define GC_MODEL_MINALIGN TWOPOWER(GC_MODEL_MINALIGN_SHIFT)
 
 #define OBJPTR_TYPE__(z) uint ## z ## _t
 #define OBJPTR_TYPE_(z) OBJPTR_TYPE__(z)
@@ -178,3 +217,9 @@
 #define PRIxOBJPTR_(z) PRIxOBJPTR__(z)
 #define PRIxOBJPTR PRIxOBJPTR_(GC_MODEL_BITSIZE)
 #define FMTOBJPTR "0x%016"PRIxOBJPTR
+
+#if GC_MODEL_NONPTR
+#define BOGUS_OBJPTR 0x1
+#else
+#error gc model does not admit bogus object pointer
+#endif

Modified: mlton/branches/on-20050822-x86_64-branch/runtime/gc/object.c
===================================================================
--- mlton/branches/on-20050822-x86_64-branch/runtime/gc/object.c	2005-09-04 11:56:31 UTC (rev 4063)
+++ mlton/branches/on-20050822-x86_64-branch/runtime/gc/object.c	2005-09-05 03:19:05 UTC (rev 4064)
@@ -32,17 +32,17 @@
     t = &s->objectTypes [objectTypeIndex];                                      \
     tag = t->tag;                                                               \
     hasIdentity = t->hasIdentity;                                               \
-    numNonPointers = t->numNonPointers;                                         \
-    numPointers = t->numPointers;                                               \
+    numNonObjptrs = t->numNonObjptrs;                                           \
+    numObjptrs = t->numObjptrs;                                                 \
     if (DEBUG_DETAILED)                                                         \
       fprintf (stderr,                                                          \
                "SPLIT_HEADER ("FMTHDR")"                                        \
                "  tag = %s"                                                     \
                "  hasIdentity = %u"                                             \
-               "  numNonPointers = %"PRIu16                                     \
-               "  numPointers = %"PRIu16"\n",                                   \
+               "  numNonObjptrs = %"PRIu16                                      \
+               "  numObjptrs = %"PRIu16"\n",                                    \
                header,                                                          \
-               tagToString(tag), hasIdenity, numNonPointers, numPointers);      \
+               tagToString(tag), hasIdentity, numNonObjptrs, numObjptrs);       \
   } while (0)
 
 static char* tagToString (GC_objectTypeTag tag) {
@@ -59,3 +59,36 @@
     die ("bad tag %u", tag);
   }
 }
+
+/* If p points at the beginning of an object, then toData p returns a
+ * pointer to the start of the object data.
+ */
+static inline pointer toData (GC_state s, pointer p) {
+  GC_header header;
+  pointer res;
+
+  assert (isAlignedFrontier (s, p));
+  header = *(GC_header*)p;
+  if (0 == header)
+    /* Looking at the counter word in an array. */
+    res = p + GC_ARRAY_HEADER_SIZE;
+  else
+    /* Looking at a header word. */
+    res = p + GC_NORMAL_HEADER_SIZE;
+  assert (isAligned ((uintptr_t)res, s->alignment));
+  return res;
+}
+
+static inline size_t numNonObjptrsToBytes (uint16_t numNonObjptrs, 
+                                           GC_objectTypeTag tag) {
+  switch (tag) {
+  case ARRAY_TAG:
+    return (size_t)(numNonObjptrs);
+  case NORMAL_TAG:
+    return (size_t)(numNonObjptrs) * 4;
+  case WEAK_TAG:
+    return (size_t)(numNonObjptrs) * 4;
+  default:
+    die ("bad tag %u", tag);
+  }
+}

Modified: mlton/branches/on-20050822-x86_64-branch/runtime/gc/object.h
===================================================================
--- mlton/branches/on-20050822-x86_64-branch/runtime/gc/object.h	2005-09-04 11:56:31 UTC (rev 4063)
+++ mlton/branches/on-20050822-x86_64-branch/runtime/gc/object.h	2005-09-05 03:19:05 UTC (rev 4064)
@@ -42,6 +42,22 @@
   MARK_SHIFT =       31
 };
 
+/* GC_getHeaderp (p)
+ *
+ * Returns a pointer to the header for the object pointed to by p.
+ */
+static inline GC_header* GC_getHeaderp (pointer p) {
+  return (GC_header*)(p - GC_HEADER_SIZE);
+}
+
+/* GC_getHeader (p) 
+ *
+ * Returns the header for the object pointed to by p. 
+ */
+static inline GC_header GC_getHeader (pointer p) {
+  return *(GC_getHeaderp(p));
+}
+
 /*
  * Normal objects have the following layout:
  *
@@ -60,23 +76,7 @@
   GC_NORMAL_HEADER_SIZE = GC_HEADER_SIZE,
 };
 
-/*
- * Array objects have the following layout:
- * 
- * counter word32 :: 
- * length word32 :: 
- * header word32 :: 
- * ( (non heap-pointers)* :: (heap pointers)* )*
- *
- * The counter word is used by mark compact GC.  The length word is
- * the number of elements in the array.  Array elements have the same
- * individual layout as normal objects, omitting the header word.
- */
-enum {
-  GC_ARRAY_LENGTH_SIZE =  4,
-  GC_ARRAY_COUNTER_SIZE = GC_ARRAY_LENGTH_SIZE,
-  GC_ARRAY_HEADER_SIZE =  GC_ARRAY_COUNTER_SIZE + GC_ARRAY_LENGTH_SIZE + GC_HEADER_SIZE,
-};
+/* Array objects are described in "array.h" */
 
 /* Stack objects are described in "stack.h" */
 
@@ -94,22 +94,22 @@
  * of object types that is emitted for each compiled program.  The
  * hasIdentity field indicates whether or not the object has mutable
  * fields, in which case it may not be hash-cons-ed.  In a normal
- * object, the numNonPointers field indicates the number of 32-bit
- * words of non heap-pointer data, while the numPointers field
+ * object, the numNonObjptrs field indicates the number of 32-bit
+ * words of non heap-pointer data, while the numObjptrs field
  * indicates the number of heap pointers.  In an array object, the
- * numNonPointers field indicates the number of bytes of non
- * heap-pointer data, while the numPointers field indicates the number
- * of heap pointers.  In a stack object, the numNonPointers and
- * numPointers fields are irrelevant.  In a weak object, the
- * numNonPointers and numPointers fields are interpreted as in a
- * normal object (and, hence, must be (0,1) or (0,0)).
+ * numNonObjptrs field indicates the number of bytes of non
+ * heap-pointer data, while the numObjptrs field indicates the number
+ * of heap pointers.  In a stack object, the numNonObjptrs and
+ * numObjptrs fields are irrelevant.  In a weak object, the
+ * numNonObjptrs and numObjptrs fields are interpreted as in a normal
+ * object (and, hence, must be (2,1) or (3,0)).
 */
 typedef struct {
         /* Keep tag first, at zero offset, since it is referenced most often. */
         GC_objectTypeTag tag;
         bool hasIdentity;
-        uint16_t numNonPointers;
-        uint16_t numPointers;
+        uint16_t numNonObjptrs;
+        uint16_t numObjptrs;
 } GC_objectType;
 enum {
   /* The type indices here must agree with those in backend/rep-type.fun. */

Copied: mlton/branches/on-20050822-x86_64-branch/runtime/gc/pointer.c (from rev 4025, mlton/branches/on-20050822-x86_64-branch/runtime/gc.h)
===================================================================
--- mlton/branches/on-20050822-x86_64-branch/runtime/gc.h	2005-08-22 22:48:34 UTC (rev 4025)
+++ mlton/branches/on-20050822-x86_64-branch/runtime/gc/pointer.c	2005-09-05 03:19:05 UTC (rev 4064)
@@ -0,0 +1,13 @@
+/* Copyright (C) 1999-2005 Henry Cejtin, Matthew Fluet, Suresh
+ *    Jagannathan, and Stephen Weeks.
+ * Copyright (C) 1997-2000 NEC Research Institute.
+ *
+ * MLton is released under a BSD-style license.
+ * See the file MLton-LICENSE for details.
+ */
+
+/* GC_isPointer returns true if p looks like a pointer. */
+static inline bool GC_isPointer (pointer p) {
+  uintptr_t mask = ~((~((uintptr_t)0)) << GC_MODEL_MINALIGN_SHIFT);
+  return (0 == ((uintptr_t)p & mask));
+}

Copied: mlton/branches/on-20050822-x86_64-branch/runtime/gc/pointer.h (from rev 4063, mlton/branches/on-20050822-x86_64-branch/runtime/gc/util.h)
===================================================================
--- mlton/branches/on-20050822-x86_64-branch/runtime/gc/util.h	2005-09-04 11:56:31 UTC (rev 4063)
+++ mlton/branches/on-20050822-x86_64-branch/runtime/gc/pointer.h	2005-09-05 03:19:05 UTC (rev 4064)
@@ -0,0 +1,11 @@
+/* Copyright (C) 1999-2005 Henry Cejtin, Matthew Fluet, Suresh
+ *    Jagannathan, and Stephen Weeks.
+ * Copyright (C) 1997-2000 NEC Research Institute.
+ *
+ * MLton is released under a BSD-style license.
+ * See the file MLton-LICENSE for details.
+ */
+
+typedef unsigned char* pointer;
+#define FMTPTR "0x%016"PRIxPTR
+#define BOGUS_POINTER 0x1

Modified: mlton/branches/on-20050822-x86_64-branch/runtime/gc/stack.h
===================================================================
--- mlton/branches/on-20050822-x86_64-branch/runtime/gc/stack.h	2005-09-04 11:56:31 UTC (rev 4063)
+++ mlton/branches/on-20050822-x86_64-branch/runtime/gc/stack.h	2005-09-05 03:19:05 UTC (rev 4064)
@@ -27,7 +27,7 @@
   /* markTop and markIndex are only used during marking.  They record
    * the current pointer in the stack that is being followed.  markTop
    * points to the top of the stack frame containing the pointer and
-   * markI is the index in that frames frameOffsets of the pointer
+   * markIndex is the index in that frames frameOffsets of the pointer
    * slot.  So, when the GC pointer reversal gets back to the stack,
    * it can continue with the next pointer (either in the current
    * frame or the next frame).

Modified: mlton/branches/on-20050822-x86_64-branch/runtime/gc/thread.h
===================================================================
--- mlton/branches/on-20050822-x86_64-branch/runtime/gc/thread.h	2005-09-04 11:56:31 UTC (rev 4063)
+++ mlton/branches/on-20050822-x86_64-branch/runtime/gc/thread.h	2005-09-05 03:19:05 UTC (rev 4064)
@@ -21,3 +21,5 @@
                           */
   objptr stack;          /* The stack for this thread. */
 } *GC_thread;
+
+#define BOGUS_THREAD (GC_thread)BOGUS_POINTER

Modified: mlton/branches/on-20050822-x86_64-branch/runtime/gc/util.h
===================================================================
--- mlton/branches/on-20050822-x86_64-branch/runtime/gc/util.h	2005-09-04 11:56:31 UTC (rev 4063)
+++ mlton/branches/on-20050822-x86_64-branch/runtime/gc/util.h	2005-09-05 03:19:05 UTC (rev 4064)
@@ -5,10 +5,6 @@
  * MLton is released under a BSD-style license.
  * See the file MLton-LICENSE for details.
  */
-
-#ifndef _UTIL_H_
-#define _UTIL_H_
-
 #define _ISOC99_SOURCE
 #define _BSD_SOURCE
 
@@ -26,6 +22,8 @@
 #include <stdio.h>
 #include <stdint.h>
 #include <inttypes.h>
+#include <stdlib.h>
+#include <limits.h>
 
 #include "../assert.h"
 
@@ -47,8 +45,3 @@
 extern void diee (char *fmt, ...)
                         __attribute__ ((format(printf, 1, 2)))
                         __attribute__ ((noreturn));
-
-typedef void* pointer;
-#define FMTPTR "0x%016"PRIxPTR
-
-#endif /* _UTIL_H_ */