[MLton-commit] r4060

Matthew Fluet MLton@mlton.org
Sat, 3 Sep 2005 17:58:10 -0700


Starting to build up a modularized GC, 
taking sizes and memory model into account.

----------------------------------------------------------------------

_U  mlton/branches/on-20050822-x86_64-branch/runtime/gc/
A   mlton/branches/on-20050822-x86_64-branch/runtime/gc/.ignore
A   mlton/branches/on-20050822-x86_64-branch/runtime/gc/Makefile
A   mlton/branches/on-20050822-x86_64-branch/runtime/gc/TODO
A   mlton/branches/on-20050822-x86_64-branch/runtime/gc/cheney-copy.c
A   mlton/branches/on-20050822-x86_64-branch/runtime/gc/cheney-copy.h
A   mlton/branches/on-20050822-x86_64-branch/runtime/gc/debug.c
A   mlton/branches/on-20050822-x86_64-branch/runtime/gc/gc_prefix.c
A   mlton/branches/on-20050822-x86_64-branch/runtime/gc/gc_prefix.h
A   mlton/branches/on-20050822-x86_64-branch/runtime/gc/gc_state.h
A   mlton/branches/on-20050822-x86_64-branch/runtime/gc/gc_suffix.c
A   mlton/branches/on-20050822-x86_64-branch/runtime/gc/gc_suffix.h
A   mlton/branches/on-20050822-x86_64-branch/runtime/gc/heap.h
A   mlton/branches/on-20050822-x86_64-branch/runtime/gc/mark-sweep.c
A   mlton/branches/on-20050822-x86_64-branch/runtime/gc/mark-sweep.h
A   mlton/branches/on-20050822-x86_64-branch/runtime/gc/model.c
A   mlton/branches/on-20050822-x86_64-branch/runtime/gc/model.h
A   mlton/branches/on-20050822-x86_64-branch/runtime/gc/object.c
A   mlton/branches/on-20050822-x86_64-branch/runtime/gc/object.h
A   mlton/branches/on-20050822-x86_64-branch/runtime/gc/util.h

----------------------------------------------------------------------


Property changes on: mlton/branches/on-20050822-x86_64-branch/runtime/gc
___________________________________________________________________
Name: svn:ignore
   + gc.h
gc.c


Added: mlton/branches/on-20050822-x86_64-branch/runtime/gc/.ignore
===================================================================
--- mlton/branches/on-20050822-x86_64-branch/runtime/gc/.ignore	2005-09-01 21:50:41 UTC (rev 4059)
+++ mlton/branches/on-20050822-x86_64-branch/runtime/gc/.ignore	2005-09-04 00:58:04 UTC (rev 4060)
@@ -0,0 +1,2 @@
+gc.h
+gc.c

Copied: mlton/branches/on-20050822-x86_64-branch/runtime/gc/Makefile (from rev 4025, mlton/branches/on-20050822-x86_64-branch/runtime/Makefile)
===================================================================
--- mlton/branches/on-20050822-x86_64-branch/runtime/Makefile	2005-08-22 22:48:34 UTC (rev 4025)
+++ mlton/branches/on-20050822-x86_64-branch/runtime/gc/Makefile	2005-09-04 00:58:04 UTC (rev 4060)
@@ -0,0 +1,95 @@
+## Copyright (C) 1999-2005 Henry Cejtin, Matthew Fluet, Suresh
+ #    Jagannathan, and Stephen Weeks.
+ # Copyright (C) 1997-2000 NEC Research Institute.
+ #
+ # MLton is released under a BSD-style license.
+ # See the file MLton-LICENSE for details.
+ ##
+
+PATH = ../../bin:$(shell echo $$PATH)
+
+TARGET = self
+TARGET_ARCH = $(shell ../../bin/host-arch)
+TARGET_OS = $(shell ../../bin/host-os)
+GCC_VERSION = $(shell gcc -v 2>&1 | grep 'gcc version' | sed 's/.*gcc version \(.\).*/\1/')
+
+FLAGS = -fomit-frame-pointer
+
+ifeq ($(TARGET_ARCH), x86)
+ifneq ($(findstring $(GCC_VERSION), 3 4),)
+FLAGS += -falign-loops=2 -falign-jumps=2 -falign-functions=5
+else
+FLAGS += -malign-loops=2 -malign-jumps=2 -malign-functions=5
+endif
+endif
+
+ifeq ($(TARGET_ARCH), amd64)
+FLAGS += -mtune=opteron -m32
+endif
+
+ifeq ($(TARGET_ARCH), sparc)
+FLAGS += -mv8 -m32
+endif
+
+ifeq ($(TARGET_OS), solaris)
+FLAGS += -Wa,-xarch=v8plusa -fcall-used-g5 -fcall-used-g7 -funroll-all-loops -mcpu=ultrasparc
+endif
+
+ifeq ($(TARGET), self)
+AR = ar rc
+RANLIB = ranlib
+else
+AR = $(TARGET)-ar rc
+RANLIB = $(TARGET)-ranlib
+FLAGS += -b $(TARGET)
+endif
+
+CC = gcc -std=gnu99
+CFLAGS = -O2 -Wall -I. -D_FILE_OFFSET_BITS=64 $(FLAGS)
+DEBUGFLAGS = $(CFLAGS) -gstabs+ -g2
+
+CFILES = 								\
+	gc_prefix.c							\
+	debug.c								\
+	object.c							\
+	model.c								\
+	gc_suffix.c
+
+HFILES = 								\
+	gc_prefix.h							\
+	util.h								\
+	object.h							\
+	model.h								\
+	heap.h								\
+	gc_state.h							\
+	gc_suffix.h
+
+all: gc.o gc-gdb.o
+
+gc-gdb.o: gc.c gc.h
+	$(CC) $(DEBUGFLAGS) -O1 -DASSERT=1 -c -o $@ gc.c
+
+gc.o: gc.c gc.h
+	$(CC) $(CFLAGS) -c -o $@ gc.c
+
+gc.c: $(CFILES)
+	rm -f gc.c
+	(								\
+		for f in $(CFILES); do					\
+			echo "#line 1 \"$$f\"";				\
+			cat $$f;					\
+		done;							\
+	) > gc.c
+
+gc.h: $(HFILES)
+	rm -f gc.h
+	(								\
+		for f in $(HFILES); do					\
+			echo "#line 1 \"$$f\"";				\
+			cat $$f;					\
+		done;							\
+	) > gc.h
+
+.PHONY: clean
+clean:
+	../bin/clean

Added: mlton/branches/on-20050822-x86_64-branch/runtime/gc/TODO
===================================================================
--- mlton/branches/on-20050822-x86_64-branch/runtime/gc/TODO	2005-09-01 21:50:41 UTC (rev 4059)
+++ mlton/branches/on-20050822-x86_64-branch/runtime/gc/TODO	2005-09-04 00:58:04 UTC (rev 4060)
@@ -0,0 +1,6 @@
+
+* reorder ZZZ_TYPE_INDEX
+* eliminate STRING_TYPE_INDEX, STRING_TYPE_HEADER in favor or WORD8.
+* fix semantics of numNonPointers for normal objects to mean bytes of
+        non-pointer data, rather than number of 32-bit words of
+        non-pointer data.

Copied: mlton/branches/on-20050822-x86_64-branch/runtime/gc/cheney-copy.c (from rev 4025, mlton/branches/on-20050822-x86_64-branch/runtime/gc.c)
===================================================================
--- mlton/branches/on-20050822-x86_64-branch/runtime/gc.c	2005-08-22 22:48:34 UTC (rev 4025)
+++ mlton/branches/on-20050822-x86_64-branch/runtime/gc/cheney-copy.c	2005-09-04 00:58:04 UTC (rev 4060)
@@ -0,0 +1,209 @@
+/* Copyright (C) 1999-2005 Henry Cejtin, Matthew Fluet, Suresh
+ *    Jagannathan, and Stephen Weeks.
+ * Copyright (C) 1997-2000 NEC Research Institute.
+ *
+ * MLton is released under a BSD-style license.
+ * See the file MLton-LICENSE for details.
+ */
+
+/* ---------------------------------------------------------------- */
+/*                    Cheney Copying Collection                     */
+/* ---------------------------------------------------------------- */
+
+/* forward (s, pp) forwards the object pointed to by *pp and updates *pp to 
+ * point to the new object. 
+ * It also updates the crossMap.
+ */
+static inline void forward (GC_state s, pointer *pp) {
+  pointer p;
+  GC_ObjectHeader header;
+  GC_ObjectTypeTag tag;
+
+  if (DEBUG_DETAILED)
+    fprintf (stderr, 
+             "forward  pp = 0x"PRIxPTR"  *pp = 0x"PRIxPTR"\n", 
+             pp, *pp);
+  assert (isInFromSpace (s, *pp));
+  p = *pp;
+  header = GC_getHeader (p);
+        if (DEBUG_DETAILED and FORWARDED == header)
+                fprintf (stderr, "already FORWARDED\n");
+        if (header != FORWARDED) { /* forward the object */
+                Bool hasIdentity;
+                uint headerBytes, objectBytes, size, skip;
+                uint numPointers, numNonPointers;
+
+                /* Compute the space taken by the header and object body. */
+                SPLIT_HEADER();
+                if (NORMAL_TAG == tag) { /* Fixed size object. */
+                        headerBytes = GC_NORMAL_HEADER_SIZE;
+                        objectBytes = toBytes (numPointers + numNonPointers);
+                        skip = 0;
+                } else if (ARRAY_TAG == tag) {
+                        headerBytes = GC_ARRAY_HEADER_SIZE;
+                        objectBytes = arrayNumBytes (s, p, numPointers,
+                                                        numNonPointers);
+                        skip = 0;
+                } else if (WEAK_TAG == tag) {
+                        headerBytes = GC_NORMAL_HEADER_SIZE;
+                        objectBytes = sizeof (struct GC_weak);
+                        skip = 0;
+                } else { /* Stack. */
+                        GC_stack stack;
+
+                        assert (STACK_TAG == tag);
+                        headerBytes = STACK_HEADER_SIZE;
+                        stack = (GC_stack)p;
+
+                        if (s->currentThread->stack == stack) {
+                                /* Shrink stacks that don't use a lot 
+                                 * of their reserved space;
+                                 * but don't violate the stack invariant.
+                                 */
+                                if (stack->used <= stack->reserved / 4) {
+                                        uint new = stackReserved (s, max (stack->reserved / 2,
+                                                                                stackNeedsReserved (s, stack)));
+                                        /* It's possible that new > stack->reserved if
+                                         * the stack invariant is violated. In that case, 
+                                         * we want to leave the stack alone, because some 
+                                         * other part of the gc will grow the stack.  We 
+                                         * cannot do any growing here because we may run 
+                                         * out of to space.
+                                         */
+                                        if (new <= stack->reserved) {
+                                                stack->reserved = new;
+                                                if (DEBUG_STACKS)
+                                                        fprintf (stderr, "Shrinking stack to size %s.\n",
+                                                                        uintToCommaString (stack->reserved));
+                                        }
+                                }
+                        } else {
+                                /* Shrink heap stacks.
+                                 */
+                                stack->reserved = stackReserved (s, max(s->threadShrinkRatio * stack->reserved, 
+                                                                        stack->used));
+                                if (DEBUG_STACKS)
+                                        fprintf (stderr, "Shrinking stack to size %s.\n",
+                                                        uintToCommaString (stack->reserved));
+                        }
+                        objectBytes = sizeof (struct GC_stack) + stack->used;
+                        skip = stack->reserved - stack->used;
+                }
+                size = headerBytes + objectBytes;
+                assert (s->back + size + skip <= s->toLimit);
+                /* Copy the object. */
+                copy (p - headerBytes, s->back, size);
+                /* If the object has a valid weak pointer, link it into the weaks
+                 * for update after the copying GC is done.
+                 */
+                if (WEAK_TAG == tag and 1 == numPointers) {
+                        GC_weak w;
+
+                        w = (GC_weak)(s->back + GC_NORMAL_HEADER_SIZE);
+                        if (DEBUG_WEAK)
+                                fprintf (stderr, "forwarding weak 0x%08x ",
+                                                (uint)w);
+                        if (GC_isPointer (w->object)
+                                and (not s->amInMinorGC
+                                        or isInNursery (s, w->object))) {
+                                if (DEBUG_WEAK)
+                                        fprintf (stderr, "linking\n");
+                                w->link = s->weaks;
+                                s->weaks = w;
+                        } else {
+                                if (DEBUG_WEAK)
+                                        fprintf (stderr, "not linking\n");
+                        }
+                }
+                /* Store the forwarding pointer in the old object. */
+                *(word*)(p - WORD_SIZE) = FORWARDED;
+                *(pointer*)p = s->back + headerBytes;
+                /* Update the back of the queue. */
+                s->back += size + skip;
+                assert (isAligned ((uint)s->back + GC_NORMAL_HEADER_SIZE,
+                                        s->alignment));
+        }
+        *pp = *(pointer*)p;
+        assert (isInToSpace (s, *pp));
+}
+
+static void updateWeaks (GC_state s) {
+        GC_weak w;
+
+        for (w = s->weaks; w != NULL; w = w->link) {
+                assert ((pointer)BOGUS_POINTER != w->object);
+
+                if (DEBUG_WEAK)
+                        fprintf (stderr, "updateWeaks  w = 0x%08x  ", (uint)w);
+                if (FORWARDED == GC_getHeader ((pointer)w->object)) {
+                        if (DEBUG_WEAK)
+                                fprintf (stderr, "forwarded from 0x%08x to 0x%08x\n",
+                                                (uint)w->object,
+                                                (uint)*(pointer*)w->object);
+                        w->object = *(pointer*)w->object;
+                } else {
+                        if (DEBUG_WEAK)
+                                fprintf (stderr, "cleared\n");
+                        *(GC_getHeaderp((pointer)w)) = WEAK_GONE_HEADER;
+                        w->object = (pointer)BOGUS_POINTER;
+                }
+        }
+        s->weaks = NULL;
+}
+
+static void swapSemis (GC_state s) {
+        struct GC_heap h;
+
+        h = s->heap2;
+        s->heap2 = s->heap;
+        s->heap = h;
+        setCardMapForMutator (s);
+}
+
+static inline bool detailedGCTime (GC_state s) {
+        return s->summary;
+}
+
+static void cheneyCopy (GC_state s) {
+        struct rusage ru_start;
+        pointer toStart;
+
+        assert (s->heap2.size >= s->oldGenSize);
+        if (detailedGCTime (s))
+                startTiming (&ru_start);
+        s->numCopyingGCs++;
+        s->toSpace = s->heap2.start;
+        s->toLimit = s->heap2.start + s->heap2.size;
+        if (DEBUG or s->messages) {
+                fprintf (stderr, "Major copying GC.\n");
+                fprintf (stderr, "fromSpace = 0x%08x of size %s\n", 
+                                (uint) s->heap.start,
+                                uintToCommaString (s->heap.size));
+                fprintf (stderr, "toSpace = 0x%08x of size %s\n",
+                                (uint) s->heap2.start,
+                                uintToCommaString (s->heap2.size));
+        }
+        assert (s->heap2.start != (void*)NULL);
+        /* The next assert ensures there is enough space for the copy to succeed.
+         * It does not assert (s->heap2.size >= s->heap.size) because that
+         * is too strong.
+         */
+        assert (s->heap2.size >= s->oldGenSize);
+        toStart = alignFrontier (s, s->heap2.start);
+        s->back = toStart;
+        foreachGlobal (s, forward);
+        foreachPointerInRange (s, toStart, &s->back, TRUE, forward);
+        updateWeaks (s);
+        s->oldGenSize = s->back - s->heap2.start;
+        s->bytesCopied += s->oldGenSize;
+        if (DEBUG)
+                fprintf (stderr, "%s bytes live.\n", 
+                                uintToCommaString (s->oldGenSize));
+        swapSemis (s);
+        clearCrossMap (s);
+        s->lastMajor = GC_COPYING;
+        if (detailedGCTime (s))
+                stopTiming (&ru_start, &s->ru_gcCopy);          
+        if (DEBUG or s->messages)
+                fprintf (stderr, "Major copying GC done.\n");
+}

Copied: mlton/branches/on-20050822-x86_64-branch/runtime/gc/cheney-copy.h (from rev 4025, mlton/branches/on-20050822-x86_64-branch/runtime/gc.h)
===================================================================
--- mlton/branches/on-20050822-x86_64-branch/runtime/gc.h	2005-08-22 22:48:34 UTC (rev 4025)
+++ mlton/branches/on-20050822-x86_64-branch/runtime/gc/cheney-copy.h	2005-09-04 00:58:04 UTC (rev 4060)
@@ -0,0 +1,7 @@
+/* Copyright (C) 1999-2005 Henry Cejtin, Matthew Fluet, Suresh
+ *    Jagannathan, and Stephen Weeks.
+ * Copyright (C) 1997-2000 NEC Research Institute.
+ *
+ * MLton is released under a BSD-style license.
+ * See the file MLton-LICENSE for details.
+ */

Copied: mlton/branches/on-20050822-x86_64-branch/runtime/gc/debug.c (from rev 4025, mlton/branches/on-20050822-x86_64-branch/runtime/gc.c)
===================================================================
--- mlton/branches/on-20050822-x86_64-branch/runtime/gc.c	2005-08-22 22:48:34 UTC (rev 4025)
+++ mlton/branches/on-20050822-x86_64-branch/runtime/gc/debug.c	2005-09-04 00:58:04 UTC (rev 4060)
@@ -0,0 +1,29 @@
+/* Copyright (C) 1999-2005 Henry Cejtin, Matthew Fluet, Suresh
+ *    Jagannathan, and Stephen Weeks.
+ * Copyright (C) 1997-2000 NEC Research Institute.
+ *
+ * MLton is released under a BSD-style license.
+ * See the file MLton-LICENSE for details.
+ */
+
+#ifndef DEBUG
+#define DEBUG FALSE
+#endif
+
+enum {
+        DEBUG_ARRAY = FALSE,
+        DEBUG_CALL_STACK = FALSE,
+        DEBUG_CARD_MARKING = FALSE,
+        DEBUG_DETAILED = FALSE,
+        DEBUG_ENTER_LEAVE = FALSE,
+        DEBUG_GENERATIONAL = FALSE,
+        DEBUG_MARK_COMPACT = FALSE,
+        DEBUG_PROFILE = FALSE,
+        DEBUG_RESIZING = FALSE,
+        DEBUG_SHARE = FALSE,
+        DEBUG_SIZE = FALSE,
+        DEBUG_STACKS = FALSE,
+        DEBUG_THREADS = FALSE,
+        DEBUG_WEAK = FALSE,
+        DEBUG_WORLD = FALSE,
+};

Added: mlton/branches/on-20050822-x86_64-branch/runtime/gc/gc_prefix.c
===================================================================
--- mlton/branches/on-20050822-x86_64-branch/runtime/gc/gc_prefix.c	2005-09-01 21:50:41 UTC (rev 4059)
+++ mlton/branches/on-20050822-x86_64-branch/runtime/gc/gc_prefix.c	2005-09-04 00:58:04 UTC (rev 4060)
@@ -0,0 +1 @@
+#include "gc.h"

Added: mlton/branches/on-20050822-x86_64-branch/runtime/gc/gc_prefix.h
===================================================================
--- mlton/branches/on-20050822-x86_64-branch/runtime/gc/gc_prefix.h	2005-09-01 21:50:41 UTC (rev 4059)
+++ mlton/branches/on-20050822-x86_64-branch/runtime/gc/gc_prefix.h	2005-09-04 00:58:04 UTC (rev 4060)
@@ -0,0 +1,2 @@
+#ifndef _MLTON_GC_H_
+#define _MLTON_GC_H_

Copied: mlton/branches/on-20050822-x86_64-branch/runtime/gc/gc_state.h (from rev 4025, mlton/branches/on-20050822-x86_64-branch/runtime/gc.h)
===================================================================
--- mlton/branches/on-20050822-x86_64-branch/runtime/gc.h	2005-08-22 22:48:34 UTC (rev 4025)
+++ mlton/branches/on-20050822-x86_64-branch/runtime/gc/gc_state.h	2005-09-04 00:58:04 UTC (rev 4060)
@@ -0,0 +1,7 @@
+
+typedef struct GC_state {
+  struct GC_heap heap;
+  struct GC_heap secondaryHeap; /* Used for major copying collection. */
+  GC_objectType *objectTypes; /* Array of object types. */
+  uint32_t objectTypesSize; /* Cardinality of objectTypes array. */
+} *GC_state;

Added: mlton/branches/on-20050822-x86_64-branch/runtime/gc/gc_suffix.c
===================================================================

Added: mlton/branches/on-20050822-x86_64-branch/runtime/gc/gc_suffix.h
===================================================================
--- mlton/branches/on-20050822-x86_64-branch/runtime/gc/gc_suffix.h	2005-09-01 21:50:41 UTC (rev 4059)
+++ mlton/branches/on-20050822-x86_64-branch/runtime/gc/gc_suffix.h	2005-09-04 00:58:04 UTC (rev 4060)
@@ -0,0 +1 @@
+#endif /* _MLTON_GC_H_ */

Copied: mlton/branches/on-20050822-x86_64-branch/runtime/gc/heap.h (from rev 4025, mlton/branches/on-20050822-x86_64-branch/runtime/gc.h)
===================================================================
--- mlton/branches/on-20050822-x86_64-branch/runtime/gc.h	2005-08-22 22:48:34 UTC (rev 4025)
+++ mlton/branches/on-20050822-x86_64-branch/runtime/gc/heap.h	2005-09-04 00:58:04 UTC (rev 4060)
@@ -0,0 +1,23 @@
+/* Copyright (C) 1999-2005 Henry Cejtin, Matthew Fluet, Suresh
+ *    Jagannathan, and Stephen Weeks.
+ * Copyright (C) 1997-2000 NEC Research Institute.
+ *
+ * MLton is released under a BSD-style license.
+ * See the file MLton-LICENSE for details.
+ */
+
+/*
+ * All ML objects (including ML execution stacks) are allocated in a
+ * contiguous heap.  The heap has the following general layout:
+ * 
+ * ---------------------------------------------------
+ * |                                                 |
+ * ---------------------------------------------------
+ * ^
+ * start
+*/
+
+typedef struct GC_heap {
+  uint32_t size;
+  pointer start;          /* start of memory area */
+} *GC_heap;

Copied: mlton/branches/on-20050822-x86_64-branch/runtime/gc/mark-sweep.c (from rev 4025, mlton/branches/on-20050822-x86_64-branch/runtime/gc.c)
===================================================================
--- mlton/branches/on-20050822-x86_64-branch/runtime/gc.c	2005-08-22 22:48:34 UTC (rev 4025)
+++ mlton/branches/on-20050822-x86_64-branch/runtime/gc/mark-sweep.c	2005-09-04 00:58:04 UTC (rev 4060)
@@ -0,0 +1,7 @@
+/* Copyright (C) 1999-2005 Henry Cejtin, Matthew Fluet, Suresh
+ *    Jagannathan, and Stephen Weeks.
+ * Copyright (C) 1997-2000 NEC Research Institute.
+ *
+ * MLton is released under a BSD-style license.
+ * See the file MLton-LICENSE for details.
+ */

Copied: mlton/branches/on-20050822-x86_64-branch/runtime/gc/mark-sweep.h (from rev 4025, mlton/branches/on-20050822-x86_64-branch/runtime/gc.h)
===================================================================
--- mlton/branches/on-20050822-x86_64-branch/runtime/gc.h	2005-08-22 22:48:34 UTC (rev 4025)
+++ mlton/branches/on-20050822-x86_64-branch/runtime/gc/mark-sweep.h	2005-09-04 00:58:04 UTC (rev 4060)
@@ -0,0 +1,7 @@
+/* Copyright (C) 1999-2005 Henry Cejtin, Matthew Fluet, Suresh
+ *    Jagannathan, and Stephen Weeks.
+ * Copyright (C) 1997-2000 NEC Research Institute.
+ *
+ * MLton is released under a BSD-style license.
+ * See the file MLton-LICENSE for details.
+ */

Added: mlton/branches/on-20050822-x86_64-branch/runtime/gc/model.c
===================================================================
--- mlton/branches/on-20050822-x86_64-branch/runtime/gc/model.c	2005-09-01 21:50:41 UTC (rev 4059)
+++ mlton/branches/on-20050822-x86_64-branch/runtime/gc/model.c	2005-09-04 00:58:04 UTC (rev 4060)
@@ -0,0 +1,29 @@
+/* Copyright (C) 2005-2005 Henry Cejtin, Matthew Fluet, Suresh
+ *    Jagannathan, and Stephen Weeks.
+ *
+ * MLton is released under a BSD-style license.
+ * See the file MLton-LICENSE for details.
+ */
+
+static inline pointer objptrToPointer (objptr O, pointer B) {
+  intptr_t O_ = (intptr_t)O;
+  intptr_t B_;
+  if GC_MODEL_B {
+    B_ = (intptr_t)B;
+  } else {
+    B_ = 0;
+  }
+  return (pointer)((O_ << GC_MODEL_S) + B_);
+}
+
+static inline objptr pointerToObjptr (pointer P, pointer B) {
+  intptr_t P_ = (intptr_t)P;
+  intptr_t B_;
+
+  if GC_MODEL_B {
+    B_ = (intptr_t)B;
+  } else {
+    B_ = 0;
+  }
+  return (objptr)((P_ - B_) >> GC_MODEL_S);
+}

Added: mlton/branches/on-20050822-x86_64-branch/runtime/gc/model.h
===================================================================
--- mlton/branches/on-20050822-x86_64-branch/runtime/gc/model.h	2005-09-01 21:50:41 UTC (rev 4059)
+++ mlton/branches/on-20050822-x86_64-branch/runtime/gc/model.h	2005-09-04 00:58:04 UTC (rev 4060)
@@ -0,0 +1,146 @@
+/* Copyright (C) 2005-2005 Henry Cejtin, Matthew Fluet, Suresh
+ *    Jagannathan, and Stephen Weeks.
+ *
+ * MLton is released under a BSD-style license.
+ * See the file MLton-LICENSE for details.
+ */
+
+/*
+Consider the following schemes for representing object pointers and
+mapping them to 64-bit native pointers.
+
+A. 32 bits, with bottom two bits zero.
+B. 32 bits, with bottom bit zero, shift left by one.
+C. 32 bits, with bottom bit zero, shift left by two.
+D. 32 bits, shift left by two.
+E. 32 bits, shift left by three.
+F. 40 bits.
+G. 64 bits.
+
+These schemes vary in the number of bits to represent a pointer in an
+object, the time to load a pointer from memory into a register, the
+amount of addressable memory, and the object alignment.
+
+        bits    time    mem(G)  align
+A       32      fast      4     4
+B       32      slow      8     4
+C       32      slow     16     8
+D       32      slow     16     4
+E       32      slow     32     8
+F       40      slow    256     4
+G       64      fast     4G     8
+
+Each of the (A-F) has a variant (AX-FX) in which pointers are added to
+some constant base address.  This gives access to any region in the
+virtual address space instead of just the low addresses.
+
+The following diagram demonstrates what portion of the native pointer
+to which the object pointer corresponds.
+
+64                              32                              0
+|                               |                               |
+-----------------------------------------------------------------
+
+                                 ==============================00   A
+
+                                ===============================0    B
+
+                               ===============================0     C
+
+                               ================================     D
+
+                             =================================      E
+
+                         ========================================   F
+
+ ================================================================   G
+
+Algorithmically, we can compute the native pointer (P) from the object
+pointer (O) (with bitsize Z), given a shift (S) and a base (B):
+
+P = %add64(%shl64(%zxZ_64(O),S),B)
+
+Likewise, we can compute the object pointer (O) from the native
+pointer (P), given a shift (S) and a base (B):
+
+O = %lobits64_Z(%shr64(%sub64(P,B),S))
+
+Hence, each of the schemes may be characterized by the size Z of the
+object pointer, the shift S, and whether or not the base B is zero.
+Noting that
+ %zx64_64(x) = x
+ %shl64(x, 0) = x
+ %add64(x, 0) = x
+ %lobits64_64(x) = x
+ %shr64(x, 0) = x
+ %sub64(x, 0) = x
+it is easy to compute the number of ALU operations required by each
+scheme:
+
+A  :: Z = 32, S == 0, B == 0   ops = 1
+AX :: Z = 32, S == 0, B != 0   ops = 2
+B  :: Z = 32, S == 1, B == 0   ops = 2
+BX :: Z = 32, S == 1, B != 0   ops = 3
+C  :: Z = 32, S == 2, B == 0   ops = 2
+CX :: Z = 32, S == 2, B != 0   ops = 3
+D  :: Z = 32, S == 2, B == 0   ops = 2
+DX :: Z = 32, S == 2, B != 0   ops = 3
+E  :: Z = 32, S == 3, B == 0   ops = 2
+EX :: Z = 32, S == 3, B != 0   ops = 3
+F  :: Z = 40, S == 0, B == 0   ops = 1 (#)
+FX :: Z = 40, S == 0, B != 0   ops = 2 (#)
+G  :: Z = 64, S == 0, B == 0   ops = 0
+
+#: In schemes F and FX, the conversion from object pointer to native
+pointer requires logical-shift-right, rather than zero-extend, since
+the object pointer would be fetched from memory as a 64-bit value.
+The cost may actually be higher, as storing an object pointer in
+memory requires some care so as not to overwrite neighboring data.
+
+It is not clear that any of the thirteen schemes dominates another.
+Here are some thoughts.
+
+(A) This is is what we have now, but is still useful on 64-bit
+machines where the bottom 4G may be less cluttered than on a 32-bit
+machine.
+
+(AX) seems like a nice cost/benefit tradeoff for a program that only
+needs 4G of memory, since the base can be used to find a contiguous 4G
+somewhere in the address space.
+
+(B) and (C) are similar, the tradeoff being to increase object
+alignment requirements in order to allow more memory.  Importantly,
+pointers having a bottom zero bit means that we can still set the
+bottom bit to one to represent small values in sum types.
+
+(D) and (E) are problematic because they leave no room to represent 
+small objects in sum types with pointers.  I think that really rules
+them out. 
+
+(F) costs some in object alignment because a sequence of pointers in
+an object may have to be padded to meet 4-byte alignment.  Loading a
+pointer from memory into a register may be slightly faster than in
+(B) or (C) because we don't have to shift, but I wonder if that
+matters.
+
+(G) costs the most in space, but has the fastest load time for
+pointers of the schemes that allow access to 4G of memory.
+
+
+A reasonable tradeoff in implementation complexity vs allowing our
+users enough flexibility might be to provide:
+
+        A, AX, B, BX, C, CX, G
+
+After some experiments on those, we might be able to find a more
+manageable set for users.
+*/
+
+#define GC_MODEL_Z  32
+#define GC_MODEL_S  1
+#define GC_MODEL_B  TRUE
+#define OBJPTR_TYPE__(z) uint ## z ## _t
+#define OBJPTR_TYPE_(z) OBJPTR_TYPE__(z)
+#define OBJPTR_TYPE OBJPTR_TYPE_(GC_MODEL_Z)
+typedef OBJPTR_TYPE objptr;
+#define OBJPTR_SIZE (sizeof(objptr) / 4)

Copied: mlton/branches/on-20050822-x86_64-branch/runtime/gc/object.c (from rev 4025, mlton/branches/on-20050822-x86_64-branch/runtime/gc.c)
===================================================================
--- mlton/branches/on-20050822-x86_64-branch/runtime/gc.c	2005-08-22 22:48:34 UTC (rev 4025)
+++ mlton/branches/on-20050822-x86_64-branch/runtime/gc/object.c	2005-09-04 00:58:04 UTC (rev 4060)
@@ -0,0 +1,61 @@
+/* Copyright (C) 1999-2005 Henry Cejtin, Matthew Fluet, Suresh
+ *    Jagannathan, and Stephen Weeks.
+ * Copyright (C) 1997-2000 NEC Research Institute.
+ *
+ * MLton is released under a BSD-style license.
+ * See the file MLton-LICENSE for details.
+ */
+
+/*
+ * Build the header for an object, given the index to its type info.
+ */
+static inline GC_header GC_objectHeader (uint32_t t) {
+        assert (t < TWOPOWER (TYPE_INDEX_BITS));
+        return 1 | (t << 1);
+}
+
+#define STACK_HEADER GC_objectHeader (STACK_TYPE_INDEX)
+#define STRING_HEADER GC_objectHeader (STRING_TYPE_INDEX)
+#define THREAD_HEADER GC_objectHeader (THREAD_TYPE_INDEX)
+#define WEAK_GONE_HEADER GC_objectHeader (WEAK_GONE_TYPE_INDEX)
+#define WORD8_VECTOR_HEADER GC_objectHeader (WORD8_TYPE_INDEX)
+
+#define SPLIT_HEADER()                                                          \
+  do {                                                                          \
+    int objectTypeIndex;                                                        \
+    GC_objectType *t;                                                           \
+                                                                                \
+    assert (1 == (header & 1));                                                 \
+    objectTypeIndex = (header & TYPE_INDEX_MASK) >> TYPE_INDEX_SHIFT;           \
+    assert (0 <= objectTypeIndex                                                \
+            and objectTypeIndex < s->objectTypesSize);                          \
+    t = &s->objectTypes [objectTypeIndex];                                      \
+    tag = t->tag;                                                               \
+    hasIdentity = t->hasIdentity;                                               \
+    numNonPointers = t->numNonPointers;                                         \
+    numPointers = t->numPointers;                                               \
+    if (DEBUG_DETAILED)                                                         \
+      fprintf (stderr,                                                          \
+               "SPLIT_HEADER ("FMTHDR")"                                        \
+               "  tag = %s"                                                     \
+               "  hasIdentity = %u"                                             \
+               "  numNonPointers = %"PRIu16                                     \
+               "  numPointers = %"PRIu16"\n",                                   \
+               header,                                                          \
+               tagToString(tag), hasIdenity, numNonPointers, numPointers);      \
+  } while (0)
+
+static char* tagToString (GC_objectTypeTag tag) {
+  switch (tag) {
+  case ARRAY_TAG:
+    return "ARRAY";
+  case NORMAL_TAG:
+    return "NORMAL";
+  case STACK_TAG:
+    return "STACK";
+  case WEAK_TAG:
+    return "WEAK";
+  default:
+    die ("bad tag %u", tag);
+  }
+}

Copied: mlton/branches/on-20050822-x86_64-branch/runtime/gc/object.h (from rev 4025, mlton/branches/on-20050822-x86_64-branch/runtime/gc.h)
===================================================================
--- mlton/branches/on-20050822-x86_64-branch/runtime/gc.h	2005-08-22 22:48:34 UTC (rev 4025)
+++ mlton/branches/on-20050822-x86_64-branch/runtime/gc/object.h	2005-09-04 00:58:04 UTC (rev 4060)
@@ -0,0 +1,123 @@
+/* Copyright (C) 1999-2005 Henry Cejtin, Matthew Fluet, Suresh
+ *    Jagannathan, and Stephen Weeks.
+ * Copyright (C) 1997-2000 NEC Research Institute.
+ *
+ * MLton is released under a BSD-style license.
+ * See the file MLton-LICENSE for details.
+ */
+
+/* 
+ * There are four kinds of ML objects: 
+ *   array, normal (fixed size), stack, and weak.
+ */
+typedef enum { 
+        ARRAY_TAG,
+        NORMAL_TAG,
+        STACK_TAG,
+        WEAK_TAG,
+} GC_objectTypeTag;
+
+/*
+ * Each object has a header, which immediately precedes the object data.
+ * A header has the following bit layout:
+ * 
+ * 00        : 1
+ * 01 - 19   : type index bits, index into GC_state->objectTypes.
+ * 20 - 30   : counter bits, used by mark compact GC (initially 0)
+ *      31   : mark bit, used by mark compact GC (initially 0)
+ */
+typedef uint32_t GC_header;
+#define PRIxHDR PRIx32
+#define FMTHDR "0x%08"PRIxHDR
+enum {
+  GC_HEADER_SIZE =   4,
+  TYPE_INDEX_BITS =  19,
+  TYPE_INDEX_MASK =  0x000FFFFE,
+  TYPE_INDEX_SHIFT = 1,
+  COUNTER_BITS =     10,
+  COUNTER_MASK =     0x7FF00000,
+  COUNTER_SHIFT =    20,
+  MARK_BITS =        1,
+  MARK_MASK =        0x80000000,
+  MARK_SHIFT =       31
+};
+
+/*
+ * Normal objects have the following layout:
+ *
+ * header word32 :: 
+ * (non heap-pointers)* :: 
+ * (heap pointers)*
+ *
+ * Note that the non heap-pointers denote a sequence of primitive data
+ * values.  These data values need not map directly to values of the
+ * native word size.  MLton's aggressive representation strategies may
+ * pack multiple primitive values into the same native word.
+ * Likewise, a primitive value may span multiple native words (e.g.,
+ * Word64.word).
+*/
+enum {
+  GC_NORMAL_HEADER_SIZE = GC_HEADER_SIZE,
+};
+
+/*
+ * Array objects have the following layout:
+ * 
+ * counter word32 :: 
+ * length word32 :: 
+ * header word32 :: 
+ * ( (non heap-pointers)* :: (heap pointers)* )*
+ *
+ * The counter word is used by mark compact GC.  The length word is
+ * the number of elements in the array.  Array elements have the same
+ * individual layout as normal objects, omitting the header word.
+ */
+enum {
+  GC_ARRAY_LENGTH_SIZE =  4,
+  GC_ARRAY_COUNTER_SIZE = GC_ARRAY_LENGTH_SIZE,
+  GC_ARRAY_HEADER_SIZE =  GC_ARRAY_COUNTER_SIZE + GC_ARRAY_LENGTH_SIZE + GC_HEADER_SIZE,
+};
+
+/* Stack objects are described in stack.h */
+
+/* Weak objects are described in weak.h */
+
+
+/* 
+ * The type index of a header is an index into an array of object
+ * types, where each element describes the layout of an object.  The
+ * object types array is declared as:
+ *
+ *  GC_objectType *objectTypes;
+ *
+ * The objectTypes pointer is initialized to point to a static array
+ * of object types that is emitted for each compiled program.  The
+ * hasIdentity field indicates whether or not the object has mutable
+ * fields, in which case it may not be hash-cons-ed.  In a normal
+ * object, the numNonPointers field indicates the number of 32-bit
+ * words of non heap-pointer data, while the numPointers field
+ * indicates the number of heap pointers.  In an array object, the
+ * numNonPointers field indicates the number of bytes of non
+ * heap-pointer data, while the numPointers field indicates the number
+ * of heap pointers.  In a stack object, the numNonPointers and
+ * numPointers fields are irrelevant.  In a weak object, the
+ * numNonPointers and numPointers fields are interpreted as in a
+ * normal object (and, hence, must be (0,1) or (0,0)).
+*/
+typedef struct {
+        /* Keep tag first, at zero offset, since it is referenced most often. */
+        GC_objectTypeTag tag;
+        bool hasIdentity;
+        uint16_t numNonPointers;
+        uint16_t numPointers;
+} GC_objectType;
+enum {
+  /* The type indices here must agree with those in backend/rep-type.fun. */
+  STACK_TYPE_INDEX =         0,
+  STRING_TYPE_INDEX =        1,
+  THREAD_TYPE_INDEX =        2,
+  WEAK_GONE_TYPE_INDEX =     3,
+  WORD8_VECTOR_TYPE_INDEX =  STRING_TYPE_INDEX,
+  WORD32_VECTOR_TYPE_INDEX = 4,
+  WORD16_VECTOR_TYPE_INDEX = 5,
+};

Copied: mlton/branches/on-20050822-x86_64-branch/runtime/gc/util.h (from rev 4025, mlton/branches/on-20050822-x86_64-branch/runtime/platform.h)
===================================================================
--- mlton/branches/on-20050822-x86_64-branch/runtime/platform.h	2005-08-22 22:48:34 UTC (rev 4025)
+++ mlton/branches/on-20050822-x86_64-branch/runtime/gc/util.h	2005-09-04 00:58:04 UTC (rev 4060)
@@ -0,0 +1,54 @@
+/* Copyright (C) 1999-2005 Henry Cejtin, Matthew Fluet, Suresh
+ *    Jagannathan, and Stephen Weeks.
+ * Copyright (C) 1997-2000 NEC Research Institute.
+ *
+ * MLton is released under a BSD-style license.
+ * See the file MLton-LICENSE for details.
+ */
+
+#ifndef _UTIL_H_
+#define _UTIL_H_
+
+#define _ISOC99_SOURCE
+#define _BSD_SOURCE
+
+/* Only enable _POSIX_C_SOURCE on platforms that don't have broken system
+ * headers.
+ */
+#if (defined (__linux__))
+#define _POSIX_C_SOURCE 200112L
+#endif
+
+/* C99-specific headers */
+#include <stddef.h>
+#include <stdbool.h>
+#include <iso646.h>
+#include <stdio.h>
+#include <stdint.h>
+#include <inttypes.h>
+
+#include "../assert.h"
+
+#define TWOPOWER(n) (1 << (n))
+
+#ifndef TRUE
+#define TRUE    (0 == 0)
+#endif
+#ifndef FALSE
+#define FALSE   (not TRUE)
+#endif
+#define unless(p)       if (not (p))
+
+/* issue error message and exit */
+extern void die (char *fmt, ...)
+                        __attribute__ ((format(printf, 1, 2)))
+                        __attribute__ ((noreturn));
+/* issue error message and exit.  Also print strerror(errno). */
+extern void diee (char *fmt, ...)
+                        __attribute__ ((format(printf, 1, 2)))
+                        __attribute__ ((noreturn));
+
+typedef void* pointer;
+#define FMTPTR "0x%08"PRIxPTR
+
+#endif /* _UTIL_H_ */