[MLton-commit] r4120

Matthew Fluet MLton@mlton.org
Tue, 25 Oct 2005 17:53:15 -0700


Profiling functions
----------------------------------------------------------------------

U   mlton/branches/on-20050822-x86_64-branch/runtime/gc/Makefile
U   mlton/branches/on-20050822-x86_64-branch/runtime/gc/array-allocate.c
U   mlton/branches/on-20050822-x86_64-branch/runtime/gc/foreach.c
U   mlton/branches/on-20050822-x86_64-branch/runtime/gc/garbage-collection.c
U   mlton/branches/on-20050822-x86_64-branch/runtime/gc/gc_state.h
U   mlton/branches/on-20050822-x86_64-branch/runtime/gc/init.c
A   mlton/branches/on-20050822-x86_64-branch/runtime/gc/profiling.c
U   mlton/branches/on-20050822-x86_64-branch/runtime/gc/profiling.h
A   mlton/branches/on-20050822-x86_64-branch/runtime/gc/read_write.c
U   mlton/branches/on-20050822-x86_64-branch/runtime/gc/safe.c
U   mlton/branches/on-20050822-x86_64-branch/runtime/gc/signals.h
U   mlton/branches/on-20050822-x86_64-branch/runtime/gc/util.h
U   mlton/branches/on-20050822-x86_64-branch/runtime/gc/virtual-memory.h

----------------------------------------------------------------------

Modified: mlton/branches/on-20050822-x86_64-branch/runtime/gc/Makefile
===================================================================
--- mlton/branches/on-20050822-x86_64-branch/runtime/gc/Makefile	2005-10-22 14:08:21 UTC (rev 4119)
+++ mlton/branches/on-20050822-x86_64-branch/runtime/gc/Makefile	2005-10-26 00:53:09 UTC (rev 4120)
@@ -75,6 +75,7 @@
 	gc_prefix.c							\
 	util.c								\
 	safe.c								\
+	read_write.c							\
 	rusage.c							\
 	debug.c								\
 	align.c								\
@@ -90,12 +91,12 @@
 	stack_predicates.c						\
 	stack.c								\
 	thread.c							\
+	generational.c							\
+	current.c   							\
 	foreach.c							\
 	translate.c							\
-	generational.c							\
 	heap_predicates.c						\
 	heap.c								\
-	current.c   							\
 	ratios_predicates.c						\
 	atomic.c							\
 	gc_state.c   							\
@@ -110,9 +111,10 @@
 	garbage-collection.c						\
 	array-allocate.c						\
 	copy-thread.c							\
-	init.c								\
 	pack.c								\
 	size.c								\
+	profiling.c							\
+	init.c								\
 	assumptions.c							\
 	gc_suffix.c
 

Modified: mlton/branches/on-20050822-x86_64-branch/runtime/gc/array-allocate.c
===================================================================
--- mlton/branches/on-20050822-x86_64-branch/runtime/gc/array-allocate.c	2005-10-22 14:08:21 UTC (rev 4119)
+++ mlton/branches/on-20050822-x86_64-branch/runtime/gc/array-allocate.c	2005-10-26 00:53:09 UTC (rev 4120)
@@ -75,23 +75,26 @@
     pointer p;
     
     if (0 == numNonObjptrs)
-      for (p = frontier; 
-           p < last; 
-           p += OBJPTR_SIZE)
+      for (p = frontier; p < last; p += OBJPTR_SIZE)
         *((objptr*)p) = BOGUS_OBJPTR;
-    else
-      for (p = frontier; 
-           p < last; ) {
+    else {
+      /* Array with a mix of pointers and non-pointers. */
+      size_t nonObjptrBytes;
+      size_t objptrBytes;
+        
+      nonObjptrBytes = numNonObjptrsToBytes(numNonObjptrs, ARRAY_TAG);
+      objptrBytes = numObjptrs * OBJPTR_SIZE;
+
+      for (p = frontier; p < last; ) {
         pointer next;
         
-        p += numNonObjptrsToBytes(numNonObjptrs, ARRAY_TAG);
-        next = p + numObjptrs * OBJPTR_SIZE;
+        p += nonObjptrBytes;
+        next = p + objptrBytes;
         assert (next <= last);
-        while (p < next) {
+        for ( ; p < next; p += OBJPTR_SIZE)
           *((objptr*)p) = BOGUS_OBJPTR;
-          p += OBJPTR_SIZE;
-        }
       }
+    }
   }
   GC_profileAllocInc (s, arraySize);
   if (DEBUG_ARRAY) {

Modified: mlton/branches/on-20050822-x86_64-branch/runtime/gc/foreach.c
===================================================================
--- mlton/branches/on-20050822-x86_64-branch/runtime/gc/foreach.c	2005-10-22 14:08:21 UTC (rev 4119)
+++ mlton/branches/on-20050822-x86_64-branch/runtime/gc/foreach.c	2005-10-26 00:53:09 UTC (rev 4120)
@@ -81,7 +81,7 @@
   } else if (ARRAY_TAG == tag) {
     size_t bytesPerElement;
     size_t dataBytes;
-    pointer max;
+    pointer last;
     GC_arrayLength numElements;
     
     numElements = getArrayLength (p);
@@ -99,10 +99,10 @@
       /* No pointers to process. */
       ;
     else {
-      max = p + dataBytes;
+      last = p + dataBytes;
       if (0 == numNonObjptrs)
         /* Array with only pointers. */
-        for ( ; p < max; p += OBJPTR_SIZE)
+        for ( ; p < last; p += OBJPTR_SIZE)
           maybeCall (f, s, (objptr*)p);
       else {
         /* Array with a mix of pointers and non-pointers. */
@@ -113,18 +113,18 @@
         objptrBytes = numObjptrs * OBJPTR_SIZE;
 
         /* For each array element. */
-        while (p < max) {
-          pointer max2;
+        for ( ; p < last; ) {
+          pointer next;
           
           /* Skip the non-pointers. */
           p += nonObjptrBytes;
-          max2 = p + objptrBytes;
+          next = p + objptrBytes;
           /* For each internal pointer. */
-          for ( ; p < max2; p += OBJPTR_SIZE) 
+          for ( ; p < next; p += OBJPTR_SIZE) 
             maybeCall (f, s, (objptr*)p);
         }
       }
-      assert (p == max);
+      assert (p == last);
       p -= dataBytes;
     }
     p += pad (s, dataBytes, GC_ARRAY_HEADER_SIZE);
@@ -147,9 +147,9 @@
     assert (stack->used <= stack->reserved);
     while (top > bottom) {
       /* Invariant: top points just past a "return address". */
-      returnAddress = *(GC_returnAddress*) (top - GC_RETURNADDRESS_SIZE);
+      returnAddress = *((GC_returnAddress*)(top - GC_RETURNADDRESS_SIZE));
       if (DEBUG) {
-        fprintf (stderr, "  top = "FMTPTR"  return address = "FMTPTR"\n",
+        fprintf (stderr, "  top = "FMTPTR"  return address = "FMTRA"\n",
                  (uintptr_t)top, returnAddress);
       }
       frameLayout = getFrameLayoutFromReturnAddress (s, returnAddress);
@@ -207,3 +207,37 @@
   }
   return front;
 }
+
+
+typedef void (*GC_foreachStackFrameFun) (GC_state s, GC_frameIndex i);
+
+/* Apply f to the frame index of each frame in the current thread's stack. */
+void foreachStackFrame (GC_state s, GC_foreachStackFrameFun f) {
+  pointer bottom;
+  GC_frameIndex index;
+  GC_frameLayout *layout;
+  GC_returnAddress returnAddress;
+  pointer top;
+
+  if (DEBUG_PROFILE)
+    fprintf (stderr, "foreachStackFrame\n");
+  bottom = stackBottom (s, currentThreadStack(s));
+  if (DEBUG_PROFILE)
+    fprintf (stderr, "  bottom = "FMTPTR"  top = "FMTPTR".\n",
+             (uintptr_t)bottom, (uintptr_t)s->stackTop);
+  for (top = s->stackTop; top > bottom; top -= layout->size) {
+    returnAddress = *((GC_returnAddress*)(top - GC_RETURNADDRESS_SIZE));
+    index = getFrameIndexFromReturnAddress (s, returnAddress);
+    if (DEBUG_PROFILE)
+      fprintf (stderr, "top = "FMTPTR"  index = "FMTFI"\n",
+               (uintptr_t)top, index);
+    unless (index < s->frameLayoutsLength)
+      die ("top = "FMTPTR"  returnAddress = "FMTRA"  index = "FMTFI"\n",
+           (uintptr_t)top, (uintptr_t)returnAddress, index);
+    f (s, index);
+    layout = &(s->frameLayouts[index]);
+    assert (layout->size > 0);
+  }
+  if (DEBUG_PROFILE)
+    fprintf (stderr, "done foreachStackFrame\n");
+}

Modified: mlton/branches/on-20050822-x86_64-branch/runtime/gc/garbage-collection.c
===================================================================
--- mlton/branches/on-20050822-x86_64-branch/runtime/gc/garbage-collection.c	2005-10-22 14:08:21 UTC (rev 4119)
+++ mlton/branches/on-20050822-x86_64-branch/runtime/gc/garbage-collection.c	2005-10-26 00:53:09 UTC (rev 4120)
@@ -47,22 +47,22 @@
 }
 
 static inline void enterGC (GC_state s) {
-  if (s->profilingInfo.isOn) {
+  if (s->profiling.isOn) {
     /* We don't need to profileEnter for count profiling because it
      * has already bumped the counter.  If we did allow the bump, then
      * the count would look like function(s) had run an extra time.
      */  
-    if (s->profilingInfo.stack
-        and not (PROFILE_COUNT == s->profilingInfo.kind))
+    if (s->profiling.stack
+        and not (PROFILE_COUNT == s->profiling.kind))
       GC_profileEnter (s);
   }
   s->amInGC = TRUE;
 }
 
 static inline void leaveGC (GC_state s) {
-  if (s->profilingInfo.isOn) {
-    if (s->profilingInfo.stack
-        and not (PROFILE_COUNT == s->profilingInfo.kind))
+  if (s->profiling.isOn) {
+    if (s->profiling.stack
+        and not (PROFILE_COUNT == s->profiling.kind))
       GC_profileLeave (s);
   }
   s->amInGC = FALSE;

Modified: mlton/branches/on-20050822-x86_64-branch/runtime/gc/gc_state.h
===================================================================
--- mlton/branches/on-20050822-x86_64-branch/runtime/gc/gc_state.h	2005-10-22 14:08:21 UTC (rev 4119)
+++ mlton/branches/on-20050822-x86_64-branch/runtime/gc/gc_state.h	2005-10-26 00:53:09 UTC (rev 4120)
@@ -30,12 +30,13 @@
   struct GC_lastMajorStatistics lastMajorStatistics;
   pointer limit; /* limit = heap.start + heap.totalBytes */
   pointer limitPlusSlop; /* limit + LIMIT_SLOP */
+  uint32_t magic; /* The magic number for this executable. */
   uint32_t maxFrameSize;
   /*Bool*/bool mutatorMarksCards;
   GC_objectHashTable objectHashTable;
   GC_objectType *objectTypes; /* Array of object types. */
   uint32_t objectTypesLength; /* Cardinality of objectTypes array. */
-  struct GC_profilingInfo profilingInfo;
+  struct GC_profiling profiling;
   uint32_t (*returnAddressToFrameIndex) (GC_returnAddress ra);
   struct GC_ratios ratios;
   bool rusageIsEnabled;

Modified: mlton/branches/on-20050822-x86_64-branch/runtime/gc/init.c
===================================================================
--- mlton/branches/on-20050822-x86_64-branch/runtime/gc/init.c	2005-10-22 14:08:21 UTC (rev 4119)
+++ mlton/branches/on-20050822-x86_64-branch/runtime/gc/init.c	2005-10-26 00:53:09 UTC (rev 4120)
@@ -196,16 +196,16 @@
   frontier = s->frontier;
   for (i = 0; i < s->vectorInitsLength; ++i) {
     size_t bytesPerElement;
-    size_t numBytes;
+    size_t dataBytes;
     size_t objectSize;
     uint32_t typeIndex;
 
     bytesPerElement = inits[i].bytesPerElement;
-    numBytes = bytesPerElement * inits[i].numElements;
+    dataBytes = bytesPerElement * inits[i].numElements;
     objectSize = align (GC_ARRAY_HEADER_SIZE
-                        + ((0 == numBytes)
+                        + ((0 == dataBytes)
                            ? POINTER_SIZE
-                           : numBytes),
+                           : dataBytes),
                         s->alignment);
     assert (objectSize <= (size_t)(s->heap.start + s->heap.size - frontier));
     *((GC_arrayCounter*)(frontier)) = 0;
@@ -232,7 +232,7 @@
     if (DEBUG_DETAILED)
       fprintf (stderr, "allocated vector at "FMTPTR"\n",
                (uintptr_t)(s->globals[inits[i].globalIndex]));
-    GC_memcpy (inits[i].bytes, frontier, numBytes);
+    GC_memcpy (inits[i].bytes, frontier, dataBytes);
     frontier += objectSize - GC_ARRAY_HEADER_SIZE;
   }
   if (DEBUG_DETAILED)

Copied: mlton/branches/on-20050822-x86_64-branch/runtime/gc/profiling.c (from rev 4119, mlton/branches/on-20050822-x86_64-branch/runtime/gc.c)
===================================================================
--- mlton/branches/on-20050822-x86_64-branch/runtime/gc.c	2005-10-22 14:08:21 UTC (rev 4119)
+++ mlton/branches/on-20050822-x86_64-branch/runtime/gc/profiling.c	2005-10-26 00:53:09 UTC (rev 4120)
@@ -0,0 +1,522 @@
+/* Copyright (C) 1999-2005 Henry Cejtin, Matthew Fluet, Suresh
+ *    Jagannathan, and Stephen Weeks.
+ * Copyright (C) 1997-2000 NEC Research Institute.
+ *
+ * MLton is released under a BSD-style license.
+ * See the file MLton-LICENSE for details.
+ */
+
+#define SOURCES_INDEX_UNKNOWN 0
+#define SOURCES_INDEX_GC      1
+#define SOURCE_SEQ_GC         1
+#define SOURCE_SEQ_UNKNOWN    0
+
+static uint32_t numStackFrames;
+static uint32_t *callStack;
+
+static void fillCallStack (__attribute__ ((unused))GC_state s, 
+                           GC_frameIndex i) {
+  if (DEBUG_CALL_STACK)
+    fprintf (stderr, "fillCallStack ("FMTFI")\n", i);
+  callStack[numStackFrames] = i;
+  numStackFrames++;
+}
+
+void GC_callStack (GC_state s, pointer p) {
+  if (DEBUG_CALL_STACK)
+    fprintf (stderr, "GC_callStack\n");
+  numStackFrames = 0;
+  callStack = (uint32_t*)p;
+  foreachStackFrame (s, fillCallStack);
+}
+
+static void incNumStackFrames (__attribute__ ((unused)) GC_state s, 
+                               __attribute__ ((unused)) GC_frameIndex i) {
+  numStackFrames++;
+}
+
+uint32_t GC_numStackFrames (GC_state s) {
+  numStackFrames = 0;
+  foreachStackFrame (s, incNumStackFrames);
+  if (DEBUG_CALL_STACK)
+    fprintf (stderr, "%"PRIu32" = GC_numStackFrames\n", numStackFrames);
+  return numStackFrames;
+}
+
+static inline uint32_t topFrameSourceSeqIndex (GC_state s, GC_stack stack) {
+  return s->profiling.frameSources[topFrameIndex (s, stack)];
+}
+
+uint32_t* GC_frameIndexSourceSeq (GC_state s, GC_frameIndex frameIndex) {
+  uint32_t *res;
+
+  res = s->profiling.sourceSeqs[s->profiling.frameSources[frameIndex]];
+  if (DEBUG_CALL_STACK)
+    fprintf (stderr, FMTPTR" = GC_frameIndexSourceSeq ("FMTFI")\n",
+             (uintptr_t)res, frameIndex);
+  return res;
+}
+
+inline char* GC_sourceName (GC_state s, uint32_t i) {
+  if (i < s->profiling.sourcesLength)
+    return s->profiling.sourceNames[s->profiling.sources[i].nameIndex];
+  else
+    return s->profiling.sourceNames[i - s->profiling.sourcesLength];
+}
+
+static inline GC_profileStack profileStackInfo (GC_state s, uint32_t i) {
+  assert (s->profiling.data != NULL);
+  return &(s->profiling.data->stack[i]);
+}
+
+static inline uint32_t profileMaster (GC_state s, uint32_t i) {
+  return s->profiling.sources[i].nameIndex + s->profiling.sourcesLength;
+}
+
+static inline void removeFromStack (GC_state s, uint32_t i) {
+  GC_profileData p;
+  GC_profileStack ps;
+  uintmax_t totalInc;
+
+  p = s->profiling.data;
+  ps = profileStackInfo (s, i);
+  totalInc = p->total - ps->lastTotal;
+  if (DEBUG_PROFILE)
+    fprintf (stderr, "removing %s from stack  ticksInc = %"PRIuMAX"  ticksInGCInc = %"PRIuMAX"\n",
+             GC_sourceName (s, i), totalInc,
+             p->totalGC - ps->lastTotalGC);
+  ps->ticks += totalInc;
+  ps->ticksInGC += p->totalGC - ps->lastTotalGC;
+}
+
+static void setProfTimer (long usec) {
+  struct itimerval iv;
+  
+  iv.it_interval.tv_sec = 0;
+  iv.it_interval.tv_usec = usec;
+  iv.it_value.tv_sec = 0;
+  iv.it_value.tv_usec = usec;
+  unless (0 == setitimer (ITIMER_PROF, &iv, NULL))
+    die ("setProfTimer failed");
+}
+
+void GC_profileDone (GC_state s) {
+  GC_profileData p;
+  uint32_t sourceIndex;
+
+  if (DEBUG_PROFILE)
+    fprintf (stderr, "GC_profileDone ()\n");
+  assert (s->profiling.isOn);
+  if (PROFILE_TIME == s->profiling.kind)
+    setProfTimer (0);
+  s->profiling.isOn = FALSE;
+  p = s->profiling.data;
+  if (s->profiling.stack) {
+    for (sourceIndex = 0;
+         sourceIndex < s->profiling.sourcesLength + s->profiling.sourceNamesLength;
+         sourceIndex++) {
+      if (p->stack[sourceIndex].numOccurrences > 0) {
+        if (DEBUG_PROFILE)
+          fprintf (stderr, "done leaving %s\n",
+                   GC_sourceName (s, sourceIndex));
+        removeFromStack (s, sourceIndex);
+      }
+    }
+  }
+}
+
+static int profileDepth = 0;
+
+static void profileIndent (void) {
+  int i;
+
+  for (i = 0; i < profileDepth; ++i)
+    fprintf (stderr, " ");
+}
+
+static inline void profileEnterSource (GC_state s, uint32_t i) {
+  GC_profileData p;
+  GC_profileStack ps;
+  
+  p = s->profiling.data;
+  ps = profileStackInfo (s, i);
+  if (0 == ps->numOccurrences) {
+    ps->lastTotal = p->total;
+    ps->lastTotalGC = p->totalGC;
+  }
+  ps->numOccurrences++;
+}
+
+static void profileEnter (GC_state s, uint32_t sourceSeqIndex) {
+  uint32_t i;
+  GC_profileData p;
+  uint32_t sourceIndex;
+  uint32_t *sourceSeq;
+
+  if (DEBUG_PROFILE)
+    fprintf (stderr, "profileEnter (%"PRIu32")\n", sourceSeqIndex);
+  assert (s->profiling.stack);
+  assert (sourceSeqIndex < s->profiling.sourceSeqsLength);
+  p = s->profiling.data;
+  sourceSeq = s->profiling.sourceSeqs[sourceSeqIndex];
+  for (i = 1; i <= sourceSeq[0]; i++) {
+    sourceIndex = sourceSeq[i];
+    if (DEBUG_ENTER_LEAVE or DEBUG_PROFILE) {
+      profileIndent ();
+      fprintf (stderr, "(entering %s\n",
+               GC_sourceName (s, sourceIndex));
+      profileDepth++;
+    }
+    profileEnterSource (s, sourceIndex);
+    profileEnterSource (s, profileMaster (s, sourceIndex));
+  }
+}
+
+static void enterFrame (GC_state s, uint32_t i) {
+  profileEnter (s, s->profiling.frameSources[i]);
+}
+
+static inline void profileLeaveSource (GC_state s, uint32_t i) {
+  GC_profileData p;
+  GC_profileStack ps;
+
+  if (DEBUG_PROFILE)
+    fprintf (stderr, "profileLeaveSource (%"PRIu32")\n", i);
+  p = s->profiling.data;
+  ps = profileStackInfo (s, i);
+  assert (ps->numOccurrences > 0);
+  ps->numOccurrences--;
+  if (0 == ps->numOccurrences)
+    removeFromStack (s, i);
+}
+
+static void profileLeave (GC_state s, uint32_t sourceSeqIndex) {
+  int32_t i;
+  GC_profileData p;
+  uint32_t sourceIndex;
+  uint32_t *sourceSeq;
+
+  if (DEBUG_PROFILE)
+    fprintf (stderr, "profileLeave (%"PRIu32")\n", sourceSeqIndex);
+  assert (s->profiling.stack);
+  assert (sourceSeqIndex < s->profiling.sourceSeqsLength);
+  p = s->profiling.data;
+  sourceSeq = s->profiling.sourceSeqs[sourceSeqIndex];
+  for (i = sourceSeq[0]; i > 0; i--) {
+    sourceIndex = sourceSeq[i];
+    if (DEBUG_ENTER_LEAVE or DEBUG_PROFILE) {
+      profileDepth--;
+      profileIndent ();
+      fprintf (stderr, "leaving %s)\n",
+               GC_sourceName (s, sourceIndex));
+    }
+    profileLeaveSource (s, sourceIndex);
+    profileLeaveSource (s, profileMaster (s, sourceIndex));
+  }
+}
+
+static inline void profileInc (GC_state s, size_t amount, uint32_t sourceSeqIndex) {
+  uint32_t *sourceSeq;
+  uint32_t topSourceIndex;
+
+  if (DEBUG_PROFILE)
+    fprintf (stderr, "profileInc (%zu, %"PRIu32")\n",
+             amount, sourceSeqIndex);
+  assert (sourceSeqIndex < s->profiling.sourceSeqsLength);
+  sourceSeq = s->profiling.sourceSeqs[sourceSeqIndex];
+  topSourceIndex = sourceSeq[0] > 0 ? sourceSeq[sourceSeq[0]] : SOURCES_INDEX_UNKNOWN;
+  if (DEBUG_PROFILE) {
+    profileIndent ();
+    fprintf (stderr, "bumping %s by %zu\n",
+             GC_sourceName (s, topSourceIndex), amount);
+  }
+  s->profiling.data->countTop[topSourceIndex] += amount;
+  s->profiling.data->countTop[profileMaster (s, topSourceIndex)] += amount;
+  if (s->profiling.stack)
+    profileEnter (s, sourceSeqIndex);
+  if (SOURCES_INDEX_GC == topSourceIndex)
+    s->profiling.data->totalGC += amount;
+  else
+    s->profiling.data->total += amount;
+  if (s->profiling.stack)
+    profileLeave (s, sourceSeqIndex);
+}
+
+void GC_profileEnter (GC_state s) {
+  profileEnter (s, topFrameSourceSeqIndex (s, currentThreadStack (s)));
+}
+
+void GC_profileLeave (GC_state s) {
+  profileLeave (s, topFrameSourceSeqIndex (s, currentThreadStack (s)));
+}
+
+void GC_profileInc (GC_state s, size_t amount) {
+  if (DEBUG_PROFILE)
+    fprintf (stderr, "GC_profileInc (%zu)\n", amount);
+  profileInc (s, amount,
+              s->amInGC 
+              ? SOURCE_SEQ_GC 
+              : topFrameSourceSeqIndex (s, currentThreadStack (s)));
+}
+
+void GC_profileAllocInc (GC_state s, size_t amount) {
+  if (s->profiling.isOn and (PROFILE_ALLOC == s->profiling.kind)) {
+    if (DEBUG_PROFILE)
+      fprintf (stderr, "GC_profileAllocInc (%u)\n", (uint)amount);
+    GC_profileInc (s, amount);
+  }
+}
+
+static void showProf (GC_state s) {
+  uint32_t i;
+  uint32_t j;
+  
+  fprintf (stdout, "0x%08"PRIx32"\n", s->magic);
+  fprintf (stdout, "%"PRIu32"\n", s->profiling.sourceNamesLength);
+  for (i = 0; i < s->profiling.sourceNamesLength; i++)
+    fprintf (stdout, "%s\n", s->profiling.sourceNames[i]);
+  fprintf (stdout, "%"PRIu32"\n", s->profiling.sourcesLength);
+  for (i = 0; i < s->profiling.sourcesLength; i++)
+    fprintf (stdout, "%"PRIu32" %"PRIu32"\n",
+             s->profiling.sources[i].nameIndex,
+             s->profiling.sources[i].successorsIndex);
+  fprintf (stdout, "%"PRIu32"\n", s->profiling.sourceSeqsLength);
+  for (i = 0; i < s->profiling.sourceSeqsLength; i++) {
+    uint32_t *sourceSeq;
+    
+    sourceSeq = s->profiling.sourceSeqs[i];
+    for (j = 1; j <= sourceSeq[0]; j++)
+      fprintf (stdout, "%"PRIu32" ", sourceSeq[j]);
+    fprintf (stdout, "\n");
+  }
+}
+
+GC_profileData GC_profileNew (GC_state s) {
+  GC_profileData p;
+  uint32_t size;
+
+  p = (GC_profileData)(malloc_safe (sizeof(*p)));
+  p->total = 0;
+  p->totalGC = 0;
+  size = s->profiling.sourcesLength + s->profiling.sourceNamesLength;
+  p->countTop = (uintmax_t*)(calloc_safe(size, sizeof(*(p->countTop))));
+  if (s->profiling.stack)
+    p->stack = 
+      (struct GC_profileStack *)
+      (calloc_safe(size, sizeof(*(p->stack))));
+  if (DEBUG_PROFILE)
+    fprintf (stderr, FMTPTR" = GC_profileNew ()\n", (uintptr_t)p);
+  return p;
+}
+
+void GC_profileFree (GC_state s, GC_profileData p) {
+  free (p->countTop);
+  if (s->profiling.stack)
+    free (p->stack);
+  free (p);
+}
+
+static void profileWriteCount (GC_state s, GC_profileData p, int fd, uint32_t i) {
+  writeUintmaxU (fd, p->countTop[i]);
+  if (s->profiling.stack) {
+    GC_profileStack ps;
+    
+    ps = &(p->stack[i]);
+    writeString (fd, " ");
+    writeUintmaxU (fd, ps->ticks);
+    writeString (fd, " ");
+    writeUintmaxU (fd, ps->ticksInGC);
+  }
+  writeNewline (fd);
+}
+
+void GC_profileWrite (GC_state s, GC_profileData p, int fd) {
+  uint32_t i;
+  char* kind;
+
+  if (DEBUG_PROFILE)
+    fprintf (stderr, "GC_profileWrite\n");
+  writeString (fd, "MLton prof\n");
+  kind = "";
+  switch (s->profiling.kind) {
+  case PROFILE_ALLOC:
+    kind = "alloc\n";
+    break;
+  case PROFILE_COUNT:
+    kind = "count\n";
+    break;
+  case PROFILE_NONE:
+    die ("impossible PROFILE_NONE");
+    break;
+  case PROFILE_TIME:
+    kind = "time\n";
+    break;
+  }
+  writeString (fd, kind);
+  writeString (fd, s->profiling.stack ? "stack\n" : "current\n");
+  writeUint32X (fd, s->magic);
+  writeNewline (fd);
+  writeUintmaxU (fd, p->total);
+  writeString (fd, " ");
+  writeUintmaxU (fd, p->totalGC);
+  writeNewline (fd);
+  writeUint32U (fd, s->profiling.sourcesLength);
+  writeNewline (fd);
+  for (i = 0; i < s->profiling.sourcesLength; i++)
+    profileWriteCount (s, p, fd, i);
+  writeUint32U (fd, s->profiling.sourceNamesLength);
+  writeNewline (fd);
+  for (i = 0; i < s->profiling.sourceNamesLength; i++)
+    profileWriteCount (s, p, fd, i + s->profiling.sourcesLength);
+}
+
+#if not HAS_TIME_PROFILING
+
+/* No time profiling on this platform.  There is a check in
+ * mlton/main/main.fun to make sure that time profiling is never
+ * turned on.
+ */
+static void profileTimeInit (GC_state s) __attribute__ ((noreturn));
+static void profileTimeInit (GC_state s) {
+  die ("no time profiling");
+}
+
+#else
+
+static GC_state catcherState;
+
+void GC_handleSigProf (pointer pc) {
+  GC_frameIndex frameIndex;
+  GC_state s;
+  uint32_t sourceSeqIndex;
+
+  s = catcherState;
+  if (DEBUG_PROFILE)
+    fprintf (stderr, "GC_handleSigProf ("FMTPTR")\n", (uintptr_t)pc);
+  if (s->amInGC)
+    sourceSeqIndex = SOURCE_SEQ_GC;
+  else {
+    frameIndex = topFrameIndex (s, currentThreadStack (s));
+    if (C_FRAME == s->frameLayouts[frameIndex].kind)
+      sourceSeqIndex = s->profiling.frameSources[frameIndex];
+    else {
+      if (s->profiling.textStart <= pc and pc < s->profiling.textEnd)
+        sourceSeqIndex = s->profiling.textSources [pc - s->profiling.textStart];
+      else {
+        if (DEBUG_PROFILE)
+          fprintf (stderr, "pc out of bounds\n");
+        sourceSeqIndex = SOURCE_SEQ_UNKNOWN;
+      }
+    }
+  }
+  profileInc (s, 1, sourceSeqIndex);
+}
+
+static int compareSourceLabels (const void *v1, const void *v2) {
+  uintptr_t ui1;
+  uintptr_t ui2;
+
+  ui1 = (uintptr_t)v1;
+  ui2 = (uintptr_t)v2;
+
+  if (ui1 < ui2)
+    return -1;
+  else if (ui1 == ui2)
+    return 0;
+  else /* if (ui1 > ui2) */
+    return 1;
+}
+
+static void profileTimeInit (GC_state s) {
+  uint32_t i;
+  pointer p;
+  struct sigaction sa;
+  uint32_t sourceSeqsIndex;
+
+  s->profiling.data = GC_profileNew (s);
+  /* Sort sourceLabels by address. */
+  qsort (s->profiling.sourceLabels, 
+         s->profiling.sourceLabelsLength, 
+         sizeof (*s->profiling.sourceLabels),
+         compareSourceLabels);
+  if (0 == s->profiling.sourceLabels[s->profiling.sourceLabelsLength - 1].label)
+    die ("Max profile label is 0 -- something is wrong.");
+  if (DEBUG_PROFILE)
+    for (i = 0; i < s->profiling.sourceLabelsLength; i++)
+      fprintf (stderr, FMTPTR"  %"PRIu32"\n",
+               (uintptr_t)s->profiling.sourceLabels[i].label,
+               s->profiling.sourceLabels[i].sourceSeqsIndex);
+  if (ASSERT)
+    for (i = 1; i < s->profiling.sourceLabelsLength; i++)
+      assert (s->profiling.sourceLabels[i-1].label
+              <= s->profiling.sourceLabels[i].label);
+  /* Initialize s->textSources. */
+  s->profiling.textEnd = (pointer)(getTextEnd());
+  s->profiling.textStart = (pointer)(getTextStart());
+  if (ASSERT)
+    for (i = 0; i < s->profiling.sourceLabelsLength; i++) {
+      pointer label;
+
+      label = s->profiling.sourceLabels[i].label;
+      assert (0 == label
+              or (s->profiling.textStart <= label
+                  and label < s->profiling.textEnd));
+    }
+  s->profiling.textSources =
+    (uint32_t*)
+    (calloc_safe((size_t)(s->profiling.textEnd - s->profiling.textStart), 
+                 sizeof(*(s->profiling.textSources))));
+  p = s->profiling.textStart;
+  sourceSeqsIndex = SOURCE_SEQ_UNKNOWN;
+  for (i = 0; i < s->profiling.sourceLabelsLength; i++) {
+    for ( ; p < s->profiling.sourceLabels[i].label; p++)
+      s->profiling.textSources[p - s->profiling.textStart] = sourceSeqsIndex;
+    sourceSeqsIndex = s->profiling.sourceLabels[i].sourceSeqsIndex;
+  }
+  for ( ; p < s->profiling.textEnd; p++)
+    s->profiling.textSources[p - s->profiling.textStart] = sourceSeqsIndex;
+  /*
+   * Install catcher, which handles SIGPROF and calls MLton_Profile_inc.
+   *
+   * One thing I should point out that I discovered the hard way: If
+   * the call to sigaction does NOT specify the SA_ONSTACK flag, then
+   * even if you have called sigaltstack(), it will NOT switch stacks,
+   * so you will probably die.  Worse, if the call to sigaction DOES
+   * have SA_ONSTACK and you have NOT called sigaltstack(), it still
+   * switches stacks (to location 0) and you die of a SEGV.  Thus the
+   * sigaction() call MUST occur after the call to sigaltstack(), and
+   * in order to have profiling cover as much as possible, you want it
+   * to occur right after the sigaltstack() call.
+   */
+  catcherState = s;
+  sigemptyset (&sa.sa_mask);
+  setSigProfHandler (&sa);
+  unless (sigaction (SIGPROF, &sa, NULL) == 0)
+    diee ("sigaction() failed");
+  /* Start the SIGPROF timer. */
+  setProfTimer (10000);
+}
+
+#endif
+
+/* profileEnd is for writing out an mlmon.out file even if the C code
+ * terminates abnormally, e.g. due to running out of memory.  It will
+ * only run if the usual SML profile atExit cleanup code did not
+ * manage to run.
+ */
+static GC_state profileEndState;
+
+static void profileEnd (void) {
+  int fd;
+  GC_state s;
+
+  if (DEBUG_PROFILE)
+    fprintf (stderr, "profileEnd ()\n");
+  s = profileEndState;
+  if (s->profiling.isOn) {
+    fd = creat ("mlmon.out", 0666);
+    if (fd < 0)
+      diee ("Cannot create mlmon.out");
+    GC_profileWrite (s, s->profiling.data, fd);
+  }
+}

Modified: mlton/branches/on-20050822-x86_64-branch/runtime/gc/profiling.h
===================================================================
--- mlton/branches/on-20050822-x86_64-branch/runtime/gc/profiling.h	2005-10-22 14:08:21 UTC (rev 4119)
+++ mlton/branches/on-20050822-x86_64-branch/runtime/gc/profiling.h	2005-10-26 00:53:09 UTC (rev 4120)
@@ -23,7 +23,6 @@
   uint32_t sourceSeqsIndex;
 } *GC_sourceLabel;
 
-
 /* If profileStack, then there is one struct GC_profileStack for each
  * function.
  */
@@ -47,15 +46,15 @@
   uintmax_t numOccurrences;
 } *GC_profileStack;
 
-/* GC_profile is used for both time and allocation profiling.
+/* GC_profileData is used for both time and allocation profiling.
  * In the comments below, "ticks" mean clock ticks with time profiling and
  * bytes allocated with allocation profiling.
  *
- * All of the arrays in GC_profile are of length sourcesSize + sourceNamesSize.
- * The first sourceSizes entries are for handling the duplicate copies of 
- * functions, and the next sourceNamesSize entries are for the master versions.
+ * All of the arrays in GC_profileData are of length sourcesSize + sourceNamesSize.
+ * The first sourceLength entries are for handling the duplicate copies of 
+ * functions, and the next sourceNamesLength entries are for the master versions.
  */
-typedef struct GC_profile {
+typedef struct GC_profileData {
   /* countTop is an array that counts for each function the number of
    * ticks that occurred while the function was on top of the stack.
    */
@@ -68,30 +67,45 @@
   uintmax_t total;
   /* The total number of GC ticks. */
   uintmax_t totalGC;
-} *GC_profile;
+} *GC_profileData;
 
-struct GC_profilingInfo {
+struct GC_profiling {
+  GC_profileData data;
+  /* frameSources is an array of cardinality frameLayoutsLength that
+   * for each stack frame, gives an index into sourceSeqs of the
+   * sequence of source functions corresponding to the frame.
+   */
+  uint32_t *frameSources;
+  uint32_t frameSourcesLength;
   bool isOn;
-  GC_profile profile;
   GC_profileKind kind;
+  struct GC_sourceLabel *sourceLabels;
+  uint32_t sourceLabelsLength;
+  char **sourceNames;
+  uint32_t sourceNamesLength;
+  /* Each entry in sourceSeqs is a vector, whose first element is a
+   * length, and subsequent elements index into sources.
+   */
+  uint32_t **sourceSeqs;
+  uint32_t sourceSeqsLength;
+  /* sources is an array of cardinality sourcesLength.  Each entry
+   * specifies an index into sourceNames and an index into sourceSeqs,
+   * giving the name of the function and the successors, respectively.
+   */
+  struct GC_source *sources;
+  uint32_t sourcesLength;
   bool stack;
+  pointer textEnd;
+  /* An array of indices, one entry for each address in the text
+   * segment, giving and index into sourceSeqs.
+   */
+  uint32_t *textSources;
+  pointer textStart;
 };
 
 
 void GC_profileAllocInc (GC_state s, size_t bytes);
 
-void GC_profileDone (GC_state s);
-
 void GC_profileEnter (GC_state s);
 
-void GC_profileFree (GC_state s, GC_profile p);
-
-void GC_profileInc (GC_state s, size_t bytes);
-
 void GC_profileLeave (GC_state s);
-
-GC_profile GC_profileNew (GC_state s);
-
-void GC_profileWrite (GC_state s, GC_profile p, int fd);
-
-void showProf (GC_state s);

Copied: mlton/branches/on-20050822-x86_64-branch/runtime/gc/read_write.c (from rev 4119, mlton/branches/on-20050822-x86_64-branch/runtime/gc.c)
===================================================================
--- mlton/branches/on-20050822-x86_64-branch/runtime/gc.c	2005-10-22 14:08:21 UTC (rev 4119)
+++ mlton/branches/on-20050822-x86_64-branch/runtime/gc/read_write.c	2005-10-26 00:53:09 UTC (rev 4120)
@@ -0,0 +1,38 @@
+/* Copyright (C) 1999-2005 Henry Cejtin, Matthew Fluet, Suresh
+ *    Jagannathan, and Stephen Weeks.
+ * Copyright (C) 1997-2000 NEC Research Institute.
+ *
+ * MLton is released under a BSD-style license.
+ * See the file MLton-LICENSE for details.
+ */
+
+
+static void writeString (int fd, char* s) {
+  write_safe (fd, s, strlen(s));
+}
+
+static void writeUint32U (int fd, uint32_t u) {
+  char buf[(UINT32_MAX / 10) + 2];
+
+  sprintf (buf, "%"PRIu32, u);
+  writeString (fd, buf);
+}
+
+static void writeUintmaxU (int fd, uintmax_t u) {
+  // char buf[(UINTMAX_MAX / 10) + 2];
+  char buf[20];
+
+  sprintf (buf, "%"PRIuMAX, u);
+  writeString (fd, buf);
+}
+
+static void writeUint32X (int fd, uint32_t u) {
+  char buf[5 + (UINT32_MAX / 16) + 2];
+  
+  sprintf (buf, "0x%08"PRIx32, u);
+  writeString (fd, buf);
+}
+
+static inline void writeNewline (int fd) {
+        writeString (fd, "\n");
+}

Modified: mlton/branches/on-20050822-x86_64-branch/runtime/gc/safe.c
===================================================================
--- mlton/branches/on-20050822-x86_64-branch/runtime/gc/safe.c	2005-10-22 14:08:21 UTC (rev 4119)
+++ mlton/branches/on-20050822-x86_64-branch/runtime/gc/safe.c	2005-10-26 00:53:09 UTC (rev 4120)
@@ -92,3 +92,21 @@
     diee ("unlink (%s) failed.\n", pathname);
   return;
 }
+
+void read_safe (int fd, void *buf, size_t size) {
+  ssize_t res;
+
+  if (0 == size) return;
+  res = read (fd, buf, size);
+  if (res == -1 or (size_t)res != size)
+    diee ("read (_, _, _) failed.\n");
+}
+
+void write_safe (int fd, const void *buf, size_t size) {
+  ssize_t res;
+
+  if (0 == size) return;
+  res = write (fd, buf, size);
+  if (res == -1 or (size_t)res != size)
+    diee ("write (_, _, _) failed.\n");
+}

Modified: mlton/branches/on-20050822-x86_64-branch/runtime/gc/signals.h
===================================================================
--- mlton/branches/on-20050822-x86_64-branch/runtime/gc/signals.h	2005-10-22 14:08:21 UTC (rev 4119)
+++ mlton/branches/on-20050822-x86_64-branch/runtime/gc/signals.h	2005-10-26 00:53:09 UTC (rev 4120)
@@ -7,18 +7,20 @@
  */
 
 struct GC_signalsInfo {
-  bool amInSignalHandler;   /* TRUE iff a signal handler is running. */
+  /* TRUE iff a signal handler is running. */
+  bool amInSignalHandler;   
   bool gcSignalHandled;
   bool gcSignalPending;
-  volatile bool signalIsPending; /* TRUE iff a signal has been received 
-                                  * but not handled by the mutator.
-                                  */
-  /* signalsHandled is the set of signals for which a mutator signal
-   * handler needs to run in order to handle the signal.
+  /* TRUE iff a signal has been received but not handled by the
+   * mutator.
    */
+  volatile bool signalIsPending; 
+  /* The signals for which a mutator signal handler needs to run in
+   * order to handle the signal.
+   */
   sigset_t signalsHandled;
-  /* The signals that have been recieved but not processed by the mutator
-   * signal handler.
+  /* The signals that have been recieved but not processed by the
+   * mutator signal handler.
    */
   sigset_t signalsPending;
 };

Modified: mlton/branches/on-20050822-x86_64-branch/runtime/gc/util.h
===================================================================
--- mlton/branches/on-20050822-x86_64-branch/runtime/gc/util.h	2005-10-22 14:08:21 UTC (rev 4119)
+++ mlton/branches/on-20050822-x86_64-branch/runtime/gc/util.h	2005-10-26 00:53:09 UTC (rev 4120)
@@ -27,6 +27,7 @@
 #include <string.h>
 #include <math.h>
 
+#include <fcntl.h>
 #include <signal.h>
 #include <unistd.h>
 #include <sys/resource.h>

Modified: mlton/branches/on-20050822-x86_64-branch/runtime/gc/virtual-memory.h
===================================================================
--- mlton/branches/on-20050822-x86_64-branch/runtime/gc/virtual-memory.h	2005-10-22 14:08:21 UTC (rev 4119)
+++ mlton/branches/on-20050822-x86_64-branch/runtime/gc/virtual-memory.h	2005-10-26 00:53:09 UTC (rev 4120)
@@ -21,3 +21,7 @@
 void *GC_mremap (void *start, size_t oldLength, size_t newLength);
 void GC_release (void *base, size_t length);
 void GC_decommit (void *base, size_t length);
+
+void *getTextEnd (void);
+void *getTextStart (void);
+void setSigProfHandler (struct sigaction *sa);