[MLton-commit] r6441

spoons at mlton.org spoons at mlton.org
Mon Mar 3 07:26:30 PST 2008


Support processor-local allocation.

The GC_heap structure now defines the global nursery, while each
processor's GC_state tracks the per-processor allocation area.
Allocation from a processor-local area proceeds exactly as in the single-
processor version of MLton.  When processor-local space is exhausted,
the processor attempts to take a chunk from the global space using an
atomic compare-and-swap operation.  If no global space is available,
the processor signals that a collection is necessary and waits for the
remaining processors to catch up.
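
A minimal standalone sketch of the chunk-grab protocol described above
(names such as Heap, Proc, and claimChunk are illustrative, not MLton's;
the actual code is maybeSatisfyAllocationRequestLocally in the diff below):

  #include <stdbool.h>
  #include <stddef.h>

  typedef struct Heap {
    char *start;          /* base of the shared nursery */
    size_t availableSize; /* bytes usable before a collection is required */
    char *frontier;       /* next globally unallocated byte (shared) */
  } Heap;

  typedef struct Proc {
    char *frontier;       /* per-processor allocation pointer */
    char *limit;          /* end of this processor's current chunk */
  } Proc;

  /* Try to claim `bytes` from the shared nursery with a compare-and-swap.
     Returns false when no global space is left; the caller must then
     signal that a collection is necessary and wait for the others. */
  static bool claimChunk (Heap *h, Proc *p, size_t bytes) {
    for (;;) {
      char *old = h->frontier;   /* the one read of the shared frontier */
      if (bytes > (size_t)((h->start + h->availableSize) - old))
        return false;            /* global space exhausted: GC needed */
      if (__sync_bool_compare_and_swap (&h->frontier, old, old + bytes)) {
        p->frontier = old;       /* the chunk is now processor-local */
        p->limit = old + bytes;
        return true;
      }
      /* CAS lost to another processor; reread the frontier and retry. */
    }
  }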

One complicating factor is that the unused portion of a processor's
allocation area must always be overwritten with a well-formed filler
object, so that no uninitialized gap is left in the middle of the heap.
To guarantee this, we extend each processor-local allocation area by a
small amount of extra space, enough to hold the smallest filler object.
This additional space is known as the "bonus slop" in the code.
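
As a hedged illustration of why the slop is sized the way it is: the
collector parses the nursery object by object, so a leftover gap must be
overwritten with a filler the collector can skip, and the smallest filler
needs room for a header plus a 32-bit gap size (GC_BONUS_SLOP in
object.h, below).  A sketch under those assumptions, with illustrative
names and a hypothetical filler tag:

  #include <stdint.h>
  #include <stddef.h>

  typedef uint32_t Header;
  typedef uint32_t GapSize;
  #define BONUS_SLOP (sizeof (Header) + sizeof (GapSize))

  /* Overwrite the gap [start, end) with a filler object the collector
     can recognize and skip.  Assumes the chunk was extended by
     BONUS_SLOP, so the gap is either empty or at least BONUS_SLOP
     bytes.  (The real fillGap in garbage-collection.c also uses
     header-only and array fillers for other gap sizes.) */
  static void fillGapSketch (char *start, char *end) {
    size_t diff = (size_t)(end - start);
    if (diff == 0)
      return;
    *(Header *)start = 0x1u;          /* hypothetical filler tag */
    *(GapSize *)(start + sizeof (Header)) =
      (GapSize)(diff - BONUS_SLOP);   /* gap bytes past the two fields */
  }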

----------------------------------------------------------------------

U   mlton/branches/shared-heap-multicore/basis-library/primitive/prim-mlton.sml
U   mlton/branches/shared-heap-multicore/runtime/gc/cheney-copy.c
U   mlton/branches/shared-heap-multicore/runtime/gc/forward.c
U   mlton/branches/shared-heap-multicore/runtime/gc/garbage-collection.c
U   mlton/branches/shared-heap-multicore/runtime/gc/garbage-collection.h
U   mlton/branches/shared-heap-multicore/runtime/gc/gc_state.c
U   mlton/branches/shared-heap-multicore/runtime/gc/heap.c
U   mlton/branches/shared-heap-multicore/runtime/gc/heap.h
U   mlton/branches/shared-heap-multicore/runtime/gc/heap_predicates.c
U   mlton/branches/shared-heap-multicore/runtime/gc/init-world.c
U   mlton/branches/shared-heap-multicore/runtime/gc/new-object.c
U   mlton/branches/shared-heap-multicore/runtime/gc/object.h

----------------------------------------------------------------------

Modified: mlton/branches/shared-heap-multicore/basis-library/primitive/prim-mlton.sml
===================================================================
--- mlton/branches/shared-heap-multicore/basis-library/primitive/prim-mlton.sml	2008-03-03 15:20:13 UTC (rev 6440)
+++ mlton/branches/shared-heap-multicore/basis-library/primitive/prim-mlton.sml	2008-03-03 15:26:28 UTC (rev 6441)
@@ -112,8 +112,9 @@
 
 structure FFI =
    struct
+   (* XXX all these symbols need to be per-processor *)
       val cpointerArray = #1 _symbol "MLton_FFI_CPointer": Pointer.t GetSet.t; ()
-      val getOp = #1 _symbol "MLton_FFI_op": Int32.t GetSet.t;
+      val getOp = _prim "FFI_getOp": unit -> Int32.int;
       val int8Array = #1 _symbol "MLton_FFI_Int8": Pointer.t GetSet.t; ()
       val int16Array = #1 _symbol "MLton_FFI_Int16": Pointer.t GetSet.t; ()
       val int32Array = #1 _symbol "MLton_FFI_Int32": Pointer.t GetSet.t; ()

Modified: mlton/branches/shared-heap-multicore/runtime/gc/cheney-copy.c
===================================================================
--- mlton/branches/shared-heap-multicore/runtime/gc/cheney-copy.c	2008-03-03 15:20:13 UTC (rev 6440)
+++ mlton/branches/shared-heap-multicore/runtime/gc/cheney-copy.c	2008-03-03 15:26:28 UTC (rev 6441)
@@ -96,24 +96,40 @@
 
 void minorCheneyCopyGC (GC_state s) {
   size_t bytesAllocated;
+  size_t bytesFilled = 0;
   size_t bytesCopied;
   struct rusage ru_start;
 
   if (DEBUG_GENERATIONAL)
     fprintf (stderr, "minorGC  nursery = "FMTPTR"  frontier = "FMTPTR"\n",
-             (uintptr_t)s->heap.nursery, (uintptr_t)s->frontier);
+             (uintptr_t)s->heap->nursery, (uintptr_t)s->frontier);
   assert (invariantForGC (s));
-  bytesAllocated = s->frontier - s->heap.nursery;
+  /* XXX spoons not accurate if this doesn't account for gaps */
+  bytesAllocated = s->heap->frontier - s->heap->nursery;
   if (bytesAllocated == 0)
     return;
-  s->cumulativeStatistics.bytesAllocated += bytesAllocated;
   if (not s->canMinor) {
-    s->heap.oldGenSize += bytesAllocated;
+    for (int proc = 0; proc < s->numberOfProcs; proc++) {
+      /* Add in the bonus slop now since we need to fill it */
+      s->procStates[proc].limitPlusSlop += GC_BONUS_SLOP;
+      if (s->procStates[proc].limitPlusSlop != s->heap->frontier) {
+        /* Fill to avoid an uninitialized gap in the middle of the heap */
+        bytesFilled += fillGap (s, s->procStates[proc].frontier,
+                                s->procStates[proc].limitPlusSlop);
+      }
+      else {
+        /* If this is at the end of the heap there is no need to fill the gap
+         -- there will be no break in the initialized portion of the
+         heap.  Also, this is the last chunk allocated in the nursery, so it is
+         safe to use the frontier from this processor as the global frontier.  */
+        s->heap->oldGenSize = s->procStates[proc].frontier - s->heap->start;
+      }
+    }
     bytesCopied = 0;
   } else {
-    if (DEBUG_GENERATIONAL or s->controls.messages)
+    if (DEBUG_GENERATIONAL or s->controls->messages)
       fprintf (stderr, "[GC: Minor Cheney-copy; nursery at "FMTPTR" of size %s bytes,]\n",
-               (uintptr_t)(s->heap.nursery),
+               (uintptr_t)(s->heap->nursery),
                uintmaxToCommaString(bytesAllocated));
     if (detailedGCTime (s))
       startTiming (&ru_start);
@@ -145,4 +161,7 @@
       fprintf (stderr, "[GC: Minor Cheney-copy done; %s bytes copied.]\n",
                uintmaxToCommaString(bytesCopied));
   }
+  bytesAllocated -= bytesFilled;
+  s->cumulativeStatistics->bytesAllocated += bytesAllocated;
+  s->cumulativeStatistics->bytesFilled += bytesFilled;
 }

Modified: mlton/branches/shared-heap-multicore/runtime/gc/forward.c
===================================================================
--- mlton/branches/shared-heap-multicore/runtime/gc/forward.c	2008-03-03 15:20:13 UTC (rev 6440)
+++ mlton/branches/shared-heap-multicore/runtime/gc/forward.c	2008-03-03 15:26:28 UTC (rev 6441)
@@ -155,7 +155,7 @@
     fprintf (stderr,
              "forwardObjptrIfInNursery  opp = "FMTPTR"  op = "FMTOBJPTR"  p = "FMTPTR"\n",
              (uintptr_t)opp, op, (uintptr_t)p);
-  assert (s->heap.nursery <= p and p < s->limitPlusSlop);
+  assert (s->heap->nursery <= p and p < s->heap->frontier);
   forwardObjptr (s, opp);
 }
 

Modified: mlton/branches/shared-heap-multicore/runtime/gc/garbage-collection.c
===================================================================
--- mlton/branches/shared-heap-multicore/runtime/gc/garbage-collection.c	2008-03-03 15:20:13 UTC (rev 6440)
+++ mlton/branches/shared-heap-multicore/runtime/gc/garbage-collection.c	2008-03-03 15:26:28 UTC (rev 6441)
@@ -111,13 +111,25 @@
     : sizeofStackWithHeaderAligned (s, sizeofStackGrow (s, getStackCurrent (s)));
   totalBytesRequested = 
     oldGenBytesRequested 
-    + nurseryBytesRequested
     + stackBytesRequested;
+  getThreadCurrent(s)->bytesNeeded = nurseryBytesRequested;
+  for (int proc = 0; proc < s->numberOfProcs; proc++) {
+    /* It could be that other threads have already worked to satisfy their own
+       requests.  We need to make sure that we don't invalidate the work
+       they've done.
+    */
+    if (getThreadCurrent(&s->procStates[proc])->bytesNeeded == 0) {
+      getThreadCurrent(&s->procStates[proc])->bytesNeeded = GC_HEAP_LIMIT_SLOP;
+    }
+    totalBytesRequested += getThreadCurrent(&s->procStates[proc])->bytesNeeded;
+    totalBytesRequested += GC_BONUS_SLOP;
+  }
+
   if (forceMajor 
-      or totalBytesRequested > s->heap.size - s->heap.oldGenSize)
+      or totalBytesRequested > s->heap->availableSize - s->heap->oldGenSize)
     majorGC (s, totalBytesRequested, mayResize);
   setGCStateCurrentHeap (s, oldGenBytesRequested + stackBytesRequested, 
-                         nurseryBytesRequested);
+                         nurseryBytesRequested, false);
   assert (hasHeapBytesFree (s, oldGenBytesRequested + stackBytesRequested,
                             nurseryBytesRequested));
   unless (stackTopOk)
@@ -152,29 +164,249 @@
   leaveGC (s);
 }
 
-void ensureInvariantForMutator (GC_state s, bool force) {
-  if (force
-      or not (invariantForMutatorFrontier(s))
-      or not (invariantForMutatorStack(s))) {
-    /* This GC will grow the stack, if necessary. */
-    performGC (s, 0, getThreadCurrent(s)->bytesNeeded, force, TRUE);
+size_t fillGap (__attribute__ ((unused)) GC_state s, pointer start, pointer end) {
+  size_t diff = end - start;
+
+  if (diff == 0) {
+    return 0;
   }
-  assert (invariantForMutatorFrontier(s));
-  assert (invariantForMutatorStack(s));
+
+  if (DEBUG)
+    fprintf (stderr, "[GC: Filling gap between "FMTPTR" and "FMTPTR" (size = "FMTARRLEN").]\n", 
+             (uintptr_t)start, (uintptr_t)end, diff);
+
+  if (start) {
+    /* See note in the array case of foreach.c (line 103) */
+    if (diff >= GC_ARRAY_HEADER_SIZE + OBJPTR_SIZE) {
+      assert (diff >= GC_ARRAY_HEADER_SIZE);     
+      /* Counter */
+      *((GC_arrayCounter *)start) = 0;
+      start = start + GC_ARRAY_COUNTER_SIZE;
+      /* Length */
+      *((GC_arrayLength *)start) = diff - GC_ARRAY_HEADER_SIZE;
+      start = start + GC_ARRAY_LENGTH_SIZE;
+      /* Header */
+      *((GC_header *)start) = GC_WORD8_VECTOR_HEADER;
+      start = start + GC_HEADER_SIZE;
+    }
+    else if (diff == GC_HEADER_SIZE) {
+      *((GC_header *)start) = GC_HEADER_ONLY_HEADER;
+      start = start + GC_HEADER_SIZE;      
+    }
+    else if (diff >= GC_BONUS_SLOP) {
+      assert (diff < INT_MAX);
+      *((GC_header *)start) = GC_FILL_HEADER;
+      start = start + GC_HEADER_SIZE;
+      *((GC_smallGapSize *)start) = diff - (GC_HEADER_SIZE + GC_SMALL_GAP_SIZE_SIZE);
+      start = start + GC_SMALL_GAP_SIZE_SIZE;
+    }
+    else {
+      assert(0 == diff);
+      /* XXX */
+      fprintf (stderr, "FOUND A GAP OF "FMTARRLEN" BYTES!\n", diff);
+      exit (1);
+    }
+
+    /* XXX debug only */
+    /*
+    while (start < end) {
+      *(start++) = 0xDF;
+    }
+    */
+
+    return diff;
+  }  
+  else {
+    return 0;
+  }
 }
 
-/* ensureHasHeapBytesFree (s, oldGen, nursery) 
- */
-void ensureHasHeapBytesFree (GC_state s, 
-                             size_t oldGenBytesRequested,
-                             size_t nurseryBytesRequested) {
-  assert (s->heap.nursery <= s->limitPlusSlop);
-  assert (s->frontier <= s->limitPlusSlop);
-  if (not hasHeapBytesFree (s, oldGenBytesRequested, nurseryBytesRequested))
-    performGC (s, oldGenBytesRequested, nurseryBytesRequested, FALSE, TRUE);
-  assert (hasHeapBytesFree (s, oldGenBytesRequested, nurseryBytesRequested));
+static void maybeSatisfyAllocationRequestLocally (GC_state s,
+                                                  size_t nurseryBytesRequested) {
+  /* First try and take another chunk from the shared nursery */
+  while (TRUE)
+  {
+    /* This is the only read of the global frontier -- never read it again
+       until after the swap. */
+    pointer oldFrontier = s->heap->frontier;
+    pointer newHeapFrontier, newProcFrontier;
+    pointer newStart;
+    /* heap->start and heap->size are read-only (unless you hold the global
+       lock) so it's ok to read them here */
+    size_t availableBytes = (size_t)((s->heap->start + s->heap->availableSize)
+                                     - oldFrontier);
+
+    /* If another thread is trying to get exclusive access, then join the
+       queue. */
+    if (Proc_threadInSection (s)) {
+      if (DEBUG)
+        fprintf (stderr, "[GC: aborting local alloc: mutex.]\n");
+      return;
+    }
+    /* See if the mutator frontier invariant is already true */
+    assert (s->limitPlusSlop >= s->frontier);
+    if (nurseryBytesRequested <= (size_t)(s->limitPlusSlop - s->frontier)) {
+      if (DEBUG)
+        fprintf (stderr, "[GC: aborting local alloc: satisfied.]\n");
+      return;
+    }
+    /* Perhaps there is not enough space in the nursery to satisfy this
+       request; if that's true then we need to do a full collection */
+    if (nurseryBytesRequested + GC_BONUS_SLOP > availableBytes) {
+      if (DEBUG)
+        fprintf (stderr, "[GC: aborting local alloc: no space.]\n");
+      return;
+    }
+
+    /* OK! We might possibly satisfy this request without the runtime lock!
+       Let's see what that will entail... */
+       
+    /* Now see if we were the most recent thread to allocate */
+    if (oldFrontier == s->limitPlusSlop + GC_BONUS_SLOP) {
+      /* This is the next chunk so no need to fill */
+      newHeapFrontier = s->frontier + nurseryBytesRequested + GC_BONUS_SLOP;
+      /* Leave "start" and "frontier" where they are */
+      newStart = s->start;
+      newProcFrontier = s->frontier;
+    }
+    else {               
+      /* Fill the old gap */
+      fillGap (s, s->frontier, s->limitPlusSlop + GC_BONUS_SLOP);
+      /* Don't update frontier or limitPlusSlop since we will either
+         overwrite them (if we succeed) or just fill the same gap again
+         (if we fail).  (There is no obvious other pair of values that
+         we can set them to that is safe.) */
+      newHeapFrontier = oldFrontier + nurseryBytesRequested + GC_BONUS_SLOP;
+      newProcFrontier = oldFrontier;
+      /* Move "start" since the space between old-start and frontier is not
+         necessarily filled */
+      newStart = oldFrontier;
+    }
+    
+    if (__sync_bool_compare_and_swap (&s->heap->frontier, 
+                                      oldFrontier, newHeapFrontier)) {
+      if (DEBUG)
+        fprintf (stderr, "[GC: Local allocation of chunk @ "FMTPTR".]\n", 
+                 (uintptr_t)newProcFrontier);
+
+      s->start = newStart;
+      s->frontier = newProcFrontier;
+      assert (isFrontierAligned (s, s->frontier));
+      s->limitPlusSlop = newHeapFrontier - GC_BONUS_SLOP;
+      s->limit = s->limitPlusSlop - GC_HEAP_LIMIT_SLOP;
+
+      return;
+    }
+    else {
+      if (DEBUG)
+        fprintf (stderr, "[GC: Contention for allocation (frontier is "FMTPTR").]\n", 
+                 (uintptr_t)s->heap->frontier);
+    }
+  }
 }
 
+// assumes that stack->used and thread->exnstack are up to date
+// assumes exclusive access to runtime if !mustEnter
+// forceGC = force major collection
+void ensureHasHeapBytesFreeAndOrInvariantForMutator (GC_state s, bool forceGC,
+                                                     bool ensureFrontier,
+                                                     bool ensureStack,
+                                                     size_t oldGenBytesRequested,
+                                                     size_t nurseryBytesRequested) {
+  bool stackTopOk;
+  size_t stackBytesRequested;
+
+  /* To ensure the mutator frontier invariant, set the requested bytes
+     to include those needed by the thread.
+   */
+  if (ensureFrontier) {
+    nurseryBytesRequested += getThreadCurrent(s)->bytesNeeded;
+  }
+
+  /* XXX (sort of) copied from performGC */
+  stackTopOk = (not ensureStack) or invariantForMutatorStack (s);
+  stackBytesRequested =
+    stackTopOk 
+    ? 0 
+    : sizeofStackWithHeaderAligned (s, sizeofStackGrow (s, getStackCurrent (s)));
+
+  /* try to satisfy (at least part of) the request locally */
+  maybeSatisfyAllocationRequestLocally (s, nurseryBytesRequested + stackBytesRequested);
+
+  if (not stackTopOk
+      and (hasHeapBytesFree (s, 0, stackBytesRequested))) {
+    if (DEBUG or s->controls->messages)
+      fprintf (stderr, "GC: growing stack locally... [%d]\n",
+               s->procStates ? Proc_processorNumber (s) : -1);
+    growStackCurrent (s, FALSE);
+    setGCStateCurrentThreadAndStack (s);
+  }
+
+  if (DEBUG or s->controls->messages) {
+    fprintf (stderr, "GC: stackInvariant: %d,%d hasHeapBytesFree: %d inSection: %d force: %d [%d]\n", 
+             ensureStack, ensureStack and invariantForMutatorStack (s),
+             hasHeapBytesFree (s, oldGenBytesRequested, nurseryBytesRequested),
+             Proc_threadInSection (s),
+             forceGC,
+             s->procStates ? Proc_processorNumber (s) : -1);
+  }
+
+  if (/* check the stack of the current thread */
+      ((ensureStack and not invariantForMutatorStack (s))
+          and (s->syncReason = SYNC_STACK))
+      /* this subsumes invariantForMutatorFrontier */
+      or (not hasHeapBytesFree (s, oldGenBytesRequested, nurseryBytesRequested)
+            and (s->syncReason = SYNC_HEAP))
+      /* another thread is waiting for exclusive access */
+      or Proc_threadInSection (s)
+      /* we are forcing a major collection */
+      or (forceGC
+           and (s->syncReason = SYNC_FORCE))) {
+    /* Copy the value here so other threads will see it (if we synchronize and
+       one of the other threads does the work). */
+    if (isObjptr (getThreadCurrentObjptr(s)))
+      getThreadCurrent(s)->bytesNeeded = nurseryBytesRequested;
+
+    ENTER0 (s);
+    /* XXX should this go here? */
+    switchToSignalHandlerThreadIfNonAtomicAndSignalPending (s);
+
+    /* Recheck invariants now that we hold the lock */
+    if ((ensureStack and not invariantForMutatorStack (s))
+        or not hasHeapBytesFree (s, oldGenBytesRequested, nurseryBytesRequested)
+        or forceGC) {
+      performGC (s, oldGenBytesRequested, nurseryBytesRequested, forceGC, TRUE);
+    }
+    else
+      if (DEBUG or s->controls->messages)
+        fprintf (stderr, "GC: Skipping GC (inside of sync). [%d]\n", s->procStates ? Proc_processorNumber (s) : -1);
+
+    LEAVE0 (s);
+  }
+  else {
+    if (DEBUG or s->controls->messages)
+      fprintf (stderr, "GC: Skipping GC (invariants already hold / request satisfied locally). [%d]\n", s->procStates ? Proc_processorNumber (s) : -1);
+
+    /* These are safe even without ENTER/LEAVE */
+    assert (isAligned (s->heap->size, s->sysvals.pageSize));
+    assert (isAligned ((size_t)s->heap->start, CARD_SIZE));
+    assert (isFrontierAligned (s, s->heap->start + s->heap->oldGenSize));
+    assert (isFrontierAligned (s, s->heap->nursery));
+    assert (isFrontierAligned (s, s->frontier));
+    assert (s->heap->start + s->heap->oldGenSize <= s->heap->nursery);
+    assert (s->heap->nursery <= s->heap->start + s->heap->availableSize);
+    assert (s->heap->nursery <= s->frontier or 0 == s->frontier);
+    assert (s->start <= s->frontier);
+    unless (0 == s->heap->size or 0 == s->frontier) {
+      assert (s->frontier <= s->limitPlusSlop);
+      assert (s->limit == s->limitPlusSlop - GC_HEAP_LIMIT_SLOP);
+      assert (hasHeapBytesFree (s, 0, 0));
+    }
+  }
+  assert (not ensureFrontier or invariantForMutatorFrontier(s));
+  assert (not ensureStack or invariantForMutatorStack(s));
+}
+
 void GC_collect (GC_state s, size_t bytesRequested, bool force,
             char *file, int line) {
   if (DEBUG)
@@ -184,11 +416,24 @@
    * much as GC_HEAP_LIMIT_SLOP.
    */
   if (0 == bytesRequested)
-    bytesRequested = GC_HEAP_LIMIT_SLOP;
+    bytesRequested = s->controls->allocChunkSize;
+  else if (bytesRequested < s->controls->allocChunkSize)
+    bytesRequested = s->controls->allocChunkSize;
+  else
+    bytesRequested += GC_HEAP_LIMIT_SLOP;
+
+  /* XXX copied from enter() */
+  /* used needs to be set because the mutator has changed s->stackTop. */
+  getStackCurrent(s)->used = sizeofGCStateCurrentStackUsed (s);
+  getThreadCurrent(s)->exnStack = s->exnStack;
+
   getThreadCurrent(s)->bytesNeeded = bytesRequested;
-  switchToSignalHandlerThreadIfNonAtomicAndSignalPending (s);
-  ensureInvariantForMutator (s, force);
-  assert (invariantForMutatorFrontier(s));
-  assert (invariantForMutatorStack(s));
-  leave (s);
+
+  ensureHasHeapBytesFreeAndOrInvariantForMutator (s, force, 
+                                                  TRUE, TRUE,
+                                                  0, 0);
 }
+
+uint32_t FFI_getOp (GC_state s) {
+  return s->ffiOp;
+}

Modified: mlton/branches/shared-heap-multicore/runtime/gc/garbage-collection.h
===================================================================
--- mlton/branches/shared-heap-multicore/runtime/gc/garbage-collection.h	2008-03-03 15:20:13 UTC (rev 6440)
+++ mlton/branches/shared-heap-multicore/runtime/gc/garbage-collection.h	2008-03-03 15:26:28 UTC (rev 6441)
@@ -10,7 +10,7 @@
 
 static void minorGC (GC_state s);
 static void majorGC (GC_state s, size_t bytesRequested, bool mayResize);
-static inline void growStackCurrent (GC_state s);
+static inline void growStackCurrent (GC_state s, bool allocInOldGen);
 static inline void enterGC (GC_state s);
 static inline void leaveGC (GC_state s);
 static void performGC (GC_state s, 
@@ -18,10 +18,13 @@
                        size_t nurseryBytesRequested, 
                        bool forceMajor,
                        bool mayResize);
-static inline void ensureInvariantForMutator (GC_state s, bool force);
-static inline void ensureHasHeapBytesFree (GC_state s, 
-                                           size_t oldGenBytesRequested,
-                                           size_t nurseryBytesRequested);
+static size_t fillGap (GC_state s, pointer start, pointer end);
+static void ensureHasHeapBytesFreeAndOrInvariantForMutator (GC_state s, 
+                                                            bool forceGC,
+                                                            bool ensureFrontier,
+                                                            bool ensureStack,
+                                                            size_t oldGenBytesRequested,
+                                                            size_t nurseryBytesRequested);
 
 #endif /* (defined (MLTON_GC_INTERNAL_FUNCS)) */
 
@@ -29,5 +32,7 @@
 
 void GC_collect (GC_state s, size_t bytesRequested, bool force,
                  char *file, int line);
+/* XXX spoons should probably go somewhere else... or just get removed */
+uint32_t FFI_getOp (GC_state s);
 
 #endif /* (defined (MLTON_GC_INTERNAL_BASIS)) */

Modified: mlton/branches/shared-heap-multicore/runtime/gc/gc_state.c
===================================================================
--- mlton/branches/shared-heap-multicore/runtime/gc/gc_state.c	2008-03-03 15:20:13 UTC (rev 6440)
+++ mlton/branches/shared-heap-multicore/runtime/gc/gc_state.c	2008-03-03 15:26:28 UTC (rev 6441)
@@ -53,34 +53,46 @@
 
 void setGCStateCurrentHeap (GC_state s, 
                             size_t oldGenBytesRequested,
-                            size_t nurseryBytesRequested) {
+                            size_t nurseryBytesRequested,
+                            bool duringInit) {
   GC_heap h;
   pointer nursery;
   size_t nurserySize;
   pointer genNursery;
   size_t genNurserySize;
+  pointer limit;
+  pointer frontier;
+  size_t bonus = GC_BONUS_SLOP * s->numberOfProcs;
 
+  if (not duringInit) {
+    nurseryBytesRequested = 0;
+    for (int proc = 0; proc < s->numberOfProcs; proc++) {
+      GC_thread thread = getThreadCurrent(&s->procStates[proc]);
+      if (thread) 
+        nurseryBytesRequested += thread->bytesNeeded;
+    }
+  }
+
   if (DEBUG_DETAILED)
     fprintf (stderr, "setGCStateCurrentHeap(%s, %s)\n",
              uintmaxToCommaString(oldGenBytesRequested),
              uintmaxToCommaString(nurseryBytesRequested));
-  h = &s->heap;
+  h = s->heap;
   assert (isFrontierAligned (s, h->start + h->oldGenSize + oldGenBytesRequested));
-  s->limitPlusSlop = h->start + h->size;
-  s->limit = s->limitPlusSlop - GC_HEAP_LIMIT_SLOP;
-  nurserySize = h->size - (h->oldGenSize + oldGenBytesRequested);
-  assert (isFrontierAligned (s, s->limitPlusSlop - nurserySize));
-  nursery = s->limitPlusSlop - nurserySize;
-  genNursery = alignFrontier (s, s->limitPlusSlop - (nurserySize / 2));
-  genNurserySize = s->limitPlusSlop - genNursery;
+  limit = h->start + h->size - bonus;
+  nurserySize = h->size - (h->oldGenSize + oldGenBytesRequested) - bonus;
+  assert (isFrontierAligned (s, limit - nurserySize));
+  nursery = limit - nurserySize;
+  genNursery = alignFrontier (s, limit - (nurserySize / 2));
+  genNurserySize = limit - genNursery;
   if (/* The mutator marks cards. */
       s->mutatorMarksCards
       /* There is enough space in the generational nursery. */
       and (nurseryBytesRequested <= genNurserySize)
       /* The nursery is large enough to be worth it. */
-      and (((float)(h->size - s->lastMajorStatistics.bytesLive) 
+      and (((float)(h->size - s->lastMajorStatistics->bytesLive) 
             / (float)nurserySize) 
-           <= s->controls.ratios.nursery)
+           <= s->controls->ratios.nursery)
       and /* There is a reason to use generational GC. */
       (
        /* We must use it for debugging purposes. */
@@ -97,23 +109,148 @@
     nursery = genNursery;
     nurserySize = genNurserySize;
     clearCardMap (s);
+    /* XXX copy card map to other processors? */
   } else {
     unless (nurseryBytesRequested <= nurserySize)
       die ("Out of memory.  Insufficient space in nursery.");
     s->canMinor = FALSE;
   }
+
+  if (s->controls->restrictAvailableSize
+      and
+      (s->cumulativeStatistics->maxBytesLiveSinceReset > 0)) {
+    float actualRatio;
+    h->availableSize =
+      (size_t)(s->controls->ratios.available
+               * s->cumulativeStatistics->maxBytesLiveSinceReset);
+
+    if ((h->oldGenSize + oldGenBytesRequested + nurserySize + bonus) 
+        > h->availableSize) {
+      /* Limit allocation in this round */
+      if ((h->oldGenSize + oldGenBytesRequested + nurseryBytesRequested + bonus)
+          > h->availableSize) {
+        /* We can't limit as much as we'd like, so offer enough space to
+           satisfy the current request. */
+        h->availableSize = h->oldGenSize + oldGenBytesRequested 
+          + nurseryBytesRequested + bonus;
+      }
+      if (h->availableSize > h->size) {
+        /* Can't offer more than we have. */
+        h->availableSize = h->size;
+      }
+      limit = h->start + h->availableSize - bonus;
+      nurserySize = h->availableSize - (h->oldGenSize + oldGenBytesRequested) - bonus;
+      assert (isFrontierAligned (s, limit - nurserySize));
+      nursery = limit - nurserySize;      
+      
+      if (s->canMinor) {
+        /* If we are planning for a minor collection, we must also adjust the
+           start of the nursery */
+        nursery = alignFrontier (s, limit - (nurserySize / 2));
+        nurserySize = limit - nursery;
+      }
+      if (DEBUG) {
+        fprintf (stderr, 
+                 "[GC: Restricted nursery at "FMTPTR" of %s bytes (%.1f%%).]\n",
+                 (uintptr_t)nursery, uintmaxToCommaString(limit - nursery),
+                 100.0 * ((double)(limit - nursery)
+                          / (double)h->availableSize));
+      }      
+    }
+    else {
+      /* No need to limit in this round... reset availableSize. */
+      h->availableSize = h->size;
+    }
+
+    actualRatio = (float)h->availableSize 
+      / s->cumulativeStatistics->maxBytesLiveSinceReset;
+    if ((DEBUG or s->controls->messages)
+        and 
+        (actualRatio > s->controls->ratios.available)) {
+      fprintf (stderr, 
+               "[GC: Can't restrict available ratio to %f, using %f; worst-case max-live is %s bytes.]\n",
+               s->controls->ratios.available, actualRatio,
+               uintmaxToCommaString(h->oldGenSize + oldGenBytesRequested + nurserySize));
+    }
+  }
+  else {
+    /* Otherwise, make all unused space available */
+    h->availableSize = h->size;
+  }
+
   assert (nurseryBytesRequested <= nurserySize);
-  s->heap.nursery = nursery;
-  s->frontier = nursery;
-  assert (nurseryBytesRequested <= (size_t)(s->limitPlusSlop - s->frontier));
-  assert (isFrontierAligned (s, s->heap.nursery));
-  assert (hasHeapBytesFree (s, oldGenBytesRequested, nurseryBytesRequested));
+  s->heap->nursery = nursery;
+  frontier = nursery;
+
+  if (not duringInit) {
+    for (int proc = 0; proc < s->numberOfProcs; proc++) {
+      s->procStates[proc].canMinor = s->canMinor;
+      assert (isFrontierAligned (s, frontier));
+      s->procStates[proc].start = s->procStates[proc].frontier = frontier;
+      s->procStates[proc].limitPlusSlop = s->procStates[proc].start + 
+        getThreadCurrent(&s->procStates[proc])->bytesNeeded;
+      s->procStates[proc].limit = s->procStates[proc].limitPlusSlop - GC_HEAP_LIMIT_SLOP;
+      assert (s->procStates[proc].frontier <= s->procStates[proc].limitPlusSlop);
+      /* XXX clearCardMap (?) */
+
+      if (DEBUG)
+        for (size_t i = 0; i < GC_BONUS_SLOP; i++)
+          *(s->procStates[proc].limitPlusSlop + i) = 0xBF;
+
+      frontier = s->procStates[proc].limitPlusSlop + GC_BONUS_SLOP;
+    }
+  }
+  else {
+    assert (Proc_processorNumber (s) == 0);
+    /* XXX this is a lot of copy-paste */
+    for (int proc = 1; proc < s->numberOfProcs; proc++) {
+      s->procStates[proc].canMinor = s->canMinor;
+      assert (isFrontierAligned (s, frontier));
+      s->procStates[proc].start = s->procStates[proc].frontier = frontier;
+      s->procStates[proc].limitPlusSlop = s->procStates[proc].start + 
+        GC_HEAP_LIMIT_SLOP;
+      s->procStates[proc].limit = s->procStates[proc].limitPlusSlop - GC_HEAP_LIMIT_SLOP;
+      assert (s->procStates[proc].frontier <= s->procStates[proc].limitPlusSlop);
+      /* XXX clearCardMap (?) */
+
+      if (DEBUG)
+        for (size_t i = 0; i < GC_BONUS_SLOP; i++)
+          *(s->procStates[proc].limitPlusSlop + i) = 0xBF;
+
+      frontier = s->procStates[proc].limitPlusSlop + GC_BONUS_SLOP;
+    }
+
+    s->start = s->frontier = frontier;
+    s->limitPlusSlop = limit;
+    s->limit = s->limitPlusSlop - GC_HEAP_LIMIT_SLOP;
+    /* XXX clearCardMap (?) */
+
+    if (DEBUG)
+      for (size_t i = 0; i < GC_BONUS_SLOP; i++)
+        *(s->limitPlusSlop + i) = 0xBF;
+
+    frontier = s->limitPlusSlop + GC_BONUS_SLOP;
+  }
+  h->frontier = frontier;
+  assert (h->frontier <= h->start + h->availableSize);
+
+  if (not duringInit) {
+    assert (getThreadCurrent(s)->bytesNeeded <= (size_t)(s->limitPlusSlop - s->frontier));
+    assert (hasHeapBytesFree (s, oldGenBytesRequested, getThreadCurrent(s)->bytesNeeded));
+  }
+  else {
+    assert (nurseryBytesRequested <= (size_t)(s->limitPlusSlop - s->frontier));    
+    assert (hasHeapBytesFree (s, oldGenBytesRequested, nurseryBytesRequested));
+  }
+  assert (isFrontierAligned (s, s->frontier));
 }
 
-bool GC_getAmOriginal (GC_state s) {
+bool GC_getAmOriginal (__attribute__ ((unused)) GC_state *gs) {
+  GC_state s = pthread_getspecific (gcstate_key);
   return s->amOriginal;
 }
-void GC_setAmOriginal (GC_state s, bool b) {
+void GC_setAmOriginal (__attribute__ ((unused)) GC_state *gs, bool b) {
+  GC_state s = pthread_getspecific (gcstate_key);
   s->amOriginal = b;
 }
 

Modified: mlton/branches/shared-heap-multicore/runtime/gc/heap.c
===================================================================
--- mlton/branches/shared-heap-multicore/runtime/gc/heap.c	2008-03-03 15:20:13 UTC (rev 6440)
+++ mlton/branches/shared-heap-multicore/runtime/gc/heap.c	2008-03-03 15:26:28 UTC (rev 6441)
@@ -12,11 +12,15 @@
           "\t\tnursery = "FMTPTR"\n"
           "\t\toldGenSize = %zu\n"
           "\t\tsize = %zu\n"
-          "\t\tstart = "FMTPTR"\n",
+          "\t\tavailableSize = %zu\n"
+          "\t\tstart = "FMTPTR"\n"
+          "\t\tfrontier = "FMTPTR"\n",
           (uintptr_t)heap->nursery,
           heap->oldGenSize,
           heap->size,
-          (uintptr_t)heap->start);
+          heap->availableSize,
+          (uintptr_t)heap->start,
+          (uintptr_t)heap->frontier);
 }
 
 
@@ -25,7 +29,9 @@
   h->nursery = NULL;
   h->oldGenSize = 0;
   h->size = 0;
+  h->availableSize = h->size;
   h->start = NULL;
+  h->frontier = NULL;
 }
 
 /* sizeofHeapDesired (s, l, cs) 

Modified: mlton/branches/shared-heap-multicore/runtime/gc/heap.h
===================================================================
--- mlton/branches/shared-heap-multicore/runtime/gc/heap.h	2008-03-03 15:20:13 UTC (rev 6440)
+++ mlton/branches/shared-heap-multicore/runtime/gc/heap.h	2008-03-03 15:26:28 UTC (rev 6441)
@@ -22,6 +22,9 @@
 */
 
 typedef struct GC_heap {
+  size_t availableSize; /* may be smaller than size if we are limiting
+                           allocation for profiling purposes */
+  pointer frontier; /* next (globally) unallocated space */
   size_t oldGenSize; /* size of old generation */
   pointer nursery; /* start of nursery */
   size_t size; /* size of heap */

Modified: mlton/branches/shared-heap-multicore/runtime/gc/heap_predicates.c
===================================================================
--- mlton/branches/shared-heap-multicore/runtime/gc/heap_predicates.c	2008-03-03 15:20:13 UTC (rev 6440)
+++ mlton/branches/shared-heap-multicore/runtime/gc/heap_predicates.c	2008-03-03 15:26:28 UTC (rev 6441)
@@ -56,15 +56,19 @@
           or isObjptrInNursery (s, op));
 }
 
+/* Is there space in the heap for "oldGen" additional bytes?  Also, can
+  "nursery" bytes be allocated by the current thread without claiming
+  any shared resources? */
 bool hasHeapBytesFree (GC_state s, size_t oldGen, size_t nursery) {
   size_t total;
   bool res;
 
   total =
-    s->heap.oldGenSize + oldGen 
-    + (s->canMinor ? 2 : 1) * (s->limitPlusSlop - s->heap.nursery);
+    s->heap->oldGenSize + oldGen 
+    + (s->canMinor ? 2 : 1) * (s->heap->frontier - s->heap->nursery);
   res = 
-    (total <= s->heap.size) 
+    (total <= s->heap->availableSize) 
+    and (s->heap->start + s->heap->oldGenSize + oldGen <= s->heap->nursery)
     and (nursery <= (size_t)(s->limitPlusSlop - s->frontier));
   if (DEBUG_DETAILED)
     fprintf (stderr, "%s = hasBytesFree (%s, %s)\n",

Modified: mlton/branches/shared-heap-multicore/runtime/gc/init-world.c
===================================================================
--- mlton/branches/shared-heap-multicore/runtime/gc/init-world.c	2008-03-03 15:20:13 UTC (rev 6440)
+++ mlton/branches/shared-heap-multicore/runtime/gc/init-world.c	2008-03-03 15:26:28 UTC (rev 6441)
@@ -141,14 +141,17 @@
 
   for (i = 0; i < s->globalsLength; ++i)
     s->globals[i] = BOGUS_OBJPTR;
-  s->lastMajorStatistics.bytesLive = sizeofInitialBytesLive (s);
-  createHeap (s, &s->heap, 
-              sizeofHeapDesired (s, s->lastMajorStatistics.bytesLive, 0),
-              s->lastMajorStatistics.bytesLive);
+  s->lastMajorStatistics->bytesLive = sizeofInitialBytesLive (s);
+  minSize = s->lastMajorStatistics->bytesLive 
+    + ((GC_HEAP_LIMIT_SLOP + GC_BONUS_SLOP) * s->numberOfProcs);
+  createHeap (s, s->heap, 
+              sizeofHeapDesired (s, minSize, 0),
+              minSize);
+
   createCardMapAndCrossMap (s);
-  start = alignFrontier (s, s->heap.start);
-  s->frontier = start;
-  s->limitPlusSlop = s->heap.start + s->heap.size;
+  start = alignFrontier (s, s->heap->start);
+  s->start = s->frontier = start;
+  s->limitPlusSlop = s->heap->start + s->heap->size - GC_BONUS_SLOP;
   s->limit = s->limitPlusSlop - GC_HEAP_LIMIT_SLOP;
   initIntInfs (s);
   initVectors (s);

Modified: mlton/branches/shared-heap-multicore/runtime/gc/new-object.c
===================================================================
--- mlton/branches/shared-heap-multicore/runtime/gc/new-object.c	2008-03-03 15:20:13 UTC (rev 6440)
+++ mlton/branches/shared-heap-multicore/runtime/gc/new-object.c	2008-03-03 15:26:28 UTC (rev 6441)
@@ -75,7 +75,7 @@
   GC_thread thread;
   pointer res;
 
-  ensureHasHeapBytesFree (s, 0, sizeofStackWithHeaderAligned (s, reserved) + sizeofThread (s));
+  ensureHasHeapBytesFreeAndOrInvariantForMutator (s, FALSE, FALSE, FALSE, 0, sizeofStackWithHeaderAligned (s, alignStackReserved (s, reserved)) + sizeofThread (s));
   stack = newStack (s, reserved, FALSE);
   res = newObject (s, GC_THREAD_HEADER, 
                    sizeofThread (s), 

Modified: mlton/branches/shared-heap-multicore/runtime/gc/object.h
===================================================================
--- mlton/branches/shared-heap-multicore/runtime/gc/object.h	2008-03-03 15:20:13 UTC (rev 6440)
+++ mlton/branches/shared-heap-multicore/runtime/gc/object.h	2008-03-03 15:26:28 UTC (rev 6441)
@@ -99,6 +99,10 @@
 */
 #define GC_NORMAL_HEADER_SIZE GC_HEADER_SIZE
 
+typedef uint32_t GC_smallGapSize;
+#define GC_SMALL_GAP_SIZE_SIZE sizeof (GC_smallGapSize)
+#define GC_BONUS_SLOP (GC_HEADER_SIZE + GC_SMALL_GAP_SIZE_SIZE)
+
 #endif /* (defined (MLTON_GC_INTERNAL_TYPES)) */
 
 



