[MLton-commit] r4241: First cut at fixing -codegen c -profile time

Matthew Fluet MLton@mlton.org
Thu, 17 Nov 2005 19:06:17 -0800


MAIL First cut at fixing -codegen c -profile time

A different approach to time profiling: maintain the current source
position via program operations.  This has the advantage that it
significantly more portable than trying to inject just the right
information into C code in a manner that isn't corrupted by gcc
optimizations.

The high points:
 * added  volatile uint curSourceSeqsIndex;  to struct GC_state;
 * replaced PROFILE_TIME with PROFILE_TIME_FIELD and
   PROFILE_TIME_LABEL, which distinguishes the two methods of time
   profiling (the former uses a the GC_state field to compute the
   sourceSeqsIndex, the later uses assembler labels).
 * added  -profile time-field  and  -profile time-label  as expert
   options; -profile time  remains and is set according to the
   codegen; you can use the expert options to override the codegen
   default (use time-label with the C-codegen at your peril, use
   time-field with the native codegen for benchmarking purposes).
 * made _minimal_ changes to profile.fun; essentially, if we would
   have put in a profile label, instead put in an explict move of 
   the sourceSeqsIndex to the GC_state field.

Note that this sets curSourceSeqsIndex more often than necessary.
Profile labels were inserted for code coverage properties; namely, at
the beginning of every basic block -- whether or not control could
have flowed from a block with a different souceSeqsIndex.  Inspecting
the generated code, it is clear that we could insert fewer sets.

Here is a comparison of the results of using different combinations of
codegen and profiling methods:

[fluet@localhost native]$ mlton -codegen native -profile time hamlet.sml
[fluet@localhost native]$ ./hamlet > /dev/null
[fluet@localhost native]$ mlprof -thresh 2.0 hamlet mlmon.out
25.98 seconds of CPU time (4.54 seconds GC)
         function            cur
--------------------------- -----
<gc>                        14.9%
Sequence.Slice.collate.loop 12.7%
BinaryMapFn.find.mem        11.0%
Integer.scan                 8.6%
BinaryMapFn.insert           8.0%
Lab.compare                  5.2%
Sequence.make2               4.4%
BinaryMapFn.T'               3.9%
Integer.scan.negate          3.0%
EvalCore.evalExp             2.6%
Integer.scan.num             2.2%
BinaryMapFn.foldli.fold      2.1%
Integer.scan.finishNum       2.1%


[fluet@localhost native]$ mlton -codegen native -profile time-field hamlet.sml
[fluet@localhost native]$ ./hamlet > /dev/null
[fluet@localhost native]$ mlprof -thresh 2.0 hamlet mlmon.out
29.31 seconds of CPU time (4.42 seconds GC)
         function            cur
--------------------------- -----
Sequence.Slice.collate.loop 18.4%
<gc>                        13.1%
Integer.scan                 9.3%
BinaryMapFn.insert           6.2%
BinaryMapFn.find.mem         6.0%
Sequence.make2               4.3%
Integer.scan.negate          3.8%
Lab.compare                  3.6%
BinaryMapFn.T'               3.5%
wrapOverflow.fn              2.8%
Integer.scan.finishNum       2.7%
BinaryMapFn.N                2.6%
EvalCore.evalExp             2.5%
Integer.scan.num             2.1%
EvalCore.evalAtExp           2.0%


[fluet@localhost c]$ mlton -codegen c -profile time hamlet.sml 
[fluet@localhost c]$ ./hamlet > /dev/null
[fluet@localhost c]$ mlprof -thresh 2.0 hamlet mlmon.out
60.78 seconds of CPU time (4.72 seconds GC)
         function            cur
--------------------------- -----
Sequence.Slice.collate.loop 18.7%
BinaryMapFn.find.mem        11.0%
BinaryMapFn.insert          10.2%
<gc>                         7.2%
Integer.scan.negate          6.0%
Integer.scan                 6.0%
Lab.compare                  4.7%
Sequence.make2               3.8%
EvalCore.evalExp             3.5%
EvalCore.evalAtPat           3.3%
BinaryMapFn.T'               2.6%
EvalCore.evalPatRow          2.4%
Integer.scan.finishNum       2.4%
wrapOverflow.fn              2.1%


There is a slight slowdown of using the field method over using the
label method, but nothing like the slowdown of going to the C codegen.
Regardless of the running time of the program, you get essentially the
same profiling results (with the exception that <gc> time proportional
to total running time).


----------------------------------------------------------------------

U   mlton/trunk/include/c-chunk.h
U   mlton/trunk/mlton/backend/backend.fun
U   mlton/trunk/mlton/backend/machine.fun
U   mlton/trunk/mlton/backend/profile.fun
U   mlton/trunk/mlton/backend/rep-type.fun
U   mlton/trunk/mlton/backend/runtime.fun
U   mlton/trunk/mlton/backend/runtime.sig
U   mlton/trunk/mlton/codegen/c-codegen/c-codegen.fun
U   mlton/trunk/mlton/control/control-flags.sig
U   mlton/trunk/mlton/control/control-flags.sml
U   mlton/trunk/mlton/main/compile.fun
U   mlton/trunk/mlton/main/lookup-constant.fun
U   mlton/trunk/mlton/main/main.fun
U   mlton/trunk/runtime/gc.c
U   mlton/trunk/runtime/gc.h

----------------------------------------------------------------------

Modified: mlton/trunk/include/c-chunk.h
===================================================================
--- mlton/trunk/include/c-chunk.h	2005-11-17 22:31:55 UTC (rev 4240)
+++ mlton/trunk/include/c-chunk.h	2005-11-18 03:06:08 UTC (rev 4241)
@@ -190,24 +190,6 @@
                 Return();                                                       \
         } while (0)                                                             \
 
-#if (defined __APPLE_CC__)
-
-#define DeclareProfileLabel(l)                  \
-        void l()
-
-#define ProfileLabel(l)                                         \
-        __asm__ __volatile__ (".globl _" #l "\n_" #l ":" : : )
-
-#else
-
-#define DeclareProfileLabel(l)                                  \
-        void l() __attribute__ ((alias (#l "_internal")))
-
-#define ProfileLabel(l)                                 \
-        __asm__ __volatile__ (#l "_internal:" : : )
-
-#endif
-
 /* ------------------------------------------------- */
 /*                       Real                        */
 /* ------------------------------------------------- */

Modified: mlton/trunk/mlton/backend/backend.fun
===================================================================
--- mlton/trunk/mlton/backend/backend.fun	2005-11-17 22:31:55 UTC (rev 4240)
+++ mlton/trunk/mlton/backend/backend.fun	2005-11-18 03:06:08 UTC (rev 4241)
@@ -967,7 +967,7 @@
                            end
                       | R.Kind.Jump => (M.Kind.Jump, live, Vector.new0 ())
                   val (first, statements) =
-                     if !Control.profile = Control.ProfileTime
+                     if !Control.profile = Control.ProfileTimeLabel
                         then
                            case (if 0 = Vector.length statements
                                     then NONE

Modified: mlton/trunk/mlton/backend/machine.fun
===================================================================
--- mlton/trunk/mlton/backend/machine.fun	2005-11-17 22:31:55 UTC (rev 4240)
+++ mlton/trunk/mlton/backend/machine.fun	2005-11-18 03:06:08 UTC (rev 4241)
@@ -884,7 +884,7 @@
                         vectors, ...}) =
          let
             val _ =
-               if !Control.profile = Control.ProfileTime
+               if !Control.profile = Control.ProfileTimeLabel
                   then
                      List.foreach
                      (chunks, fn Chunk.T {blocks, ...} =>

Modified: mlton/trunk/mlton/backend/profile.fun
===================================================================
--- mlton/trunk/mlton/backend/profile.fun	2005-11-17 22:31:55 UTC (rev 4240)
+++ mlton/trunk/mlton/backend/profile.fun	2005-11-18 03:06:08 UTC (rev 4241)
@@ -148,7 +148,9 @@
       val profile = !Control.profile
       val profileStack: bool = !Control.profileStack
       val needProfileLabels: bool =
-         profile = ProfileTime orelse profile = ProfileLabel
+         profile = ProfileTimeLabel orelse profile = ProfileLabel
+      val needCodeCoverage: bool =
+         needProfileLabels orelse (profile = ProfileTimeField)
       val frameProfileIndices: (Label.t * int) list ref = ref []
       val infoNodes: InfoNode.t list ref = ref []
       val nameCounter = Counter.new 0
@@ -300,7 +302,7 @@
          Property.getSetOnce
          (Label.plist, Property.initRaise ("info", Label.layout))
       val labels = ref []
-      fun profileLabelIndex (sourceSeqsIndex: int): Statement.t =
+      fun profileLabelFromIndex (sourceSeqsIndex: int): Statement.t =
          let
             val l = ProfileLabel.new ()
             val _ = List.push (labels, {label = l,
@@ -308,8 +310,25 @@
          in
             Statement.ProfileLabel l
          end
-      fun profileLabel (sourceSeq: int list): Statement.t =
-         profileLabelIndex (sourceSeqIndex sourceSeq)
+      fun setCurSourceSeqsIndexFromIndex (sourceSeqsIndex: int): Statement.t =
+         let
+            val curSourceSeqsIndex = 
+               Operand.Runtime Runtime.GCField.CurSourceSeqsIndex
+         in
+            Statement.Move
+            {dst = curSourceSeqsIndex,
+             src = Operand.word (WordX.fromIntInf 
+                                 (IntInf.fromInt sourceSeqsIndex,
+                                  WordSize.default))}
+         end
+      fun codeCoverageStatementFromIndex (sourceSeqsIndex: int): Statement.t =
+         if needProfileLabels
+            then profileLabelFromIndex sourceSeqsIndex
+         else if profile = ProfileTimeField
+            then setCurSourceSeqsIndexFromIndex sourceSeqsIndex
+         else Error.bug "Profile.codeCoverageStatement"
+      fun codeCoverageStatement (sourceSeq: int list): Statement.t =
+         codeCoverageStatementFromIndex (sourceSeqIndex sourceSeq)
       local
          val {get: Func.t -> FuncInfo.t, ...} =
             Property.get (Func.plist, Property.initFun (fn _ => FuncInfo.new ()))
@@ -444,22 +463,22 @@
                           statements: Statement.t list,
                           transfer: Transfer.t}: unit =
                let
-                  val (_, npl, sourceSeq, statements) =
+                  val (_, ncc, sourceSeq, statements) =
                      List.fold
                      (statements,
                       (leaves, true, sourceSeq, []),
-                      fn (s, (leaves, npl, sourceSeq, ss)) =>
+                      fn (s, (leaves, ncc, sourceSeq, ss)) =>
                       case s of
                          Object _ => (leaves, true, sourceSeq, s :: ss)
                        | Profile ps =>
                             let
-                               val (npl, ss) =
-                                  if needProfileLabels
+                               val (ncc, ss) =
+                                  if needCodeCoverage
                                      then
-                                        if npl
+                                        if ncc
                                            andalso not (List.isEmpty sourceSeq)
                                            then (false,
-                                                 profileLabel sourceSeq :: ss)
+                                                 codeCoverageStatement sourceSeq :: ss)
                                         else (true, ss)
                                   else (false, ss)
                                val (leaves, sourceSeq) = 
@@ -478,13 +497,13 @@
                                                 InfoNode.sourcesIndex infoNode
                                                 :: sourceSeq))
                             in
-                               (leaves, npl, sourceSeq, ss)
+                               (leaves, ncc, sourceSeq, ss)
                             end
                        | _ => (leaves, true, sourceSeq, s :: ss))
                   val statements =
-                     if needProfileLabels
-                        andalso npl
-                        then profileLabel sourceSeq :: statements
+                     if needCodeCoverage
+                        andalso ncc
+                        then codeCoverageStatement sourceSeq :: statements
                      else statements
                   val {args, kind, label} =
                      if profileStack andalso (case kind of
@@ -499,10 +518,9 @@
                                  addFrameProfileIndex
                                  (newLabel, sourceSeqIndex sourceSeq)
                               val statements =
-                                 if needProfileLabels
+                                 if needCodeCoverage
                                     then (Vector.new1
-                                          (profileLabelIndex
-                                           (sourceSeqIndex sourceSeq)))
+                                          (codeCoverageStatement sourceSeq))
                                  else Vector.new0 ()
                               val _ =
                                  List.push
@@ -556,8 +574,8 @@
                   val index = sourceSeqIndex (Push.toSources pushes)
                   val _ = addFrameProfileIndex (newLabel, index)
                   val statements =
-                     if needProfileLabels
-                        then Vector.new1 (profileLabelIndex index)
+                     if needCodeCoverage
+                        then Vector.new1 (codeCoverageStatementFromIndex index)
                      else Vector.new0 ()
                   val _ =
                      List.push

Modified: mlton/trunk/mlton/backend/rep-type.fun
===================================================================
--- mlton/trunk/mlton/backend/rep-type.fun	2005-11-17 22:31:55 UTC (rev 4240)
+++ mlton/trunk/mlton/backend/rep-type.fun	2005-11-18 03:06:08 UTC (rev 4241)
@@ -450,6 +450,7 @@
          CanHandle => defaultWord
        | CardMap => cPointer ()
        | CurrentThread => cPointer ()
+       | CurSourceSeqsIndex => defaultWord
        | ExnStack => defaultWord
        | Frontier => cPointer ()
        | Limit => cPointer ()

Modified: mlton/trunk/mlton/backend/runtime.fun
===================================================================
--- mlton/trunk/mlton/backend/runtime.fun	2005-11-17 22:31:55 UTC (rev 4240)
+++ mlton/trunk/mlton/backend/runtime.fun	2005-11-18 03:06:08 UTC (rev 4241)
@@ -16,6 +16,7 @@
          CanHandle
        | CardMap
        | CurrentThread
+       | CurSourceSeqsIndex
        | ExnStack
        | Frontier
        | Limit
@@ -46,6 +47,7 @@
       val canHandleOffset: Bytes.t ref = ref Bytes.zero
       val cardMapOffset: Bytes.t ref = ref Bytes.zero
       val currentThreadOffset: Bytes.t ref = ref Bytes.zero
+      val curSourceSeqsIndexOffset: Bytes.t ref = ref Bytes.zero
       val exnStackOffset: Bytes.t ref = ref Bytes.zero
       val frontierOffset: Bytes.t ref = ref Bytes.zero
       val limitOffset: Bytes.t ref = ref Bytes.zero
@@ -56,12 +58,13 @@
       val stackLimitOffset: Bytes.t ref = ref Bytes.zero
       val stackTopOffset: Bytes.t ref = ref Bytes.zero
 
-      fun setOffsets {canHandle, cardMap, currentThread, exnStack, frontier,
-                      limit, limitPlusSlop, maxFrameSize, signalIsPending,
-                      stackBottom, stackLimit, stackTop} =
+      fun setOffsets {canHandle, cardMap, currentThread, curSourceSeqsIndex, 
+                      exnStack, frontier, limit, limitPlusSlop, maxFrameSize, 
+                      signalIsPending, stackBottom, stackLimit, stackTop} =
          (canHandleOffset := canHandle
           ; cardMapOffset := cardMap
           ; currentThreadOffset := currentThread
+          ; curSourceSeqsIndexOffset := curSourceSeqsIndex
           ; exnStackOffset := exnStack
           ; frontierOffset := frontier
           ; limitOffset := limit
@@ -76,6 +79,7 @@
          fn CanHandle => !canHandleOffset
           | CardMap => !cardMapOffset
           | CurrentThread => !currentThreadOffset
+          | CurSourceSeqsIndex => !curSourceSeqsIndexOffset
           | ExnStack => !exnStackOffset
           | Frontier => !frontierOffset
           | Limit => !limitOffset
@@ -90,6 +94,7 @@
          fn CanHandle => "CanHandle"
           | CardMap => "CardMap"
           | CurrentThread => "CurrentThread"
+          | CurSourceSeqsIndex => "CurSourceSeqsIndex"
           | ExnStack => "ExnStack"
           | Frontier => "Frontier"
           | Limit => "Limit"

Modified: mlton/trunk/mlton/backend/runtime.sig
===================================================================
--- mlton/trunk/mlton/backend/runtime.sig	2005-11-17 22:31:55 UTC (rev 4240)
+++ mlton/trunk/mlton/backend/runtime.sig	2005-11-18 03:06:08 UTC (rev 4241)
@@ -23,6 +23,7 @@
                CanHandle
              | CardMap
              | CurrentThread
+             | CurSourceSeqsIndex
              | ExnStack
              | Frontier (* The place where the next object is allocated. *)
              | Limit (* frontier + heapSize - LIMIT_SLOP *)
@@ -39,6 +40,7 @@
             val setOffsets: {canHandle: Bytes.t,
                              cardMap: Bytes.t,
                              currentThread: Bytes.t,
+                             curSourceSeqsIndex: Bytes.t,
                              exnStack: Bytes.t,
                              frontier: Bytes.t,
                              limit: Bytes.t,

Modified: mlton/trunk/mlton/codegen/c-codegen/c-codegen.fun
===================================================================
--- mlton/trunk/mlton/codegen/c-codegen/c-codegen.fun	2005-11-17 22:31:55 UTC (rev 4240)
+++ mlton/trunk/mlton/codegen/c-codegen/c-codegen.fun	2005-11-18 03:06:08 UTC (rev 4241)
@@ -359,7 +359,8 @@
                 | Control.ProfileCount => "PROFILE_COUNT"
                 | Control.ProfileDrop => "PROFILE_NONE"
                 | Control.ProfileLabel => "PROFILE_NONE"
-                | Control.ProfileTime => "PROFILE_TIME"
+                | Control.ProfileTimeField => "PROFILE_TIME_FIELD"
+                | Control.ProfileTimeLabel => "PROFILE_TIME_LABEL"
          in 
             C.callNoSemi ("Main",
                           [C.int align,
@@ -676,7 +677,9 @@
                                     print)
                             ))
          end
-      val amTimeProfiling = !Control.profile = Control.ProfileTime
+      val amTimeProfiling = 
+         !Control.profile = Control.ProfileTimeField
+         orelse !Control.profile = Control.ProfileTimeLabel
       fun outputChunk (chunk as Chunk.T {chunkLabel, blocks, regMax, ...}) =
          let
             val {done, print, ...} = outputC ()

Modified: mlton/trunk/mlton/control/control-flags.sig
===================================================================
--- mlton/trunk/mlton/control/control-flags.sig	2005-11-17 22:31:55 UTC (rev 4240)
+++ mlton/trunk/mlton/control/control-flags.sig	2005-11-18 03:06:08 UTC (rev 4241)
@@ -252,7 +252,8 @@
        | ProfileCount
        | ProfileDrop
        | ProfileLabel
-       | ProfileTime
+       | ProfileTimeField
+       | ProfileTimeLabel
       val profile: profile ref
 
       val profileBranch: bool ref

Modified: mlton/trunk/mlton/control/control-flags.sml
===================================================================
--- mlton/trunk/mlton/control/control-flags.sml	2005-11-17 22:31:55 UTC (rev 4240)
+++ mlton/trunk/mlton/control/control-flags.sml	2005-11-18 03:06:08 UTC (rev 4241)
@@ -814,7 +814,8 @@
        | ProfileCount
        | ProfileDrop
        | ProfileLabel
-       | ProfileTime
+       | ProfileTimeField
+       | ProfileTimeLabel
 
       val toString =
          fn ProfileNone => "None"
@@ -823,7 +824,8 @@
           | ProfileCount => "Count"
           | ProfileDrop => "Drop"
           | ProfileLabel => "Label"
-          | ProfileTime => "Time"
+          | ProfileTimeField => "TimeField"
+          | ProfileTimeLabel => "TimeLabel"
    end
 
 datatype profile = datatype Profile.t

Modified: mlton/trunk/mlton/main/compile.fun
===================================================================
--- mlton/trunk/mlton/main/compile.fun	2005-11-17 22:31:55 UTC (rev 4240)
+++ mlton/trunk/mlton/main/compile.fun	2005-11-18 03:06:08 UTC (rev 4241)
@@ -454,6 +454,7 @@
              canHandle = get "canHandle",
              cardMap = get "cardMapForMutator",
              currentThread = get "currentThread",
+             curSourceSeqsIndex = get "curSourceSeqsIndex",
              exnStack = get "exnStack",
              frontier = get "frontier",
              limit = get "limit",

Modified: mlton/trunk/mlton/main/lookup-constant.fun
===================================================================
--- mlton/trunk/mlton/main/lookup-constant.fun	2005-11-17 22:31:55 UTC (rev 4240)
+++ mlton/trunk/mlton/main/lookup-constant.fun	2005-11-18 03:06:08 UTC (rev 4241)
@@ -44,6 +44,7 @@
    [
     "canHandle",
     "currentThread",
+    "curSourceSeqsIndex",
     "exnStack",
     "frontier",
     "cardMapForMutator",

Modified: mlton/trunk/mlton/main/main.fun
===================================================================
--- mlton/trunk/mlton/main/main.fun	2005-11-17 22:31:55 UTC (rev 4240)
+++ mlton/trunk/mlton/main/main.fun	2005-11-18 03:06:08 UTC (rev 4241)
@@ -61,6 +61,7 @@
 val linkOpts: {opt: string, pred: OptPred.t} list ref = ref []
 val output: string option ref = ref NONE
 val profileSet: bool ref = ref false
+val profileTimeSet: bool ref = ref false
 val runtimeArgs: string list ref = ref ["@MLton"]
 val showAnns: bool ref = ref false
 val stop = ref Place.OUT
@@ -346,7 +347,10 @@
                             | "count" => ProfileCount
                             | "drop" => ProfileDrop
                             | "label" => ProfileLabel
-                            | "time" => ProfileTime
+                            | "time" => (profileTimeSet := true
+                                         ; ProfileTimeLabel)
+                            | "time-field" => ProfileTimeField
+                            | "time-label" => ProfileTimeLabel
                             | _ => usage (concat
                                           ["invalid -profile arg: ", s]))))),
        (Normal, "profile-branch", " {false|true}",
@@ -531,6 +535,11 @@
                              Out.standard)
              ; let open OS.Process in exit success end)
          else ()
+      val () = if !profileTimeSet
+                  then (case !codegen of
+                           Native => profile := ProfileTimeLabel
+                         | _ => profile := ProfileTimeField)
+                  else ()
       val () = if !exnHistory
                   then (case !profile of
                            ProfileNone => profile := ProfileCallStack
@@ -639,7 +648,8 @@
           | OpenBSD => ()
           | Solaris => ()
           | _ =>
-               if !profile = ProfileTime
+               if !profile = ProfileTimeField 
+                  orelse !profile = ProfileTimeLabel
                   then usage (concat ["can't use -profile time on ",
                                       MLton.Platform.OS.toString targetOS])
                else ()

Modified: mlton/trunk/runtime/gc.c
===================================================================
--- mlton/trunk/runtime/gc.c	2005-11-17 22:31:55 UTC (rev 4240)
+++ mlton/trunk/runtime/gc.c	2005-11-18 03:06:08 UTC (rev 4241)
@@ -3544,7 +3544,8 @@
         if (DEBUG_PROFILE) 
                 fprintf (stderr, "GC_profileDone ()\n");
         assert (s->profilingIsOn);
-        if (PROFILE_TIME == s->profileKind)
+        if (PROFILE_TIME_FIELD == s->profileKind
+            or PROFILE_TIME_LABEL == s->profileKind)
                 setProfTimer (0);
         s->profilingIsOn = FALSE;
         p = s->profile;
@@ -3814,9 +3815,12 @@
         case PROFILE_NONE:
                 die ("impossible PROFILE_NONE");
         break;
-        case PROFILE_TIME:
+        case PROFILE_TIME_FIELD:
                 kind = "time\n";
         break;
+        case PROFILE_TIME_LABEL:
+                kind = "time\n";
+        break;
         }
         writeString (fd, kind);
         writeString (fd, s->profileStack 
@@ -3853,28 +3857,33 @@
 void GC_handleSigProf (pointer pc) {
         uint frameIndex;
         GC_state s;
-        uint sourceSeqIndex;
+        uint sourceSeqsIndex;
 
         s = catcherState;
         if (DEBUG_PROFILE)
                 fprintf (stderr, "GC_handleSigProf (0x%08x)\n", (uint)pc);
         if (s->amInGC)
-                sourceSeqIndex = SOURCE_SEQ_GC;
+                sourceSeqsIndex = SOURCE_SEQ_GC;
         else {
                 frameIndex = topFrameIndex (s);
                 if (s->frameLayouts[frameIndex].isC)
-                        sourceSeqIndex = s->frameSources[frameIndex];
+                        sourceSeqsIndex = s->frameSources[frameIndex];
                 else {
-                        if (s->textStart <= pc and pc < s->textEnd)
-                                sourceSeqIndex = s->textSources [pc - s->textStart];
-                        else {
-                                if (DEBUG_PROFILE)
-                                        fprintf (stderr, "pc out of bounds\n");
-                                sourceSeqIndex = SOURCE_SEQ_UNKNOWN;
+                        if (PROFILE_TIME_LABEL == s->profileKind) {
+                                if (s->textStart <= pc and pc < s->textEnd)
+                                        sourceSeqsIndex = 
+                                                s->textSources [pc - s->textStart];
+                                else {
+                                        if (DEBUG_PROFILE)
+                                                fprintf (stderr, "pc out of bounds\n");
+                                        sourceSeqsIndex = SOURCE_SEQ_UNKNOWN;
+                                }
+                        } else {
+                                sourceSeqsIndex = s->curSourceSeqsIndex;
                         }
                 }
         }
-        profileInc (s, 1, sourceSeqIndex);
+        profileInc (s, 1, sourceSeqsIndex);
 }
 
 static int compareProfileLabels (const void *v1, const void *v2) {
@@ -3893,6 +3902,7 @@
         uint sourceSeqsIndex;
 
         s->profile = GC_profileNew (s);
+        if (PROFILE_TIME_LABEL == s->profileKind) {
         /* Sort sourceLabels by address. */
         qsort (s->sourceLabels, s->sourceLabelsSize, sizeof (*s->sourceLabels),
                 compareProfileLabels);
@@ -3929,6 +3939,9 @@
         }
         for ( ; p < s->textEnd; ++p)
                 s->textSources[p - s->textStart] = sourceSeqsIndex;
+        } else {
+        s->curSourceSeqsIndex = SOURCE_SEQ_UNKNOWN;
+        }
         /*
          * Install catcher, which handles SIGPROF and calls MLton_Profile_inc.
          * 
@@ -4540,7 +4553,8 @@
                 break;
                 case PROFILE_NONE:
                         die ("impossible PROFILE_NONE");
-                case PROFILE_TIME:
+                case PROFILE_TIME_FIELD:
+                case PROFILE_TIME_LABEL:
                         profileTimeInit (s);
                 break;
                 }

Modified: mlton/trunk/runtime/gc.h
===================================================================
--- mlton/trunk/runtime/gc.h	2005-11-17 22:31:55 UTC (rev 4240)
+++ mlton/trunk/runtime/gc.h	2005-11-18 03:06:08 UTC (rev 4241)
@@ -246,7 +246,8 @@
         PROFILE_ALLOC,
         PROFILE_COUNT,
         PROFILE_NONE,
-        PROFILE_TIME,
+        PROFILE_TIME_FIELD,
+        PROFILE_TIME_LABEL
 } ProfileKind;
 
 typedef struct GC_source {
@@ -377,6 +378,7 @@
          */
         uint crossMapValidSize;
         GC_thread currentThread; /* This points to a thread in the heap. */
+        volatile uint curSourceSeqsIndex; /* Used by time profiling. */
         uint fixedHeap; /* If 0, then no fixed heap. */
         GC_frameLayout *frameLayouts;
         uint frameLayoutsSize;
@@ -505,7 +507,7 @@
         bool summary; 
         pointer textEnd;
         /* An array of indices, one entry for each address in the text segment,
-         * giving and index into profileSourceSeqs.
+         * giving an index into profileSourceSeqs.
          */
         uint *textSources;
         pointer textStart;