[MLton-commit] r5567

Vesa Karvonen vesak at mlton.org
Sat May 19 15:36:59 PDT 2007


Improved generation of recursive datatypes.

----------------------------------------------------------------------

U   mltonlib/trunk/com/ssh/misc-util/unstable/arbitrary.sml

----------------------------------------------------------------------

Modified: mltonlib/trunk/com/ssh/misc-util/unstable/arbitrary.sml
===================================================================
--- mltonlib/trunk/com/ssh/misc-util/unstable/arbitrary.sml	2007-05-18 17:16:17 UTC (rev 5566)
+++ mltonlib/trunk/com/ssh/misc-util/unstable/arbitrary.sml	2007-05-19 22:36:57 UTC (rev 5567)
@@ -33,8 +33,8 @@
 end
 
 structure Arbitrary :> sig
-   include STRUCTURAL_TYPE
-   include ARBITRARY where type 'a arbitrary_t = 'a t
+   include STRUCTURAL_TYPE ARBITRARY
+   sharing type arbitrary_t = t
 end = struct
    structure G = RanQD1Gen and I = Int and R = Real and W = Word
          and Typ = TypeInfo
@@ -79,9 +79,20 @@
                                R.toLarge),
                   typ = Typ.real}
 
-   fun Y ? = let open Tie in iso (function *` function *` Typ.Y) end
-                (fn IN {gen = a, cog = b, typ = c} => a & b & c,
-                 fn a & b & c => IN {gen = a, cog = b, typ = c}) ?
+   fun Y ? = let
+      open Tie
+      val genFn = pure (fn () => let
+                              val r = ref (raising Fix.Fix)
+                              fun f x = !r x
+                           in
+                              (G.resize (op div /> 2) f,
+                               fn f' => (r := f' ; f'))
+                           end)
+   in
+      iso (genFn *` function *` Typ.Y)
+          (fn IN {gen = a, cog = b, typ = c} => a & b & c,
+           fn a & b & c => IN {gen = a, cog = b, typ = c})
+   end ?
 
    fun (IN {gen = aGen, cog = aCog, typ = aTyp, ...}) *`
        (IN {gen = bGen, cog = bCog, typ = bTyp, ...}) =
@@ -89,43 +100,18 @@
            cog = fn n => fn a & b => aCog n a o G.split 0w643 o bCog n b,
            typ = Typ.*` (aTyp, bTyp)}
 
-   (* XXX Generation of recursive datatypes could probably be improved.
-    *
-    * We are somewhat more ambitious here than what is done in the
-    * original QuickCheck library.  As noted in the QuickCheck paper,
-    * naive generation of recursive datatypes may not terminate (for one
-    * thing).  The simplistic heuristic used below is to reduce the size
-    * whenever the recursive branch is chosen.  This guarantees
-    * termination in many cases, but not all.  However, it is probably
-    * possible to devise a much smarter algorithm.  Namely, one could
-    * compute a "probability of recursion" of some kind and then use that
-    * while choosing which branch to generate.  Consider the following
-    * datatype:
-    *
-    *>  datatype foo = ALWAYS of foo * foo | SOMETIMES of foo option
-    *
-    * Intuitively the "recursion probabilities" of the ALWAYS and
-    * SOMETIMES branches are different.  It seems plausible that this
-    * could be exploited to guarantee termination.
-    *
-    * Actually, it would probably be more fruitful to use an estimate of
-    * the expected "size" of the complete generated data structure to
-    * guide the generation process.
-    *)
-
    fun (IN {gen = aGen, cog = aCog, typ = aTyp, ...}) +`
        (IN {gen = bGen, cog = bCog, typ = bTyp, ...}) = let
       val aGen = map INL aGen
       val bGen = map INR bGen
-      val halve = G.resize (op div /> 2)
-      val aGenHalf = G.frequency [(2, halve aGen), (1, bGen)]
-      val bGenHalf = G.frequency [(1, aGen), (2, halve bGen)]
+      val gen = G.frequency [(Typ.numConsecutiveAlts aTyp, aGen),
+                             (Typ.numConsecutiveAlts bTyp, bGen)]
+      val gen0 = case Typ.hasBaseCase aTyp & Typ.hasBaseCase bTyp of
+                    true & false => aGen
+                  | false & true => bGen
+                  | _            => gen
    in
-      IN {gen = case Typ.hasRecData aTyp & Typ.hasRecData bTyp of
-                   true  & false => G.sized (fn 0 => bGen | _ => aGenHalf)
-                 | false & true  => G.sized (fn 0 => aGen | _ => bGenHalf)
-                 | _     & _     =>
-                   G.bool >>= (fn false => aGen | true  => bGen),
+      IN {gen = G.sized (fn 0 => gen0 | _ => gen),
           cog = fn n => fn INL a => G.split 0w423 o aCog n a
                          | INR b => G.split 0w324 o bCog n b,
           typ = Typ.+` (aTyp, bTyp)}




More information about the MLton-commit mailing list