[MLton-commit] r5750

Vesa Karvonen vesak at mlton.org
Mon Jul 9 19:56:57 PDT 2007


Somewhat simplified and improved generic hash function.
----------------------------------------------------------------------

U   mltonlib/trunk/com/ssh/generic/unstable/detail/value/hash.sml
U   mltonlib/trunk/com/ssh/generic/unstable/public/value/hash.sig

----------------------------------------------------------------------

Modified: mltonlib/trunk/com/ssh/generic/unstable/detail/value/hash.sml
===================================================================
--- mltonlib/trunk/com/ssh/generic/unstable/detail/value/hash.sml	2007-07-10 02:50:44 UTC (rev 5749)
+++ mltonlib/trunk/com/ssh/generic/unstable/detail/value/hash.sml	2007-07-10 02:56:56 UTC (rev 5750)
@@ -4,8 +4,6 @@
  * See the LICENSE file or http://mlton.org/License for details.
  *)
 
-(* XXX Devise a better hash function.  This is not pretty. *)
-
 functor WithHash (Arg : WITH_HASH_DOM) : HASH_GENERIC = struct
    (* <-- SML/NJ workaround *)
    open TopLevel
@@ -20,28 +18,23 @@
    infixr 0 -->
    (* SML/NJ workaround --> *)
 
-   structure W = Word
+   type 'a t = 'a -> {totWidth : Int.t, maxDepth : Int.t} -> Word.t
 
-   type 'a t = 'a -> {maxWidth : Int.t, maxDepth : Int.t} -> Word.t UnOp.t
+   fun prim f : 'a t = const o f
 
-   structure HC : sig
-      val map : ('b -> 'a) -> 'a t -> 'b t
-      val withConst : Word.t -> 'a t UnOp.t
-      val lift : ('a -> Word.t) -> 'a t
-   end = struct
-      fun map b2a hA = hA o b2a
-      fun withConst w hA a p r = hA a p (W.+ (w, r))
-      fun lift toWord a _ r = r * 0w19 + toWord a
-   end
-
    structure Hash =
       LayerGenericRep (structure Outer = Arg.Rep
                        structure Closed = MkClosedGenericRep (type 'a t = 'a t))
 
    open Hash.This
 
-   fun hash t v = getT t v {maxWidth = 200, maxDepth = 10} 0wx2CA4B13
+   fun hashParam t p =
+       if #totWidth p < 0 orelse #maxDepth p < 0
+       then raise Domain
+       else fn v => getT t v p
 
+   fun hash t = hashParam t {totWidth = 200, maxDepth = 10}
+
    structure Layered = LayerDepGeneric
      (structure Outer = Arg and Result = Hash
 
@@ -50,64 +43,67 @@
       fun isoProduct ? = iso' (getP ?)
       fun isoSum     ? = iso' (getS ?)
 
-      fun op *` (aT, bT) (a & b) {maxWidth, maxDepth} = let
+      fun op *` (aT, bT) (a & b) {totWidth, maxDepth} = let
          val aN = Arg.numElems aT
          val bN = Arg.numElems bT
-         val aW = Int.quot (maxWidth * aN, aN + bN)
-         val bW = maxWidth - aW
+         val aW = Int.quot (totWidth * aN, aN + bN)
+         val bW = totWidth - aW
       in
-         getP bT b {maxWidth = bW, maxDepth = maxDepth} o
-         getP aT a {maxWidth = aW, maxDepth = maxDepth}
+         getP bT b {totWidth = bW, maxDepth = maxDepth} * 0w13 +
+         getP aT a {totWidth = aW, maxDepth = maxDepth}
       end
       val T   = getT
       fun R _ = getT
-      fun product' aP a p = if #maxWidth p = 0 then id else (getP aP) a p
-      val tuple  = product'
-      val record = product'
+      fun tuple aP a p = if #totWidth p = 0 then 0w0 else getP aP a p
+      val record = tuple
 
-      fun op +` ? =
-          Sum.sum (Pair.map (HC.withConst 0wx96BA232 o getS,
-                             HC.withConst 0wxCF24651 o getS) ?)
-      val unit = HC.lift (Thunk.mk 0wx2F785)
+      fun op +` ? = let
+         fun withConst c f v p = Word.xorb (f v p, c)
+      in
+         Sum.sum o Pair.map (withConst 0wx96BA232 o getS,
+                             withConst 0wxCF24651 o getS)
+      end ?
+      val unit = prim (Thunk.mk 0wx2F785)
       fun C0 _ = unit
       fun C1 _ = getT
-      fun data aS a {maxDepth, maxWidth} =
-          if maxDepth = 0 then id
+      fun data aS a {maxDepth, totWidth} =
+          if maxDepth = 0 then 0w0
           else getS aS a {maxDepth = maxDepth - 1,
-                          maxWidth = Int.quot (maxWidth, 2)}
+                          totWidth = totWidth}
 
       val Y = Tie.function
 
       fun op --> _ = failing "Hash.--> unsupported"
 
-      fun exn _ = failing "Hash.exn unsupported"
-      fun regExn _ _ = ()
+      fun refc aT = getT aT o !
 
-      fun refc aT = HC.withConst 0wx178A2346 (HC.map ! (getT aT))
+      val int = prim Word.fromInt
 
-      fun list xT xs {maxWidth, maxDepth} h = let
-         val m = Int.quot (maxWidth, 2)
+      fun list xT xs {totWidth, maxDepth} = let
+         val m = Int.quot (totWidth, 2)
          fun len n []      = n
            | len n (_::xs) = if m <= n then n else len (n+1) xs
          val n = len 0 xs
-         val p = {maxWidth = Int.quot (maxWidth, n),
+         val p = {totWidth = Int.quot (totWidth, n),
                   maxDepth = maxDepth - 1}
          fun lp h _ []      = h
-           | lp h n (x::xs) = if n = 0 then h else lp (getT xT x p h) (n-1) xs
+           | lp h n (x::xs) =
+             if n = 0 then h else lp (h * 0w17 + getT xT x p) (n-1) xs
       in
-         lp h n xs
+         lp (Word.fromInt n) n xs
       end
 
-      fun hashSeq length sub hashElem s {maxWidth, maxDepth} h = let
+      fun hashSeq length sub hashElem s {totWidth, maxDepth} = let
          val n = length s
+         val h = Word.fromInt n
       in
-         case Int.min (Int.quot (maxWidth+3, 4), Int.quot (n+1, 2)) of
+         case Int.min (Int.quot (totWidth+3, 4), Int.quot (n+1, 2)) of
             0          => h
           | numSamples => let
-               val p = {maxWidth = Int.quot (maxWidth, numSamples),
+               val p = {totWidth = Int.quot (totWidth, numSamples),
                         maxDepth = maxDepth - 1}
                fun lp h 0 = h
-                 | lp h n = lp (hashElem (sub (s, n-1)) p h) (n-1)
+                 | lp h n = lp (h * 0w19 + hashElem (sub (s, n-1)) p) (n-1)
             in
                lp h (Int.max (numSamples, Int.min (10, n)))
             end
@@ -116,27 +112,29 @@
       fun array  aT = hashSeq Array.length  Array.sub  (getT aT)
       fun vector aT = hashSeq Vector.length Vector.sub (getT aT)
 
-      val char = HC.lift (Word.fromInt o ord)
+      val char = prim (Word.fromInt o ord)
       val string = hashSeq String.length String.sub char
 
-      val bool = HC.lift (fn true => 0wx2DA745 | false => 0wx3C24A62)
-      val int  = HC.lift Word.fromInt
-      val word = HC.lift id
+      val exn = string o Exn.message (* XXX Imprecise *)
+      fun regExn _ _ = ()
 
+      val bool = prim (fn true => 0wx2DA745 | false => 0wx3C24A62)
+      val word = const
+
       fun mk x2V op mod (v2w, w2v) =
-          HC.map (fn x => v2w (x2V x mod w2v Word.maxValue)) word
+          prim (fn x => v2w (x2V x mod w2v Word.largestPrime))
 
-      val largeInt  = mk id LargeInt.mod  (Iso.swap Word.isoLargeInt)
-      val largeWord = mk id LargeWord.mod LargeWord.isoWord
+      val largeInt  = mk id op mod (Iso.swap Word.isoLargeInt)
+      val largeWord = mk id op mod LargeWord.isoWord
 
       val largeReal =
           let open CastLargeReal open Word in mk castToWord op mod isoWord end
       val real =
           let open CastReal      open Word in mk castToWord op mod isoWord end
 
-      val word8  = HC.lift Word8.toWord
-      val word32 = HC.lift Word32.toWord
-      val word64 = mk id Word64.mod Word64.isoWord)
+      val word8  = prim Word8.toWord
+      val word32 = prim Word32.toWord
+      val word64 = mk id op mod Word64.isoWord)
 
    open Layered
 end

Modified: mltonlib/trunk/com/ssh/generic/unstable/public/value/hash.sig
===================================================================
--- mltonlib/trunk/com/ssh/generic/unstable/public/value/hash.sig	2007-07-10 02:50:44 UTC (rev 5749)
+++ mltonlib/trunk/com/ssh/generic/unstable/public/value/hash.sig	2007-07-10 02:56:56 UTC (rev 5750)
@@ -10,8 +10,19 @@
 signature HASH = sig
    structure Hash : OPEN_GENERIC_REP
 
+   val hashParam :
+       ('a, 'x) Hash.t -> {totWidth : Int.t, maxDepth : Int.t} -> 'a -> Word.t
+   (**
+    * Returns a hash function.  The {totWidth} and {maxDepth}
+    * parameters give some control over hashing.  The {totWidth}
+    * parameter controls how many elements of sequences, like lists
+    * and vectors, will be examined.  The {maxDepth} parameter
+    * controls how many times the hash function descends into a
+    * (possibly recursive) datatype.
+    *)
+
    val hash : ('a, 'x) Hash.t -> 'a -> Word.t
-   (** Extracts the hash function. *)
+   (** Returns the default hash function. *)
 end
 
 signature HASH_GENERIC = sig




More information about the MLton-commit mailing list