[MLton-commit] r4250

Stephen Weeks MLton@mlton.org
Wed, 23 Nov 2005 21:43:34 -0800


Import unicode working copy from carrot
----------------------------------------------------------------------

A   mlton/branches/unicode/
U   mlton/branches/unicode/basis-library/arrays-and-vectors/mono.sml
U   mlton/branches/unicode/basis-library/integer/word.sml
U   mlton/branches/unicode/basis-library/libs/basis-2002/top-level/Makefile
U   mlton/branches/unicode/basis-library/libs/basis-2002/top-level/basis.sig
U   mlton/branches/unicode/basis-library/libs/basis-2002/top-level/generate-overloads.sml
U   mlton/branches/unicode/basis-library/libs/basis-2002/top-level/overloads.sml
U   mlton/branches/unicode/basis-library/libs/basis-2002/top-level/top-level.sml
U   mlton/branches/unicode/basis-library/libs/basis-extra/basis-extra.mlb
U   mlton/branches/unicode/basis-library/libs/basis-extra/top-level/basis.sig
U   mlton/branches/unicode/basis-library/libs/basis-extra/top-level/basis.sml
U   mlton/branches/unicode/basis-library/misc/primitive.sml
A   mlton/branches/unicode/basis-library/text/char.fun
U   mlton/branches/unicode/basis-library/text/char.sig
U   mlton/branches/unicode/basis-library/text/char.sml
D   mlton/branches/unicode/basis-library/text/char0.sml
A   mlton/branches/unicode/basis-library/text/charset.sml
U   mlton/branches/unicode/basis-library/text/string-cvt.sml
A   mlton/branches/unicode/basis-library/text/string.fun
U   mlton/branches/unicode/basis-library/text/string.sml
D   mlton/branches/unicode/basis-library/text/string0.sml
A   mlton/branches/unicode/basis-library/text/substring.fun
U   mlton/branches/unicode/basis-library/text/substring.sml
U   mlton/branches/unicode/basis-library/text/text.sml
A   mlton/branches/unicode/basis-library/text/unicode-4.1.0/
A   mlton/branches/unicode/basis-library/text/unicode-4.1.0/PropList.txt.gz
A   mlton/branches/unicode/basis-library/text/unicode-4.1.0/UnicodeData.txt.gz

----------------------------------------------------------------------

Copied: mlton/branches/unicode (from rev 4246, mlton/trunk)

Modified: mlton/branches/unicode/basis-library/arrays-and-vectors/mono.sml
===================================================================
--- mlton/trunk/basis-library/arrays-and-vectors/mono.sml	2005-11-19 20:10:35 UTC (rev 4246)
+++ mlton/branches/unicode/basis-library/arrays-and-vectors/mono.sml	2005-11-24 00:51:12 UTC (rev 4250)
@@ -61,14 +61,44 @@
       = EqMono (type elem = char)
    open S
 in
-   structure CharArray = Array
-   structure CharArray2 = Array2
-   structure CharArraySlice = ArraySlice
-   structure CharVector = Vector
-   structure CharVectorSlice = VectorSlice
-   val _ = CharVector.fromArray: CharArray.array -> CharVector.vector
+   structure Char1Array = Array
+   structure Char1Array2 = Array2
+   structure Char1ArraySlice = ArraySlice
+   structure Char1Vector = Vector
+   structure Char1VectorSlice = VectorSlice
+   val _ = Char1Vector.fromArray: Char1Array.array -> Char1Vector.vector
 end
 local
+   structure S:>
+      EQ_MONO
+      where type Array.elem = Char2.char
+      where type Vector.vector = String2.string
+      = EqMono (type elem = Char2.char)
+   open S
+in
+   structure Char2Array = Array
+   structure Char2Array2 = Array2
+   structure Char2ArraySlice = ArraySlice
+   structure Char2Vector = Vector
+   structure Char2VectorSlice = VectorSlice
+   val _ = Char2Vector.fromArray: Char2Array.array -> Char2Vector.vector
+end
+local
+   structure S:>
+      EQ_MONO
+      where type Array.elem = Char4.char
+      where type Vector.vector = String4.string
+      = EqMono (type elem = Char4.char)
+   open S
+in
+   structure Char4Array = Array
+   structure Char4Array2 = Array2
+   structure Char4ArraySlice = ArraySlice
+   structure Char4Vector = Vector
+   structure Char4VectorSlice = VectorSlice
+   val _ = Char4Vector.fromArray: Char4Array.array -> Char4Vector.vector
+end
+local
    structure S = EqMono (type elem = Int8.int)
    open S
 in
@@ -182,6 +212,18 @@
    structure Word64Array2 = Array2
 end
 
+structure CharVector = Char1Vector
+structure CharVectorSlice = Char1VectorSlice
+structure CharArray = Char1Array
+structure CharArraySlice = Char1ArraySlice
+structure CharArray2 = Char1Array2
+
+structure WideCharVector = Char4Vector
+structure WideCharVectorSlice = Char4VectorSlice
+structure WideCharArray = Char4Array
+structure WideCharArraySlice = Char4ArraySlice
+structure WideCharArray2 = Char4Array2
+
 structure IntVector = Int32Vector
 structure IntVectorSlice = Int32VectorSlice
 structure IntArray = Int32Array

Modified: mlton/branches/unicode/basis-library/integer/word.sml
===================================================================
--- mlton/trunk/basis-library/integer/word.sml	2005-11-19 20:10:35 UTC (rev 4246)
+++ mlton/branches/unicode/basis-library/integer/word.sml	2005-11-24 00:51:12 UTC (rev 4250)
@@ -75,7 +75,7 @@
          let val chars = StringCvt.digitToChar (toInt (q mod radix)) :: chars
             val q = q div radix
          in if q = zero
-               then String0.implode chars
+               then CharVector.fromList chars
             else loop (q, chars)
          end
    in loop (w, [])

Modified: mlton/branches/unicode/basis-library/libs/basis-2002/top-level/Makefile
===================================================================
--- mlton/trunk/basis-library/libs/basis-2002/top-level/Makefile	2005-11-19 20:10:35 UTC (rev 4246)
+++ mlton/branches/unicode/basis-library/libs/basis-2002/top-level/Makefile	2005-11-24 00:51:12 UTC (rev 4250)
@@ -9,7 +9,7 @@
 
 overloads.sml: $(GEN).sml
 	mlton $(GEN).sml
-	$(GEN) >overloads.sml
+	./$(GEN) >overloads.sml
 
 .PHONY: clean
 clean:

Modified: mlton/branches/unicode/basis-library/libs/basis-2002/top-level/basis.sig
===================================================================
--- mlton/trunk/basis-library/libs/basis-2002/top-level/basis.sig	2005-11-19 20:10:35 UTC (rev 4246)
+++ mlton/branches/unicode/basis-library/libs/basis-2002/top-level/basis.sig	2005-11-24 00:51:12 UTC (rev 4250)
@@ -242,7 +242,6 @@
       structure SysWord : WORD
       structure Unix : UNIX
       structure UnixSock : UNIX_SOCK
-(*
       structure WideChar : CHAR
       structure WideCharArray : MONO_ARRAY
       structure WideCharArray2 : MONO_ARRAY2
@@ -252,9 +251,8 @@
       structure WideString : STRING
       structure WideSubstring : SUBSTRING
       structure WideText : TEXT
+(*
       structure WideTextPrimIO : PRIM_IO
-*)
-(*
       structure Windows : WINDOWS
 *)
       structure Word1: WORD
@@ -552,6 +550,38 @@
       sharing type Real64Array2.elem = Real64.real
       sharing type Real64Array2.vector = Real64Vector.vector
       sharing type Unix.exit_status = Posix.Process.exit_status
+      sharing type WideChar.string = WideString.string
+      sharing type WideCharArray.elem = WideChar.char
+      sharing type WideCharArray.vector = WideCharVector.vector
+      sharing type WideCharArraySlice.elem =  WideChar.char
+      sharing type WideCharArraySlice.array = WideCharArray.array
+      sharing type WideCharArraySlice.vector = WideCharVector.vector
+      sharing type WideCharArraySlice.vector_slice = WideCharVectorSlice.slice
+      sharing type WideCharVector.elem = WideChar.char
+      sharing type WideCharVector.vector = WideString.string
+      sharing type WideCharVectorSlice.elem = WideChar.char
+      sharing type WideCharVectorSlice.vector = WideString.string
+      sharing type WideCharVectorSlice.slice = WideSubstring.substring
+      sharing type WideString.string = WideCharVector.vector
+      sharing type WideString.char = WideChar.char
+      sharing type WideSubstring.substring = WideCharVectorSlice.slice
+      sharing type WideSubstring.string = WideString.string
+      sharing type WideSubstring.char = WideChar.char
+      sharing type WideText.Char.char = WideChar.char
+      sharing type WideText.String.string = WideString.string
+      sharing type WideText.Substring.substring = WideSubstring.substring
+      sharing type WideText.CharVector.vector = WideCharVector.vector
+      sharing type WideText.CharArray.array = WideCharArray.array
+      sharing type WideText.CharArraySlice.slice = WideCharArraySlice.slice
+      sharing type WideText.CharVectorSlice.slice = WideCharVectorSlice.slice
+(*
+      sharing type WideTextPrimIO.array = WideCharArray.array
+      sharing type WideTextPrimIO.array_slice = WideCharArraySlice.slice
+      sharing type WideTextPrimIO.elem = WideChar.char
+      sharing type WideTextPrimIO.pos = Position.int
+      sharing type WideTextPrimIO.vector = WideCharVector.vector
+      sharing type WideTextPrimIO.vector_slice = WideCharVectorSlice.slice
+*)
       sharing type WordArray.elem = word
       sharing type WordArray.vector = WordVector.vector
       sharing type WordArraySlice.elem = word

Modified: mlton/branches/unicode/basis-library/libs/basis-2002/top-level/generate-overloads.sml
===================================================================
--- mlton/trunk/basis-library/libs/basis-2002/top-level/generate-overloads.sml	2005-11-19 20:10:35 UTC (rev 4246)
+++ mlton/branches/unicode/basis-library/libs/basis-2002/top-level/generate-overloads.sml	2005-11-24 00:51:12 UTC (rev 4250)
@@ -24,7 +24,7 @@
    @ List.map (List.tabulate (32, fn i => i + 1) @ [64],
                fn i => concat ["Word", Int.toString i])
 
-val text = ["Char", "String"]
+val text = ["Char", "String", "WideChar", "WideString", "Char2", "String2"]
 
 (* Order matters here in the appends, since the first element will be the
  * default.

Modified: mlton/branches/unicode/basis-library/libs/basis-2002/top-level/overloads.sml
===================================================================
--- mlton/trunk/basis-library/libs/basis-2002/top-level/overloads.sml	2005-11-19 20:10:35 UTC (rev 4246)
+++ mlton/branches/unicode/basis-library/libs/basis-2002/top-level/overloads.sml	2005-11-24 00:51:12 UTC (rev 4250)
@@ -1,10 +1,3 @@
-(* Copyright (C) 2002-2005 Henry Cejtin, Matthew Fluet, Suresh
- *    Jagannathan, and Stephen Weeks.
- *
- * MLton is released under a BSD-style license.
- * See the file MLton-LICENSE for details.
- *)
-
 (* This file is automatically generated.  Do not edit. *)
 
 _overload 2 ~ : 'a -> 'a
@@ -602,6 +595,10 @@
 and LargeReal.<
 and Char.<
 and String.<
+and WideChar.<
+and WideString.<
+and Char2.<
+and String2.<
 
 _overload 1 <= : 'a * 'a -> bool
 as  Int.<=
@@ -683,6 +680,10 @@
 and LargeReal.<=
 and Char.<=
 and String.<=
+and WideChar.<=
+and WideString.<=
+and Char2.<=
+and String2.<=
 
 _overload 1 > : 'a * 'a -> bool
 as  Int.>
@@ -764,6 +765,10 @@
 and LargeReal.>
 and Char.>
 and String.>
+and WideChar.>
+and WideString.>
+and Char2.>
+and String2.>
 
 _overload 1 >= : 'a * 'a -> bool
 as  Int.>=
@@ -845,3 +850,7 @@
 and LargeReal.>=
 and Char.>=
 and String.>=
+and WideChar.>=
+and WideString.>=
+and Char2.>=
+and String2.>=

Modified: mlton/branches/unicode/basis-library/libs/basis-2002/top-level/top-level.sml
===================================================================
--- mlton/trunk/basis-library/libs/basis-2002/top-level/top-level.sml	2005-11-19 20:10:35 UTC (rev 4246)
+++ mlton/branches/unicode/basis-library/libs/basis-2002/top-level/top-level.sml	2005-11-24 00:51:12 UTC (rev 4250)
@@ -40,6 +40,8 @@
 structure Real64Vector = Real64Vector
 structure RealArray = RealArray
 structure RealVector = RealVector
+structure WideCharArraySlice = WideCharArraySlice
+structure WideCharArray = WideCharArray
 structure Word8Array = Word8Array
 structure Word8Vector = Word8Vector
 structure Word16Array = Word16Array

Modified: mlton/branches/unicode/basis-library/libs/basis-extra/basis-extra.mlb
===================================================================
--- mlton/trunk/basis-library/libs/basis-extra/basis-extra.mlb	2005-11-19 20:10:35 UTC (rev 4246)
+++ mlton/branches/unicode/basis-library/libs/basis-extra/basis-extra.mlb	2005-11-24 00:51:12 UTC (rev 4250)
@@ -47,10 +47,9 @@
       ../../arrays-and-vectors/mono-array2.sig
       ../../arrays-and-vectors/mono-array2.fun
       ../../arrays-and-vectors/mono.sml
-      ../../text/string0.sml
-      ../../text/char0.sml
       ../../misc/reader.sig
       ../../misc/reader.sml
+      ../../text/charset.sml
       ../../text/string-cvt.sig
       ../../text/string-cvt.sml
       ../../general/bool.sig
@@ -58,11 +57,14 @@
       ../../integer/integer.sig
       ../../integer/int.sml
       ../../text/char.sig
+      ../../text/char.fun
       ../../text/char.sml
+      ../../text/string.sig
+      ../../text/string.fun
+      ../../text/string.sml
       ../../text/substring.sig
+      ../../text/substring.fun
       ../../text/substring.sml
-      ../../text/string.sig
-      ../../text/string.sml
       ../../misc/C.sig
       ../../misc/C.sml
       ../../integer/word.sig

Modified: mlton/branches/unicode/basis-library/libs/basis-extra/top-level/basis.sig
===================================================================
--- mlton/trunk/basis-library/libs/basis-extra/top-level/basis.sig	2005-11-19 20:10:35 UTC (rev 4246)
+++ mlton/branches/unicode/basis-library/libs/basis-extra/top-level/basis.sig	2005-11-24 00:51:12 UTC (rev 4250)
@@ -242,7 +242,6 @@
       structure SysWord : WORD
       structure Unix : UNIX
       structure UnixSock : UNIX_SOCK
-(*
       structure WideChar : CHAR
       structure WideCharArray : MONO_ARRAY
       structure WideCharArray2 : MONO_ARRAY2
@@ -252,9 +251,8 @@
       structure WideString : STRING
       structure WideSubstring : SUBSTRING
       structure WideText : TEXT
+(*
       structure WideTextPrimIO : PRIM_IO
-*)
-(*
       structure Windows : WINDOWS
 *)
       structure Word1: WORD
@@ -316,7 +314,37 @@
       structure MLton: MLTON
       structure SMLofNJ: SML_OF_NJ
       structure Unsafe: UNSAFE
+      
+      structure Char4 : CHAR
+      structure Char4Array : MONO_ARRAY
+      structure Char4Array2 : MONO_ARRAY2
+      structure Char4ArraySlice : MONO_ARRAY_SLICE
+      structure Char4Vector : MONO_VECTOR
+      structure Char4VectorSlice : MONO_VECTOR_SLICE
+      structure String4 : STRING
+      structure Substring4 : SUBSTRING
+      structure Text4 : TEXT
 
+      structure Char2 : CHAR
+      structure Char2Array : MONO_ARRAY
+      structure Char2Array2 : MONO_ARRAY2
+      structure Char2ArraySlice : MONO_ARRAY_SLICE
+      structure Char2Vector : MONO_VECTOR
+      structure Char2VectorSlice : MONO_VECTOR_SLICE
+      structure String2 : STRING
+      structure Substring2 : SUBSTRING
+      structure Text2 : TEXT
+
+      structure Char1 : CHAR
+      structure Char1Array : MONO_ARRAY
+      structure Char1Array2 : MONO_ARRAY2
+      structure Char1ArraySlice : MONO_ARRAY_SLICE
+      structure Char1Vector : MONO_VECTOR
+      structure Char1VectorSlice : MONO_VECTOR_SLICE
+      structure String1 : STRING
+      structure Substring1 : SUBSTRING
+      structure Text1 : TEXT
+
       sharing type MLton.IntInf.t = IntInf.int
       sharing type MLton.Process.pid = Posix.Process.pid
       sharing type MLton.ProcEnv.gid = Posix.ProcEnv.gid
@@ -578,6 +606,38 @@
       sharing type Real64Array2.elem = Real64.real
       sharing type Real64Array2.vector = Real64Vector.vector
       sharing type Unix.exit_status = Posix.Process.exit_status
+      sharing type WideChar.string = WideString.string
+      sharing type WideCharArray.elem = WideChar.char
+      sharing type WideCharArray.vector = WideCharVector.vector
+      sharing type WideCharArraySlice.elem =  WideChar.char
+      sharing type WideCharArraySlice.array = WideCharArray.array
+      sharing type WideCharArraySlice.vector = WideCharVector.vector
+      sharing type WideCharArraySlice.vector_slice = WideCharVectorSlice.slice
+      sharing type WideCharVector.elem = WideChar.char
+      sharing type WideCharVector.vector = WideString.string
+      sharing type WideCharVectorSlice.elem = WideChar.char
+      sharing type WideCharVectorSlice.vector = WideString.string
+      sharing type WideCharVectorSlice.slice = WideSubstring.substring
+      sharing type WideString.string = WideCharVector.vector
+      sharing type WideString.char = WideChar.char
+      sharing type WideSubstring.substring = WideCharVectorSlice.slice
+      sharing type WideSubstring.string = WideString.string
+      sharing type WideSubstring.char = WideChar.char
+      sharing type WideText.Char.char = WideChar.char
+      sharing type WideText.String.string = WideString.string
+      sharing type WideText.Substring.substring = WideSubstring.substring
+      sharing type WideText.CharVector.vector = WideCharVector.vector
+      sharing type WideText.CharArray.array = WideCharArray.array
+      sharing type WideText.CharArraySlice.slice = WideCharArraySlice.slice
+      sharing type WideText.CharVectorSlice.slice = WideCharVectorSlice.slice
+(*
+      sharing type WideTextPrimIO.array = WideCharArray.array
+      sharing type WideTextPrimIO.array_slice = WideCharArraySlice.slice
+      sharing type WideTextPrimIO.elem = WideChar.char
+      sharing type WideTextPrimIO.pos = Position.int
+      sharing type WideTextPrimIO.vector = WideCharVector.vector
+      sharing type WideTextPrimIO.vector_slice = WideCharVectorSlice.slice
+*)
       sharing type WordArray.elem = word
       sharing type WordArray.vector = WordVector.vector
       sharing type WordArraySlice.elem = word
@@ -627,6 +687,91 @@
       sharing type MLton.BinIO.outstream = BinIO.outstream
       sharing type MLton.TextIO.instream = TextIO.instream
       sharing type MLton.TextIO.outstream = TextIO.outstream
+      
+      (* extensions *)
+      sharing type Char4.string = String4.string
+      sharing type Char4Array.elem = Char4.char
+      sharing type Char4Array.vector = Char4Vector.vector
+      sharing type Char4ArraySlice.elem =  Char4.char
+      sharing type Char4ArraySlice.array = Char4Array.array
+      sharing type Char4ArraySlice.vector = Char4Vector.vector
+      sharing type Char4ArraySlice.vector_slice = Char4VectorSlice.slice
+      sharing type Char4Vector.elem = Char4.char
+      sharing type Char4Vector.vector = String4.string
+      sharing type Char4VectorSlice.elem = Char4.char
+      sharing type Char4VectorSlice.vector = String4.string
+      sharing type Char4VectorSlice.slice = Substring4.substring
+      sharing type String4.string = Char4Vector.vector
+      sharing type String4.char = Char4.char
+      sharing type Substring4.substring = Char4VectorSlice.slice
+      sharing type Substring4.string = String4.string
+      sharing type Substring4.char = Char4.char
+      sharing type Text4.Char.char = Char4.char
+      sharing type Text4.String.string = String4.string
+      sharing type Text4.Substring.substring = Substring4.substring
+      sharing type Text4.CharVector.vector = Char4Vector.vector
+      sharing type Text4.CharArray.array = Char4Array.array
+      sharing type Text4.CharArraySlice.slice = Char4ArraySlice.slice
+      sharing type Text4.CharVectorSlice.slice = Char4VectorSlice.slice
+      sharing type Char2.string = String2.string
+      sharing type Char2Array.elem = Char2.char
+      sharing type Char2Array.vector = Char2Vector.vector
+      sharing type Char2ArraySlice.elem =  Char2.char
+      sharing type Char2ArraySlice.array = Char2Array.array
+      sharing type Char2ArraySlice.vector = Char2Vector.vector
+      sharing type Char2ArraySlice.vector_slice = Char2VectorSlice.slice
+      sharing type Char2Vector.elem = Char2.char
+      sharing type Char2Vector.vector = String2.string
+      sharing type Char2VectorSlice.elem = Char2.char
+      sharing type Char2VectorSlice.vector = String2.string
+      sharing type Char2VectorSlice.slice = Substring2.substring
+      sharing type String2.string = Char2Vector.vector
+      sharing type String2.char = Char2.char
+      sharing type Substring2.substring = Char2VectorSlice.slice
+      sharing type Substring2.string = String2.string
+      sharing type Substring2.char = Char2.char
+      sharing type Text2.Char.char = Char2.char
+      sharing type Text2.String.string = String2.string
+      sharing type Text2.Substring.substring = Substring2.substring
+      sharing type Text2.CharVector.vector = Char2Vector.vector
+      sharing type Text2.CharArray.array = Char2Array.array
+      sharing type Text2.CharArraySlice.slice = Char2ArraySlice.slice
+      sharing type Text2.CharVectorSlice.slice = Char2VectorSlice.slice
+      sharing type Char1.string = String1.string
+      sharing type Char1Array.elem = Char1.char
+      sharing type Char1Array.vector = Char1Vector.vector
+      sharing type Char1ArraySlice.elem =  Char1.char
+      sharing type Char1ArraySlice.array = Char1Array.array
+      sharing type Char1ArraySlice.vector = Char1Vector.vector
+      sharing type Char1ArraySlice.vector_slice = Char1VectorSlice.slice
+      sharing type Char1Vector.elem = Char1.char
+      sharing type Char1Vector.vector = String1.string
+      sharing type Char1VectorSlice.elem = Char1.char
+      sharing type Char1VectorSlice.vector = String1.string
+      sharing type Char1VectorSlice.slice = Substring1.substring
+      sharing type String1.string = Char1Vector.vector
+      sharing type String1.char = Char1.char
+      sharing type Substring1.substring = Char1VectorSlice.slice
+      sharing type Substring1.string = String1.string
+      sharing type Substring1.char = Char1.char
+      sharing type Text1.Char.char = Char1.char
+      sharing type Text1.String.string = String1.string
+      sharing type Text1.Substring.substring = Substring1.substring
+      sharing type Text1.CharVector.vector = Char1Vector.vector
+      sharing type Text1.CharArray.array = Char1Array.array
+      sharing type Text1.CharArraySlice.slice = Char1ArraySlice.slice
+      sharing type Text1.CharVectorSlice.slice = Char1VectorSlice.slice
+      (* Bind the Char1=Char and Char4=WideChar *)
+      sharing type Char1Array.array = CharArray.array
+      sharing type Char1ArraySlice.array_slice = CharArraySlice.array_slice
+      sharing type String1.string = String.string
+      sharing type Substring1.substring = Substring.string
+      sharing type Char1.char = Char.char
+      sharing type Char4Array.array = WideCharArray.array
+      sharing type Char4ArraySlice.array_slice = WideCharArraySlice.array_slice
+      sharing type String4.string = WideString.string
+      sharing type Substring4.substring = WideSubstring.string
+      sharing type Char4.char = WideChar.char
    end
    (* bool is already defined as bool and so cannot be shared.
     * So, we where these to get the needed sharing.
@@ -773,3 +918,12 @@
    where type Word31.word = Word31.word
    where type Word32.word = Word32.word
    where type Word64.word = Word64.word
+   (* Top-level types for wide string and char constants *)
+   where type WideChar.char = WideChar.char
+   where type WideString.string = WideString.string
+   where type Char1.char = Char1.char
+   where type Char2.char = Char2.char
+   where type Char4.char = Char4.char
+   where type String1.string = String1.string
+   where type String2.string = String2.string
+   where type String4.string = String4.string

Modified: mlton/branches/unicode/basis-library/libs/basis-extra/top-level/basis.sml
===================================================================
--- mlton/trunk/basis-library/libs/basis-extra/top-level/basis.sml	2005-11-19 20:10:35 UTC (rev 4246)
+++ mlton/branches/unicode/basis-library/libs/basis-extra/top-level/basis.sml	2005-11-24 00:51:12 UTC (rev 4250)
@@ -171,7 +171,6 @@
       structure SysWord = SysWord
       structure Unix = Unix
       structure UnixSock = UnixSock
-(*
       structure WideChar = WideChar
       structure WideCharArray = WideCharArray
       structure WideCharArray2 = WideCharArray2
@@ -181,6 +180,7 @@
       structure WideString = WideString
       structure WideSubstring = WideSubstring
       structure WideText = WideText
+(*
       structure WideTextPrimIO = WideTextPrimIO
 *)
 (*
@@ -246,6 +246,34 @@
       structure MLton = MLton
       structure SMLofNJ = SMLofNJ
       structure Unsafe = Unsafe
+      
+      structure Char4 = Char4
+      structure Char4Array = Char4Array
+      structure Char4Array2 = Char4Array2
+      structure Char4ArraySlice = Char4ArraySlice
+      structure Char4Vector = Char4Vector
+      structure Char4VectorSlice = Char4VectorSlice
+      structure String4 = String4
+      structure WideSubstring4 = Substring4
+      structure Text4 = Text4
+      structure Char2 = Char2
+      structure Char2Array = Char2Array
+      structure Char2Array2 = Char2Array2
+      structure Char2ArraySlice = Char2ArraySlice
+      structure Char2Vector = Char2Vector
+      structure Char2VectorSlice = Char2VectorSlice
+      structure String2 = String2
+      structure WideSubstring2 = Substring2
+      structure Text2 = Text2
+      structure Char1 = Char1
+      structure Char1Array = Char1Array
+      structure Char1Array2 = Char1Array2
+      structure Char1ArraySlice = Char1ArraySlice
+      structure Char1Vector = Char1Vector
+      structure Char1VectorSlice = Char1VectorSlice
+      structure String1 = String1
+      structure WideSubstring1 = Substring1
+      structure Text1 = Text1
          
       open ArrayGlobal
            BoolGlobal

Modified: mlton/branches/unicode/basis-library/misc/primitive.sml
===================================================================
--- mlton/trunk/basis-library/misc/primitive.sml	2005-11-19 20:10:35 UTC (rev 4246)
+++ mlton/branches/unicode/basis-library/misc/primitive.sml	2005-11-24 00:51:12 UTC (rev 4250)
@@ -275,6 +275,8 @@
             val fromInt8 = _prim "WordS8_toWord8": Int8.int -> char;
             val toWord8 = _prim "WordU8_toWord8": char -> Word8.word;
             val fromWord8 = _prim "WordU8_toWord8": Word8.word -> char;
+            val toWord32 = _prim "WordU8_toWord32": char -> Word32.word;
+            val fromWord32 = _prim "WordU8_toWord32": Word32.word -> char;
          end
 
       structure Char =
@@ -296,8 +298,8 @@
             val ord = _prim "WordU16_toWord32": char -> int;
             val toInt16 = _prim "WordS16_toWord16": char -> Int16.int;
             val fromInt16 = _prim "WordS16_toWord16": Int16.int -> char;
-            (* val toWord16 = _prim "WordU16_toWord16": char -> Word16.word; *)
-            (* val fromWord16 = _prim "WordU16_toWord16": Word16.word -> char; *)
+            val toWord32 = _prim "WordU16_toWord32": char -> Word32.word;
+            val fromWord32 = _prim "WordU16_toWord32": Word32.word -> char;
          end
       
       structure Char4 =
@@ -309,8 +311,6 @@
             val ord = _prim "WordU32_toWord32": char -> int;
             val toInt32 = _prim "WordS32_toWord32": char -> Int32.int;
             val fromInt32 = _prim "WordS32_toWord32": Int32.int -> char;
-            (* val toWord32 = _prim "WordU32_toWord32": char -> Word32.word; *)
-            (* val fromWord32 = _prim "WordU32_toWord32": Word32.word -> char; *)
          end
 
       structure CommandLine =

Copied: mlton/branches/unicode/basis-library/text/char.fun (from rev 4246, mlton/trunk/basis-library/text/char.sml)
===================================================================
--- mlton/trunk/basis-library/text/char.sml	2005-11-19 20:10:35 UTC (rev 4246)
+++ mlton/branches/unicode/basis-library/text/char.fun	2005-11-24 00:51:12 UTC (rev 4250)
@@ -0,0 +1,303 @@
+(* Copyright (C) 1999-2005 Henry Cejtin, Matthew Fluet, Suresh
+ *    Jagannathan, and Stephen Weeks.
+ * Copyright (C) 1997-2000 NEC Research Institute.
+ *
+ * MLton is released under a BSD-style license.
+ * See the file MLton-LICENSE for details.
+ *)
+
+signature CHAR0 =
+   sig
+      eqtype char
+      eqtype string
+      
+      val minChar: char
+      val maxOrd: int
+      val maxChar: char
+      
+      (* raw chr,ord methods *)
+      val chr: int -> char
+      val ord: char -> int
+      val < : char * char -> bool
+      
+      val toUnicode: char -> Word32.word
+      val fromUnicode: Word32.word -> char
+      
+      structure CharVector : 
+         MONO_VECTOR 
+         where type elem = char 
+         where type vector = string
+   end
+   
+functor CharFn(Char0 : CHAR0) : CHAR_EXTRA =
+   struct
+      open Char0
+      
+      (* required operators *)
+      fun a >= b = not (a < b)
+      fun a >  b = b < a
+      fun a <= b = not (b < a)
+      
+      (* for convenience: *)
+      val op + = Int.+
+      val op - = Int.-
+      
+      fun succ c =
+         if Primitive.safe andalso c = maxChar
+            then raise Chr
+         else chr (ord c + 1)
+      
+      fun pred c =
+         if Primitive.safe andalso c = minChar
+            then raise Chr
+         else chr (ord c - 1)
+      
+      fun chrOpt c =
+         if Primitive.safe andalso Primitive.Int.gtu (c, maxOrd)
+            then NONE
+         else SOME (chr c)
+
+      fun chr c =
+         case chrOpt c of
+            NONE => raise Chr
+          | SOME c => c
+
+      val {compare, ...} = Util.makeCompare (op <)
+      
+      val isUpper = Charset.isUpper o toUnicode
+      val isLower = Charset.isLower o toUnicode
+      val isDigit = Charset.isDigit o toUnicode
+      val isAlpha = Charset.isAlpha o toUnicode
+      val isGraph = Charset.isGraph o toUnicode
+      val isPrint = Charset.isPrint o toUnicode
+      val isPunct = Charset.isPunct o toUnicode
+      val isCntrl = Charset.isCntrl o toUnicode
+      val isSpace = Charset.isSpace o toUnicode
+      val isAscii = Charset.isAscii o toUnicode
+      val isAlphaNum = Charset.isAlphaNum o toUnicode
+      val isHexDigit = Charset.isHexDigit o toUnicode
+      
+      val toUpper = fromUnicode o Charset.toUpper o toUnicode
+      val toLower = fromUnicode o Charset.toLower o toUnicode
+      
+      (* !!! fixme, use a table in SOME situations *)
+      fun contains s c = CharVector.exists (fn d => c = d) s
+      fun notContains s c = not (contains s c)
+
+(*      
+      fun control reader state =
+         case reader state of
+            NONE => NONE
+          | SOME (c, state) =>
+               if #"@" <= c andalso c <= #"_"
+                  then SOME (chr (ord c -? ord #"@"), state)
+               else NONE
+
+      fun formatChar reader state =
+         case reader state of
+            NONE => NONE
+          | SOME (c, state) =>
+               if Ascii.isSpace c
+                  then SOME ((), state)
+               else NONE
+
+      fun formatChars reader =
+         let
+            fun loop state =
+               case formatChar reader state of
+                  NONE => state
+                | SOME ((), state) => loop state
+         in
+            loop
+         end
+                  
+      val 'a formatSequences: (char, 'a) StringCvt.reader -> 'a -> 'a =
+         fn reader =>
+         let
+            fun loop state =
+               case reader state of
+                  SOME (#"\\", state1) =>
+                     (case formatChar reader state1 of
+                         NONE => state
+                       | SOME ((), state2) =>
+                            let
+                               val state3 = formatChars reader state2
+                            in
+                               case reader state3 of
+                                  SOME (#"\\", state4) => loop state4
+                                | _ => state
+                            end)
+                | _ => state
+         in
+            loop
+         end
+
+      fun 'a scan (reader: (Char.char, 'a) StringCvt.reader)
+        : (char, 'a) StringCvt.reader =
+         let
+            val escape: (char, 'a) StringCvt.reader =
+               fn state =>
+               case reader state of
+                  NONE => NONE
+                | SOME (c, state') =>
+                     let
+                        fun yes c = SOME (c, state')
+                     in
+                        case c of
+                           #"a" => yes #"\a"
+                         | #"b" => yes #"\b"
+                         | #"t" => yes #"\t"
+                         | #"n" => yes #"\n"
+                         | #"v" => yes #"\v"
+                         | #"f" => yes #"\f"
+                         | #"r" => yes #"\r"
+                         | #"\\" => yes #"\\"
+                         | #"\"" => yes #"\""
+                         | #"^" => control reader state'
+                         | #"u" =>
+                              Reader.mapOpt chrOpt
+                              (StringCvt.digitsExact (StringCvt.HEX, 4) reader)
+                              state'
+                         | _ => (* 3 decimal digits *)
+                              Reader.mapOpt chrOpt
+                              (StringCvt.digitsExact (StringCvt.DEC, 3)
+                               reader)
+                              state
+                     end
+            val main: (char, 'a) StringCvt.reader =
+               fn state =>
+               let
+                  val state = formatSequences reader state
+               in
+                  case reader state of
+                     NONE => NONE
+                   | SOME (c, state) =>
+                        if isPrint c
+                           then
+                              case c of
+                                 #"\\" => escape state
+                               | #"\"" => NONE
+                               | _ => SOME (c, formatSequences reader state)
+                        else NONE
+               end
+         in
+            main
+         end
+
+      val fromString = StringCvt.scanString scan
+
+      fun 'a scanC (reader: (char, 'a) StringCvt.reader)
+        : (char, 'a) StringCvt.reader =
+         let
+            val rec escape =
+               fn state =>
+               case reader state of
+                  NONE => NONE
+                | SOME (c, state') =>
+                     let fun yes c = SOME (c, state')
+                     in case c of
+                        #"a" => yes #"\a"
+                      | #"b" => yes #"\b"
+                      | #"t" => yes #"\t"
+                      | #"n" => yes #"\n"
+                      | #"v" => yes #"\v"
+                      | #"f" => yes #"\f"
+                      | #"r" => yes #"\r"
+                      | #"?" => yes #"?"
+                      | #"\\" => yes #"\\"
+                      | #"\"" => yes #"\""
+                      | #"'" => yes #"'"
+                      | #"^" => control reader state'
+                      | #"x" =>
+                           Reader.mapOpt chrOpt
+                           (StringCvt.digits StringCvt.HEX reader)
+                           state'
+                      | _ =>
+                           Reader.mapOpt chrOpt
+                           (StringCvt.digitsPlus (StringCvt.OCT, 3) reader)
+                           state
+                     end
+            and main =
+               fn NONE => NONE
+                | SOME (c, state) =>
+                     if isPrint c
+                        then
+                           case c of
+                              #"\\" => escape state
+                            | _ => SOME (c, state)
+                     else NONE
+         in
+            main o reader
+         end
+
+      val fromCString = StringCvt.scanString scanC
+
+*)
+      fun padLeft (s: String.string, n: int): String.string =
+         let
+            val m = Char1Vector.length s
+            val diff = n -? m
+         in if Int.> (diff, 0)
+               then Char1Vector.concat [Char1Vector.tabulate (diff, fn _ => #"0"), s]
+            else if diff = 0
+                    then s
+                 else raise Fail "padLeft"
+         end
+      
+      fun memoize (f: char -> 'a): char -> 'a =
+         let val a = Array.tabulate (numChars, f o Char.chr)
+         in fn c => Array.sub (a, Char.ord c)
+         end
+      
+      val toString =
+         memoize
+         (fn c =>
+          if isPrint c
+             then
+                (case c of
+                    #"\\" => "\\\\"
+                  | #"\"" => "\\\""
+                  | _ => CharVector.new (1, c))
+          else
+             case c of
+                #"\a" => "\\a"
+              | #"\b" => "\\b"
+              | #"\t" => "\\t"
+              | #"\n" => "\\n"
+              | #"\v" => "\\v"
+              | #"\f" => "\\f"
+              | #"\r" => "\\r"
+              | _ =>
+                   if c < #" "
+                      then (String.concat
+                            ["\\^", CharVector.new (1, chr (ord c +? ord #"@"))])
+                   else String.concat 
+                        ["\\", padLeft (Int.fmt StringCvt.DEC (ord c), 3)])
+      
+      val toCString =
+         memoize
+         (fn c =>
+          if isPrint c
+             then
+                (case c of
+                    #"\\" => "\\\\"
+                  | #"\"" => "\\\""
+                  | #"?" => "\\?"
+                  | #"'" => "\\'"
+                  | _ => String0.str c)
+          else
+             case c of
+                #"\a" => "\\a"
+              | #"\b" => "\\b"
+              | #"\t" => "\\t"
+              | #"\n" => "\\n"
+              | #"\v" => "\\v"
+              | #"\f" => "\\f"
+              | #"\r" => "\\r"
+              | _ =>
+                   String.concat
+                   ["\\", padLeft (Int.fmt StringCvt.OCT (ord c), 3)])
+   end
+
+structure CharGlobal: CHAR_GLOBAL = Char
+open CharGlobal

Modified: mlton/branches/unicode/basis-library/text/char.sig
===================================================================
--- mlton/trunk/basis-library/text/char.sig	2005-11-19 20:10:35 UTC (rev 4246)
+++ mlton/branches/unicode/basis-library/text/char.sig	2005-11-24 00:51:12 UTC (rev 4250)
@@ -38,11 +38,11 @@
       val isPrint: char -> bool 
       val isPunct: char -> bool 
       val isSpace: char -> bool 
-      val fromString: string -> char option 
-      val scan: (char, 'a) StringCvt.reader -> (char, 'a) StringCvt.reader
-      val toString: char -> string 
-      val fromCString: string -> char option
-      val toCString: char -> string
+      val fromString: String.string -> char option 
+      val scan: (Char.char, 'a) StringCvt.reader -> (char, 'a) StringCvt.reader
+      val toString: char -> String.string 
+      val fromCString: String.string -> char option
+      val toCString: char -> String.string
    end
 
 signature CHAR_EXTRA =

Modified: mlton/branches/unicode/basis-library/text/char.sml
===================================================================
--- mlton/trunk/basis-library/text/char.sml	2005-11-19 20:10:35 UTC (rev 4246)
+++ mlton/branches/unicode/basis-library/text/char.sml	2005-11-24 00:51:12 UTC (rev 4250)
@@ -6,218 +6,63 @@
  * See the file MLton-LICENSE for details.
  *)
 
-structure Char: CHAR_EXTRA =
+structure Char1 =
    struct
-      open Char0
-                       
-      fun control reader state =
-         case reader state of
-            NONE => NONE
-          | SOME (c, state) =>
-               if #"@" <= c andalso c <= #"_"
-                  then SOME (chr (ord c -? ord #"@"), state)
-               else NONE
+      open Primitive.Char1
+      
+      type char = char
+      type string = string
 
-      fun formatChar reader state =
-         case reader state of
-            NONE => NONE
-          | SOME (c, state) =>
-               if isSpace c
-                  then SOME ((), state)
-               else NONE
+      val minChar = #"\000"
+      val maxOrd: int = 255
+      val maxChar = #"\255"
+      
+      val toUnicode = Primitive.Char1.toWord32
+      val fromUnicode = Primitive.Char1.toWord32
+      
+      structure CharVector = Char1Vector
+end
 
-      fun formatChars reader =
-         let
-            fun loop state =
-               case formatChar reader state of
-                  NONE => state
-                | SOME ((), state) => loop state
-         in
-            loop
-         end
-                  
-      val 'a formatSequences: (char, 'a) StringCvt.reader -> 'a -> 'a =
-         fn reader =>
-         let
-            fun loop state =
-               case reader state of
-                  SOME (#"\\", state1) =>
-                     (case formatChar reader state1 of
-                         NONE => state
-                       | SOME ((), state2) =>
-                            let
-                               val state3 = formatChars reader state2
-                            in
-                               case reader state3 of
-                                  SOME (#"\\", state4) => loop state4
-                                | _ => state
-                            end)
-                | _ => state
-         in
-            loop
-         end
+structure Char2 =
+   struct
+      open Primitive.Char2
+      
+      type char = char
+      type string = string
+      
+      val minChar = #"\u0000"
+      val maxOrd: int = 65535
+      val maxChar = #"\uFFFF"
+      
+      val toUnicode = Primitive.Char2.toWord32
+      val fromUnicode = Primitive.Char2.toWord32
+      
+      structure CharVector = Char2Vector
+   end
 
-      fun 'a scan (reader: (char, 'a) StringCvt.reader)
-        : (char, 'a) StringCvt.reader =
-         let
-            val escape: (char, 'a) StringCvt.reader =
-               fn state =>
-               case reader state of
-                  NONE => NONE
-                | SOME (c, state') =>
-                     let
-                        fun yes c = SOME (c, state')
-                     in
-                        case c of
-                           #"a" => yes #"\a"
-                         | #"b" => yes #"\b"
-                         | #"t" => yes #"\t"
-                         | #"n" => yes #"\n"
-                         | #"v" => yes #"\v"
-                         | #"f" => yes #"\f"
-                         | #"r" => yes #"\r"
-                         | #"\\" => yes #"\\"
-                         | #"\"" => yes #"\""
-                         | #"^" => control reader state'
-                         | #"u" =>
-                              Reader.mapOpt chrOpt
-                              (StringCvt.digitsExact (StringCvt.HEX, 4) reader)
-                              state'
-                         | _ => (* 3 decimal digits *)
-                              Reader.mapOpt chrOpt
-                              (StringCvt.digitsExact (StringCvt.DEC, 3)
-                               reader)
-                              state
-                     end
-            val main: (char, 'a) StringCvt.reader =
-               fn state =>
-               let
-                  val state = formatSequences reader state
-               in
-                  case reader state of
-                     NONE => NONE
-                   | SOME (c, state) =>
-                        if isPrint c
-                           then
-                              case c of
-                                 #"\\" => escape state
-                               | #"\"" => NONE
-                               | _ => SOME (c, formatSequences reader state)
-                        else NONE
-               end
-         in
-            main
-         end
-
-      val fromString = StringCvt.scanString scan
-
-      fun 'a scanC (reader: (char, 'a) StringCvt.reader)
-        : (char, 'a) StringCvt.reader =
-         let
-            val rec escape =
-               fn state =>
-               case reader state of
-                  NONE => NONE
-                | SOME (c, state') =>
-                     let fun yes c = SOME (c, state')
-                     in case c of
-                        #"a" => yes #"\a"
-                      | #"b" => yes #"\b"
-                      | #"t" => yes #"\t"
-                      | #"n" => yes #"\n"
-                      | #"v" => yes #"\v"
-                      | #"f" => yes #"\f"
-                      | #"r" => yes #"\r"
-                      | #"?" => yes #"?"
-                      | #"\\" => yes #"\\"
-                      | #"\"" => yes #"\""
-                      | #"'" => yes #"'"
-                      | #"^" => control reader state'
-                      | #"x" =>
-                           Reader.mapOpt chrOpt
-                           (StringCvt.digits StringCvt.HEX reader)
-                           state'
-                      | _ =>
-                           Reader.mapOpt chrOpt
-                           (StringCvt.digitsPlus (StringCvt.OCT, 3) reader)
-                           state
-                     end
-            and main =
-               fn NONE => NONE
-                | SOME (c, state) =>
-                     if isPrint c
-                        then
-                           case c of
-                              #"\\" => escape state
-                            | _ => SOME (c, state)
-                     else NONE
-         in
-            main o reader
-         end
-
-      val fromCString = StringCvt.scanString scanC
-
-      fun padLeft (s: string, n: int): string =
-         let
-            val m = String.size s
-            val diff = n -? m
-         in if Int.> (diff, 0)
-               then String.concat [String.new (diff, #"0"), s]
-            else if diff = 0
-                    then s
-                 else raise Fail "padLeft"
-         end
-
-      val toString =
-         memoize
-         (fn c =>
-          if isPrint c
-             then
-                (case c of
-                    #"\\" => "\\\\"
-                  | #"\"" => "\\\""
-                  | _ => String0.str c)
-          else
-             case c of
-                #"\a" => "\\a"
-              | #"\b" => "\\b"
-              | #"\t" => "\\t"
-              | #"\n" => "\\n"
-              | #"\v" => "\\v"
-              | #"\f" => "\\f"
-              | #"\r" => "\\r"
-              | _ =>
-                   if c < #" "
-                      then (String.concat
-                            ["\\^", String0.str (chr (ord c +? ord #"@"))])
-                   else String.concat 
-                        ["\\", padLeft (Int.fmt StringCvt.DEC (ord c), 3)])
+structure Char4 =
+   struct
+      open Primitive.Char2
       
-      val toCString =
-         memoize
-         (fn c =>
-          if isPrint c
-             then
-                (case c of
-                    #"\\" => "\\\\"
-                  | #"\"" => "\\\""
-                  | #"?" => "\\?"
-                  | #"'" => "\\'"
-                  | _ => String0.str c)
-          else
-             case c of
-                #"\a" => "\\a"
-              | #"\b" => "\\b"
-              | #"\t" => "\\t"
-              | #"\n" => "\\n"
-              | #"\v" => "\\v"
-              | #"\f" => "\\f"
-              | #"\r" => "\\r"
-              | _ =>
-                   String.concat
-                   ["\\", padLeft (Int.fmt StringCvt.OCT (ord c), 3)])
+      type char = char
+      type string = string
+      
+      val minChar = #"\U00000000"
+      val maxOrd: int = 4294967295
+      val maxChar = #"\UFFFFFFFF"
+      
+      fun toUnicode x = x
+      fun fromUnicode x = x
+      
+      structure CharVector = Char4Vector
    end
 
+structure Char1 : CHAR_EXTRA = CharFn(Char1)
+structure Char2 : CHAR_EXTRA = CharFn(Char2)
+structure Char4 : CHAR_EXTRA = CharFn(Char4)
+
+structure Char = Char1
+structure WideChar = Char4
+
 structure CharGlobal: CHAR_GLOBAL = Char
 open CharGlobal
-

Deleted: mlton/branches/unicode/basis-library/text/char0.sml
===================================================================
--- mlton/trunk/basis-library/text/char0.sml	2005-11-19 20:10:35 UTC (rev 4246)
+++ mlton/branches/unicode/basis-library/text/char0.sml	2005-11-24 00:51:12 UTC (rev 4250)
@@ -1,97 +0,0 @@
-(* Copyright (C) 1999-2005 Henry Cejtin, Matthew Fluet, Suresh
- *    Jagannathan, and Stephen Weeks.
- * Copyright (C) 1997-2000 NEC Research Institute.
- *
- * MLton is released under a BSD-style license.
- * See the file MLton-LICENSE for details.
- *)
-
-structure Char0 =
-   struct
-      open Primitive.Int Primitive.Char
-         
-      type char = char
-      type string = string
-
-      val minChar = #"\000"
-      val numChars: int = 256
-      val maxOrd: int = 255
-      val maxChar = #"\255"
-
-      fun succ c =
-         if Primitive.safe andalso c = maxChar
-            then raise Chr
-         else Primitive.Char.chr (ord c + 1)
-
-      fun pred c =
-         if Primitive.safe andalso c = minChar
-            then raise Chr
-         else Primitive.Char.chr (ord c - 1)
-
-      fun chrOpt c =
-         if Primitive.safe andalso Primitive.Int.gtu (c, maxOrd)
-            then NONE
-         else SOME (Primitive.Char.chr c)
-
-      fun chr c =
-         case chrOpt c of
-            NONE => raise Chr
-          | SOME c => c
-
-      val {compare, ...} = Util.makeCompare (op <)
-
-      structure String = String0
-
-      fun oneOf s =
-         let
-            val a = Array.array (numChars, false)
-            val n = String.size s
-            fun loop i =
-               if Primitive.Int.>= (i, n) then ()
-               else (Array.update (a, ord (String.sub (s, i)), true)
-                     ; loop (i + 1))
-         in loop 0
-            ; fn c => Array.sub (a, ord c)
-         end
-      val contains = oneOf
-
-      fun notOneOf s = not o (oneOf s)
-      val notContains = notOneOf
-
-      fun memoize (f: char -> 'a): char -> 'a =
-         let val a = Array.tabulate (numChars, f o chr)
-         in fn c => Array.sub (a, ord c)
-         end
-         
-      local
-         val not = fn f => memoize (not o f)
-         infix or andd
-         fun f or g = memoize (fn c => f c orelse g c)
-         fun f andd g = memoize (fn c => f c andalso g c)
-      in
-         val isLower = oneOf "abcdefghijklmnopqrstuvwxyz"
-         val isUpper = oneOf "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
-         val isDigit = oneOf "0123456789"
-         val isAlpha = isUpper or isLower
-         val isHexDigit = isDigit or (oneOf "abcdefABCDEF")
-         val isAlphaNum = isAlpha or isDigit
-         val isPrint = fn c => #" " <= c andalso c <= #"~"
-         val isSpace = oneOf " \t\r\n\v\f"
-         val isGraph = (not isSpace) andd isPrint
-         val isPunct = isGraph andd (not isAlphaNum)
-         val isCntrl = not isPrint
-         val isAscii = fn c => c < #"\128"
-      end
-
-      local
-         fun make (lower, upper, diff) =
-            memoize (fn c => if lower <= c andalso c <= upper
-                               then chr (ord c +? diff)
-                            else c)
-         val diff = ord #"A" - ord #"a"
-      in
-         val toLower = make (#"A", #"Z", ~diff)
-         val toUpper = make (#"a", #"z", diff)
-      end
-   end
-

Added: mlton/branches/unicode/basis-library/text/charset.sml
===================================================================
--- mlton/trunk/basis-library/text/charset.sml	2005-11-19 20:10:35 UTC (rev 4246)
+++ mlton/branches/unicode/basis-library/text/charset.sml	2005-11-24 00:51:12 UTC (rev 4250)
@@ -0,0 +1,138 @@
+signature CHARSET =
+   sig
+      type t = Word32.word
+      
+      val isAscii: t -> bool
+      val isAlpha: t -> bool
+      val isAlphaNum: t -> bool
+      val isCntrl: t -> bool
+      val isDigit: t -> bool
+      val isGraph: t -> bool
+      val isHexDigit: t -> bool
+      val isLower: t -> bool
+      val isPrint: t -> bool
+      val isSpace: t -> bool
+      val isPunct: t -> bool
+      val isUpper: t -> bool
+      
+      val toUpper: t -> t
+      val toLower: t -> t
+   end
+
+structure Charset :> CHARSET =
+   struct
+      local
+         open Primitive.Word32
+      in
+         infix 5 >> << andb orb xorb
+         type t = Word32.word
+         
+         (* these are computed by running ./hash *)
+         val size = 32768
+         val mask = 0wx7FFF
+         val shift = 0w14
+         val factor = 0w5390
+         
+         fun hash c = ((c >> shift) * factor + c) andb mask
+         
+         (* this is computed by running ./parse *)
+         val unicodedb = "12345678"
+         val udelta = Vector.fromList [ 0w12, 0w23, 0w22 ]
+         val ldelta = Vector.fromList [ 0w2, 0w3, 0w5 ]
+         
+         (* decode the compressed unicode database.
+          * Each entry in the resulting table has format:
+          *   bits [  0, 21) = the Unicode code point in this bucket
+          *   bits [ 21, 28) = the uppercase delta
+          *   bits [ 28, 30) = CLASS = LETTER | NUMERAL | CONTROL | PUNCTUATION
+          *   bits [ 30, 32) = CASE = UPPER | LOWER | WHITESPACE | (other)
+          *)
+         val CODEPOINT   = 0wx001FFFFF
+         val CLASS       = 0wx30000000
+         val LETTER      = 0wx00000000
+         val NUMERAL     = 0wx10000000
+         val CONTROL     = 0wx20000000
+         val PUNCTUATION = 0wx30000000
+         val CASE        = 0wxC0000000
+         val UPPER       = 0wx00000000
+         val LOWER       = 0wx40000000
+         val WHITESPACE  = 0wx80000000
+         val DELTASHIFT  = 0w21
+         val DELTAMASK   = 0wx7F
+         val table =
+            let
+               val a = Array.tabulate (size, fn _ => 0w0)
+            in
+               Array.vector a
+            end
+         
+         fun fetch c =
+            let
+               val x = toInt (hash c)
+               val v = Vector.sub (table, x)
+            in
+               if c = v andb CODEPOINT then SOME v else
+               (* only 6 unicode chars fail that test, catch them here: *)
+               let
+                  val v = Vector.sub (table, Primitive.Int.+ (x, 1))
+               in
+                  if c = v andb CODEPOINT then SOME v else NONE
+               end
+            end
+         
+         fun isClass class c = 
+            case fetch c of
+               NONE => false
+             | SOME v => v andb CLASS = class
+         
+         val isAlpha = isClass LETTER
+         val isDigit = isClass NUMERAL
+         val isCntrl = isClass CONTROL
+         val isPunct = isClass PUNCTUATION
+         
+         fun isCase cs c =
+            case fetch c of
+               NONE => false
+             | SOME v => v andb CASE = cs
+         
+         val isUpper = isCase UPPER
+         val isLower = isCase LOWER
+         val isSpace = isCase WHITESPACE
+         
+         (* derived methods *)
+         fun isAscii c = c < 0w128
+         
+         (* Both LETTER and NUMERAL have a 0 in bit 28 *)
+         fun isAlphaNum c = 
+            case fetch c of
+               NONE => false
+             | SOME v => c andb CONTROL = 0w0
+         
+         fun isPrint c = 
+            case fetch c of
+               NONE => false (* complement of control, excludes non-unicode *)
+             | SOME v => v andb CLASS <> CONTROL
+         
+         (* printable, but not whitespace *)
+         fun isGraph c = 
+            case fetch c of
+               NONE => false
+             | SOME v => v andb CLASS <> CONTROL andalso v andb CASE <> WHITESPACE
+         
+         fun isHexDigit c = false (* !!! damn !!! *)
+         
+         (* Use the delta tables to convert case 
+          * We exploit the fact that a character cannot have both
+          * uppercase and lowercase mappings simultaneously.
+          *)
+         fun delta v = toInt (v >> DELTASHIFT andb DELTAMASK)
+         fun toLower c =
+            case fetch c of
+               NONE => c
+             | SOME v => c + Vector.sub (ldelta, delta v)
+         fun toUpper c =
+            case fetch c of
+               NONE => c
+             | SOME v => c + Vector.sub (udelta, delta v)
+      end
+   end

Modified: mlton/branches/unicode/basis-library/text/string-cvt.sml
===================================================================
--- mlton/trunk/basis-library/text/string-cvt.sml	2005-11-19 20:10:35 UTC (rev 4246)
+++ mlton/branches/unicode/basis-library/text/string-cvt.sml	2005-11-24 00:51:12 UTC (rev 4250)
@@ -6,9 +6,14 @@
  * See the file MLton-LICENSE for details.
  *)
 
+(* The String and Char structures are not yet available at this point.
+ * They need the StringCvt structure for their signatures.
+ * Therefore, we use CharVector methods.
+ *)
 structure StringCvt: STRING_CVT_EXTRA =
    struct
       open Reader
+      structure Char = Primitive.Char
 
       val wordFromInt = Primitive.Word32.fromInt
 
@@ -31,25 +36,22 @@
 
       open Primitive.Int
 
-      structure Char = Char0
-      structure String = String0
-
       local
          fun pad f (c: char) i s =
             let
-               val n = String.size s
+               val n = CharVector.length s
             in
                if n >= i
                   then s
-               else f (s, String0.vector (i -? n, c))
+               else f (s, CharVector.vector (i -? n, c))
             end
       in
-         val padLeft = pad (fn (s, pad) => String.^ (pad, s))
-         val padRight = pad String.^
+         val padLeft = pad (fn (s, pad) => CharVector.append (pad, s))
+         val padRight = pad CharVector.append
       end
 
       fun splitl p f src =
-         let fun done chars = String0.implode (rev chars)
+         let fun done chars = CharVector.fromList (rev chars)
             fun loop (src, chars) =
                case f src of
                   NONE => (done chars, src)
@@ -63,14 +65,12 @@
       fun takel p f s = #1 (splitl p f s)
       fun dropl p f s = #2 (splitl p f s)
 
-      fun skipWS x = dropl Char.isSpace x
-
       type cs = int
 
       fun stringReader (s: string): (char, cs) reader =
-         fn i => if i >= String.size s
+         fn i => if i >= CharVector.length s
                     then NONE
-                 else SOME (String.sub (s, i), i + 1)
+                 else SOME (CharVector.sub (s, i), i + 1)
                     
       fun 'a scanString (f: ((char, cs) reader -> ('a, cs) reader)) (s: string)
         : 'a option =
@@ -79,6 +79,24 @@
           | SOME (a, _) => SOME a
 
       local
+         val numChars = 256
+         fun memoize (f: char -> 'a): char -> 'a =
+            let val a = Array.tabulate (numChars, f o Char.chr)
+            in fn c => Array.sub (a, Char.ord c)
+            end
+         fun oneOf s =
+            let
+               val a = Array.array (numChars, false)
+               val n = CharVector.length s
+               fun loop i =
+                  if Primitive.Int.>= (i, n) then ()
+                  else (Array.update (a, Char.ord (CharVector.sub (s, i)), true)
+                        ; loop (i + 1))
+            in loop 0
+               ; fn c => Array.sub (a, Char.ord c)
+            end
+         val isSpace = oneOf " \t\r\n\v\f\u0085\u00A0" (* 85, A0 are latin spaces *)
+         
          fun range (add: int, cmin: char, cmax: char): char -> int option =
             let val min = Char.ord cmin
             in fn c => if Char.<= (cmin, c) andalso Char.<= (c, cmax)
@@ -87,7 +105,7 @@
             end
 
          fun 'a combine (ds: (char -> 'a option) list): char -> 'a option =
-            Char.memoize
+            memoize
             (fn c =>
              let
                 val rec loop =
@@ -99,13 +117,14 @@
              in loop ds
              end)
             
-         val bin = Char.memoize (range (0, #"0", #"1"))
-         val oct = Char.memoize (range (0, #"0", #"7"))
-         val dec = Char.memoize (range (0, #"0", #"9"))
+         val bin = memoize (range (0, #"0", #"1"))
+         val oct = memoize (range (0, #"0", #"7"))
+         val dec = memoize (range (0, #"0", #"9"))
          val hex = combine [range (0, #"0", #"9"),
                             range (10, #"a", #"f"),
                             range (10, #"A", #"F")]
       in
+         fun skipWS x = dropl isSpace x
          fun charToDigit (radix: radix): char -> int option =
             case radix of
                BIN => bin
@@ -195,5 +214,5 @@
                 | SOME n => loop (n, state)
          end
 
-      fun digitToChar (n: int): char = String.sub ("0123456789ABCDEF", n)
+      fun digitToChar (n: int): char = CharVector.sub ("0123456789ABCDEF", n)
    end

Copied: mlton/branches/unicode/basis-library/text/string.fun (from rev 4246, mlton/trunk/basis-library/text/string.sml)
===================================================================
--- mlton/trunk/basis-library/text/string.sml	2005-11-19 20:10:35 UTC (rev 4246)
+++ mlton/branches/unicode/basis-library/text/string.fun	2005-11-24 00:51:12 UTC (rev 4250)
@@ -0,0 +1,70 @@
+(* Copyright (C) 1999-2005 Henry Cejtin, Matthew Fluet, Suresh
+ *    Jagannathan, and Stephen Weeks.
+ * Copyright (C) 1997-2000 NEC Research Institute.
+ *
+ * MLton is released under a BSD-style license.
+ * See the file MLton-LICENSE for details.
+ *)
+
+signature STRING0 =
+   sig
+      structure CharVector : MONO_VECTOR_EXTRA
+      structure Char : CHAR
+   end
+
+functor StringFn(String0 : STRING0) : STRING_EXTRA =
+   struct
+      open String0 CharVector
+      
+      type char = elem
+      type string = vector
+      val maxSize = maxLen
+      val size = length
+      fun extract (s, start, len) = 
+         CharVectorSlice.vector (CharVectorSlice.slice (s, start, len))
+      fun substring (s, start, len) = extract (s, start, SOME len)
+      val op ^ = append
+      val new = vector
+      fun str c = new (1, c)
+      val implode = fromList
+      val explode = toList
+      
+      val toLower = translate (str o Char.toLower)
+
+      local
+         fun make f = f (op = : char * char -> bool)
+      in
+        val isPrefix = make isPrefix
+        val isSubstring = make isSubvector
+        val isSuffix = make isSuffix
+      end
+      val compare = collate Char.compare
+      val {<, <=, >, >=} = Util.makeOrder compare
+
+      val toString = translate Char.toString
+      val toCString = translate Char.toCString
+
+      val scan: (char, 'a) StringCvt.reader -> (string, 'a) StringCvt.reader =
+         fn reader =>
+         let
+            fun loop (state, cs) =
+               case Char.scan reader state of
+                  NONE => SOME (implode (rev cs),
+                                Char.formatSequences reader state)
+                | SOME (c, state) => loop (state, c :: cs)
+         in
+            fn state => loop (state, [])
+         end
+         
+      val fromString = StringCvt.scanString scan
+         
+      fun scanString scanChar (reader: (char, 'a) StringCvt.reader)
+        : (string, 'a) StringCvt.reader =
+         fn state =>
+         Option.map (fn (cs, state) => (implode cs, state))
+         (Reader.list (scanChar reader) state)
+
+      val fromCString = StringCvt.scanString (scanString Char.scanC)
+
+      fun nullTerm s = s ^ "\000"
+   end

Modified: mlton/branches/unicode/basis-library/text/string.sml
===================================================================
--- mlton/trunk/basis-library/text/string.sml	2005-11-19 20:10:35 UTC (rev 4246)
+++ mlton/branches/unicode/basis-library/text/string.sml	2005-11-24 00:51:12 UTC (rev 4250)
@@ -6,50 +6,13 @@
  * See the file MLton-LICENSE for details.
  *)
 
-structure String: STRING_EXTRA =
-   struct
-      open String0
+structure String1 = StringFn(Char1Vector)
+structure String2 = StringFn(Char2Vector)
+structure String4 = StringFn(Char4Vector)
 
-      val toLower = translate (str o Char.toLower)
+structure String = String1
+structure WideString = String4
 
-      local
-         fun make f = f (op = : char * char -> bool)
-      in
-        val isPrefix = make isPrefix
-        val isSubstring = make isSubvector
-        val isSuffix = make isSuffix
-      end
-      val compare = collate Char.compare
-      val {<, <=, >, >=} = Util.makeOrder compare
-
-      val toString = translate Char.toString
-      val toCString = translate Char.toCString
-
-      val scan: (char, 'a) StringCvt.reader -> (string, 'a) StringCvt.reader =
-         fn reader =>
-         let
-            fun loop (state, cs) =
-               case Char.scan reader state of
-                  NONE => SOME (implode (rev cs),
-                                Char.formatSequences reader state)
-                | SOME (c, state) => loop (state, c :: cs)
-         in
-            fn state => loop (state, [])
-         end
-         
-      val fromString = StringCvt.scanString scan
-         
-      fun scanString scanChar (reader: (char, 'a) StringCvt.reader)
-        : (string, 'a) StringCvt.reader =
-         fn state =>
-         Option.map (fn (cs, state) => (implode cs, state))
-         (Reader.list (scanChar reader) state)
-
-      val fromCString = StringCvt.scanString (scanString Char.scanC)
-
-      fun nullTerm s = s ^ "\000"
-   end
-
 structure StringGlobal: STRING_GLOBAL = String
 open StringGlobal
 

Deleted: mlton/branches/unicode/basis-library/text/string0.sml
===================================================================
--- mlton/trunk/basis-library/text/string0.sml	2005-11-19 20:10:35 UTC (rev 4246)
+++ mlton/branches/unicode/basis-library/text/string0.sml	2005-11-24 00:51:12 UTC (rev 4250)
@@ -1,32 +0,0 @@
-(* Copyright (C) 1999-2005 Henry Cejtin, Matthew Fluet, Suresh
- *    Jagannathan, and Stephen Weeks.
- * Copyright (C) 1997-2000 NEC Research Institute.
- *
- * MLton is released under a BSD-style license.
- * See the file MLton-LICENSE for details.
- *)
-
-structure String0 = 
-   struct
-      open CharVector
-      type char = elem
-      type string = vector
-      structure Substring0 =
-         struct
-            open CharVectorSlice
-            type char = elem
-            type string = vector
-            type substring = slice
-         end
-      val maxSize = maxLen
-      val size = length
-      fun extract (s, start, len) = 
-         CharVectorSlice.vector (CharVectorSlice.slice (s, start, len))
-      fun substring (s, start, len) = extract (s, start, SOME len)
-      val op ^ = append
-      val new = vector
-      fun str c = new (1, c)
-      val implode = fromList
-      val explode = toList
-   end
-structure Substring0 = String0.Substring0

Copied: mlton/branches/unicode/basis-library/text/substring.fun (from rev 4246, mlton/trunk/basis-library/text/substring.sml)
===================================================================
--- mlton/trunk/basis-library/text/substring.sml	2005-11-19 20:10:35 UTC (rev 4246)
+++ mlton/branches/unicode/basis-library/text/substring.fun	2005-11-24 00:51:12 UTC (rev 4250)
@@ -0,0 +1,51 @@
+(* Copyright (C) 1999-2005 Henry Cejtin, Matthew Fluet, Suresh
+ *    Jagannathan, and Stephen Weeks.
+ * Copyright (C) 1997-2000 NEC Research Institute.
+ *
+ * MLton is released under a BSD-style license.
+ * See the file MLton-LICENSE for details.
+ *)
+
+(* The :> is to hide the type substring.  We must add the where's to make char
+ * and string the same as the toplevel types.
+ *)
+functor SubstringFn(CharVectorSlice : MONO_VECTOR_SLICE) =
+   struct
+      open CharVectorSlice
+      type char = elem
+      type string = vector
+      type substring = slice
+
+      val size = length
+      val extract = slice
+      fun substring (s, start, len) = extract (s, start, SOME len)
+      val string = vector
+      val getc = getItem
+      fun first ss = Option.map #1 (getItem ss)
+      val slice = subslice
+      val explode = toList
+      local
+         fun make f = f (op = : char * char -> bool)
+      in
+        val isPrefix = make isPrefix
+        val isSubstring = make isSubvector
+        val isSuffix = make isSuffix
+        val position = make position
+      end
+      val compare = collate Char.compare
+(*
+      type cs = int
+         
+      fun reader (T {str, start, size}): (char, cs) Reader.reader =
+         fn i => if i >= size
+                    then NONE
+                 else SOME (String.sub (str, start +? i), i + 1)
+                    
+      fun 'a scanSubstring
+         (f: (char, cs) Reader.reader -> ('a, int) Reader.reader)
+         (ss: substring): 'a option =
+         case f (reader ss) 0 of
+            NONE => NONE
+          | SOME (a, _) => SOME a
+*)
+   end

Modified: mlton/branches/unicode/basis-library/text/substring.sml
===================================================================
--- mlton/trunk/basis-library/text/substring.sml	2005-11-19 20:10:35 UTC (rev 4246)
+++ mlton/branches/unicode/basis-library/text/substring.sml	2005-11-24 00:51:12 UTC (rev 4250)
@@ -9,48 +9,30 @@
 (* The :> is to hide the type substring.  We must add the where's to make char
  * and string the same as the toplevel types.
  *)
-structure Substring
+
+structure Substring1 
    :> SUBSTRING_EXTRA
-   where type char = char
-   where type string = string
-   where type substring = CharVectorSlice.slice
-   =
-   struct
-      open Substring0
+   where type char = Char1.char
+   where type string = String1.string
+   where type substring = Char1VectorSlice.slice
+   = SubstringFn(Char1VectorSlice)
 
-      val size = length
-      val extract = slice
-      fun substring (s, start, len) = extract (s, start, SOME len)
-      val string = vector
-      val getc = getItem
-      fun first ss = Option.map #1 (getItem ss)
-      val slice = subslice
-      val explode = toList
-      local
-         fun make f = f (op = : char * char -> bool)
-      in
-        val isPrefix = make isPrefix
-        val isSubstring = make isSubvector
-        val isSuffix = make isSuffix
-        val position = make position
-      end
-      val compare = collate Char.compare
-(*
-      type cs = int
-         
-      fun reader (T {str, start, size}): (char, cs) Reader.reader =
-         fn i => if i >= size
-                    then NONE
-                 else SOME (String.sub (str, start +? i), i + 1)
-                    
-      fun 'a scanSubstring
-         (f: (char, cs) Reader.reader -> ('a, int) Reader.reader)
-         (ss: substring): 'a option =
-         case f (reader ss) 0 of
-            NONE => NONE
-          | SOME (a, _) => SOME a
-*)
-   end
+structure Substring2
+   :> SUBSTRING_EXTRA
+   where type char = Char2.char
+   where type string = String2.string
+   where type substring = Char2VectorSlice.slice
+   = SubstringFn(Char2VectorSlice)
 
+structure Substring4
+   :> SUBSTRING_EXTRA
+   where type char = Char4.char
+   where type string = String4.string
+   where type substring = Char4VectorSlice.slice
+   = SubstringFn(Char4VectorSlice)
+
+structure Substring = Substring1
+structure WideSubstring = Substring4
+
 structure SubstringGlobal: SUBSTRING_GLOBAL = Substring
 open SubstringGlobal

Modified: mlton/branches/unicode/basis-library/text/text.sml
===================================================================
--- mlton/trunk/basis-library/text/text.sml	2005-11-19 20:10:35 UTC (rev 4246)
+++ mlton/branches/unicode/basis-library/text/text.sml	2005-11-24 00:51:12 UTC (rev 4250)
@@ -5,13 +5,36 @@
  * See the file MLton-LICENSE for details.
  *)
 
-structure Text: TEXT =
+structure Text1: TEXT =
    struct
-      structure Char = Char
-      structure CharArray = CharArray
-      structure CharArraySlice = CharArraySlice
-      structure CharVector = CharVector
-      structure CharVectorSlice = CharVectorSlice
-      structure String = String
-      structure Substring = Substring
+      structure Char = Char1
+      structure CharArray = Char1Array
+      structure CharArraySlice = Char1ArraySlice
+      structure CharVector = Char1Vector
+      structure CharVectorSlice = Char1VectorSlice
+      structure String = String1
+      structure Substring = Substring1
    end
+structure Text2: TEXT =
+   struct
+      structure Char = Char2
+      structure CharArray = Char2Array
+      structure CharArraySlice = Char2ArraySlice
+      structure CharVector = Char2Vector
+      structure CharVectorSlice = Char2VectorSlice
+      structure String = String2
+      structure Substring = Substring2
+   end
+structure Text4: TEXT =
+   struct
+      structure Char = Char4
+      structure CharArray = Char4Array
+      structure CharArraySlice = Char4ArraySlice
+      structure CharVector = Char4Vector
+      structure CharVectorSlice = Char4VectorSlice
+      structure String = String4
+      structure Substring = Substring4
+   end
+
+structure Text = Text1
+structure WideText = Text4

Added: mlton/branches/unicode/basis-library/text/unicode-4.1.0/PropList.txt.gz
===================================================================
(Binary files differ)


Property changes on: mlton/branches/unicode/basis-library/text/unicode-4.1.0/PropList.txt.gz
___________________________________________________________________
Name: svn:mime-type
   + application/octet-stream

Added: mlton/branches/unicode/basis-library/text/unicode-4.1.0/UnicodeData.txt.gz
===================================================================
(Binary files differ)


Property changes on: mlton/branches/unicode/basis-library/text/unicode-4.1.0/UnicodeData.txt.gz
___________________________________________________________________
Name: svn:mime-type
   + application/octet-stream