[MLton] cvs commit: spaces in mlb imported file names

Matthew Fluet fluet@mlton.org
Sat, 25 Sep 2004 14:39:22 -0700


fluet       04/09/25 14:39:21

  Modified:    mlton/front-end ml.lex mlb.lex
  Log:
  MAIL spaces in mlb imported file names
  
  Extended the MLB lexer to parse SML style strings, which are passed as
  FILE tokens to the MLB parser.  This allows arbitrary characters in
  file names.  Note, however, that  mlton -stop f  will print file names
  without quoting, so spaces in file names break using  mlton -stop f
  in Makefiles.
  
  Note that the MLB lexer has a fairly limited regexp for filenames.
  Filenames must be composed of alpha-num characters or "-" or "_".  So,
  you need to "quote" for anything more advanced.
  
  I'm not 100% happy with this situation, but I wasn't very successful
  in determining the Makefile or bash filename regexp.  Also,
  considering that this is SML, I'm tempted to use SML style strings,
  which makes it easy for a user to understand what they are writing
  down, although it makes writing Win32 filenames a bit verbose.

Revision  Changes    Path
1.17      +1 -1      mlton/mlton/front-end/ml.lex

Index: ml.lex
===================================================================
RCS file: /cvsroot/mlton/mlton/mlton/front-end/ml.lex,v
retrieving revision 1.16
retrieving revision 1.17
diff -u -r1.16 -r1.17
--- ml.lex	15 Sep 2004 18:16:28 -0000	1.16
+++ ml.lex	25 Sep 2004 21:39:21 -0000	1.17
@@ -309,7 +309,7 @@
 			  Char.ord(String.sub(yytext, 1)) * 100
 			  + Char.ord(String.sub(yytext, 2)) * 10
 			  + Char.ord(String.sub(yytext, 3))
-			  - (Char.ord #"0") *111
+			  - (Char.ord #"0") * 111
 		    in (if x > 255
 			   then stringError (source, yypos,
 					     "illegal ascii escape")



1.2       +113 -20   mlton/mlton/front-end/mlb.lex

Index: mlb.lex
===================================================================
RCS file: /cvsroot/mlton/mlton/mlton/front-end/mlb.lex,v
retrieving revision 1.1
retrieving revision 1.2
diff -u -r1.1 -r1.2
--- mlb.lex	28 Jul 2004 21:05:15 -0000	1.1
+++ mlb.lex	25 Sep 2004 21:39:21 -0000	1.2
@@ -13,11 +13,14 @@
 val commentStart = ref SourcePos.bogus
 val lineFile: File.t ref = ref ""
 val lineNum: int ref = ref 0
+val stringStart = ref SourcePos.bogus
 
 fun lineDirective (source, file, yypos) =
    Source.lineDirective (source, file,
 			 {lineNum = !lineNum,
 			  lineStart = yypos - !colNum})
+fun addString (s: string) = charlist := s :: (!charlist)
+fun addChar (c: char) = addString (String.fromChar c)
 
 fun inc (ri as ref (i: int)) = (ri := i + 1)
 fun dec (ri as ref (i: int)) = (ri := i-1)
@@ -27,6 +30,11 @@
 				  right = Source.getPos (source, right)},
 		     msg)
 
+fun stringError (source, right, msg) =
+   Control.errorStr (Region.make {left = !stringStart,
+				  right = Source.getPos (source, right)},
+		     msg)
+
 val eof: lexarg -> lexresult =
    fn {source, ...} =>
    let
@@ -64,50 +72,63 @@
 
 %% 
 %reject
-%s A L LL LLC LLCQ;
+%s A S F L LL LLC LLCQ;
 %header (functor MLBLexFun (structure Tokens : MLB_TOKENS));
 %arg ({source});
 alphanum=[A-Za-z'_0-9]*;
 alphanumId=[A-Za-z]{alphanum};
 id={alphanumId};
-envvar="$("([A-Z_]+)")";
+
+pathvar="$("([A-Z_]+)")";
 filebase=[-A-Za-z_0-9]+;
 fileext=[-A-Za-z_0-9]+;
 filename={filebase}("."{fileext})*;
-arc=({envvar}|{filename}|"."|"..");
+arc=({pathvar}|{filename}|"."|"..");
 relpath=({arc}"/")*;
 abspath="/"{relpath};
 path={relpath}|{abspath};
 file={path}{filename};
+
 ws=("\012"|[\t\ ])*;
 nrws=("\012"|[\t\ ])+;
 cr="\013";
 nl="\010";
 eol=({cr}{nl}|{nl}|{cr});
 
+hexDigit=[0-9a-fA-F];
+
 %%
 <INITIAL>{ws}	=> (continue ());
 <INITIAL>{eol}	=> (Source.newline (source, yypos); continue ());
-
+<INITIAL>"_prim" 
+                => (tok (Tokens.PRIM, source, yypos, yypos + 4));
 <INITIAL>","	=> (tok (Tokens.COMMA, source, yypos, yypos + 1));
 <INITIAL>";"	=> (tok (Tokens.SEMICOLON, source, yypos, yypos + 1));
-<INITIAL>"=" => (tok (Tokens.EQUALOP, source, yypos, yypos + 1));
-<INITIAL>"ann" => (tok (Tokens.ANN, source, yypos, yypos + 3));
-<INITIAL>"and" => (tok (Tokens.AND, source, yypos, yypos + 3));
-<INITIAL>"bas" => (tok (Tokens.BAS, source, yypos, yypos + 3));
-<INITIAL>"basis" => (tok (Tokens.BASIS, source, yypos, yypos + 5));
-<INITIAL>"end" => (tok (Tokens.END, source, yypos, yypos + 3));
-<INITIAL>"functor" => (tok (Tokens.FUNCTOR, source, yypos, yypos + 7));
-<INITIAL>"in" => (tok (Tokens.IN, source, yypos, yypos + 2));
-<INITIAL>"let" => (tok (Tokens.LET, source, yypos, yypos + 3));
-<INITIAL>"local" => (tok (Tokens.LOCAL, source, yypos, yypos + 5));
+<INITIAL>"="    => (tok (Tokens.EQUALOP, source, yypos, yypos + 1));
+<INITIAL>"ann"  => (tok (Tokens.ANN, source, yypos, yypos + 3));
+<INITIAL>"and"  => (tok (Tokens.AND, source, yypos, yypos + 3));
+<INITIAL>"bas"  => (tok (Tokens.BAS, source, yypos, yypos + 3));
+<INITIAL>"basis" 
+                => (tok (Tokens.BASIS, source, yypos, yypos + 5));
+<INITIAL>"end"  => (tok (Tokens.END, source, yypos, yypos + 3));
+<INITIAL>"functor" 
+                => (tok (Tokens.FUNCTOR, source, yypos, yypos + 7));
+<INITIAL>"in"   => (tok (Tokens.IN, source, yypos, yypos + 2));
+<INITIAL>"let"  => (tok (Tokens.LET, source, yypos, yypos + 3));
+<INITIAL>"local" 
+                => (tok (Tokens.LOCAL, source, yypos, yypos + 5));
 <INITIAL>"open" => (tok (Tokens.OPEN, source, yypos, yypos + 4));
-<INITIAL>"_prim" => (tok (Tokens.PRIM, source, yypos, yypos + 4));
-<INITIAL>"signature" => (tok (Tokens.SIGNATURE, source, yypos, yypos + 9));
-<INITIAL>"structure" => (tok (Tokens.STRUCTURE, source, yypos, yypos + 9));
-<INITIAL>{id} => (tok' (Tokens.ID, yytext, source, yypos));
+<INITIAL>"signature" 
+                => (tok (Tokens.SIGNATURE, source, yypos, yypos + 9));
+<INITIAL>"structure" 
+                => (tok (Tokens.STRUCTURE, source, yypos, yypos + 9));
+<INITIAL>{id}   => (tok' (Tokens.ID, yytext, source, yypos));
 <INITIAL>{file} => (tok' (Tokens.FILE, yytext, source, yypos));
 
+<INITIAL>\"     => (charlist := [""]
+                    ; stringStart := Source.getPos (source, yypos)
+                    ; YYBEGIN S
+                    ; continue ());
 <INITIAL>"(*#line"{nrws}
                 => (YYBEGIN L
 		    ; commentStart := Source.getPos (source, yypos)
@@ -130,7 +151,7 @@
 		    ; (colNum := valOf (Int.fromString yytext))
 		      handle Overflow => YYBEGIN A
 	            ; continue ());
-<LL>.          => (YYBEGIN LLC; continue ()
+<LL>.           => (YYBEGIN LLC; continue ()
 		(* note hack, since ml-lex chokes on the empty string for 0* *));
 <LLC>"*)"       => (YYBEGIN INITIAL
 		    ; lineDirective (source, NONE, yypos + 2)
@@ -140,7 +161,8 @@
 <LLCQ>\""*)"    => (YYBEGIN INITIAL
                     ; lineDirective (source, SOME (!lineFile), yypos + 3)
                     ; commentLevel := 0; charlist := []; continue ());
-<L,LLC,LLCQ>"*)" => (YYBEGIN INITIAL; commentLevel := 0; charlist := []; continue ());
+<L,LLC,LLCQ>"*)" 
+                => (YYBEGIN INITIAL; commentLevel := 0; charlist := []; continue ());
 <L,LLC,LLCQ>.   => (YYBEGIN A; continue ());
 
 <A>"(*"		=> (inc commentLevel; continue ());
@@ -149,3 +171,74 @@
 		    ; if 0 = !commentLevel then YYBEGIN INITIAL else ()
 		    ; continue ());
 <A>.		=> (continue ());
+
+<S>\"	        => (let
+		       val s = concat (rev (!charlist))
+		       val _ = charlist := nil
+		       fun make (t, v) =
+			  t (v, !stringStart, Source.getPos (source, yypos + 1))
+                    in YYBEGIN INITIAL
+		       ; make (Tokens.FILE, s)
+                    end);
+<S>\\a		=> (addChar #"\a"; continue ());
+<S>\\b		=> (addChar #"\b"; continue ());
+<S>\\f		=> (addChar #"\f"; continue ());
+<S>\\n		=> (addChar #"\n"; continue ());
+<S>\\r		=> (addChar #"\r"; continue ());
+<S>\\t		=> (addChar #"\t"; continue ());
+<S>\\v		=> (addChar #"\v"; continue ());
+<S>\\\^[@-_]	=> (addChar (Char.chr(Char.ord(String.sub(yytext, 2))
+				      -Char.ord #"@"))
+                    ; continue ());
+<S>\\\^.	=> (error (source, yypos, yypos + 2,
+		           "illegal control escape; must be one of @ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_")
+	            ; continue ());
+<S>\\[0-9]{3}	=> (let
+		       val x =
+			  Char.ord(String.sub(yytext, 1)) * 100
+			  + Char.ord(String.sub(yytext, 2)) * 10
+			  + Char.ord(String.sub(yytext, 3))
+			  - (Char.ord #"0") * 111
+		    in (if x > 255
+			   then stringError (source, yypos,
+					     "illegal ascii escape")
+			else addChar(Char.chr x);
+			   continue ())
+		    end);
+<S>\\u{hexDigit}{4} 
+                => (let
+		       val x = 
+			  StringCvt.scanString
+			  (Pervasive.Int.scan StringCvt.HEX)
+			  (String.substring (yytext, 2, 4))
+		       fun err () =
+			  stringError (source, yypos,
+				       "illegal unicode escape")
+		    in (case x of
+			  SOME x => if x > 255
+				       then err()
+				    else addChar(Char.chr x)
+			| _ => err())
+			; continue ()
+		    end);
+<S>\\\"		=> (addString "\""; continue ());
+<S>\\\\		=> (addString "\\"; continue ());
+<S>\\{nrws}   	=> (YYBEGIN F; continue ());
+<S>\\{eol}      => (Source.newline (source, yypos) ; YYBEGIN F ; continue ());   
+<S>\\		=> (stringError (source, yypos, "illegal string escape")
+		    ; continue ());
+<S>{eol}	=> (Source.newline (source, yypos)
+		    ; stringError (source, yypos, "unclosed string")
+		    ; continue ());
+<S>" "|[\033-\126]  
+                => (addString yytext; continue ());
+<S>.            => (stringError (source, yypos + 1, "illegal character in string")
+	            ; continue ());
+
+<F>{eol}        => (Source.newline (source, yypos) ; continue ());
+<F>{ws}		=> (continue ());
+<F>\\		=> (YYBEGIN S
+		    ; stringStart := Source.getPos (source, yypos)
+		    ; continue ());
+<F>.		=> (stringError (source, yypos, "unclosed string")
+		    ; continue ());