{ (* Lexer for a simple functional language (micro-ML) sestoft@dina.kvl.dk * 2001-02-21, 2002-03-10 *) open Lexing Funpar; exception LexicalError of string * int * int (* (message, loc1, loc2) *) fun lexerError lexbuf s = raise LexicalError (s, getLexemeStart lexbuf, getLexemeEnd lexbuf); val commentStart = ref 0; (* Start of outermost comment being scanned *) fun commentNotClosed lexbuf = raise LexicalError ("Comment not terminated", !commentStart, getLexemeEnd lexbuf); val commentDepth = ref 0; (* Current comment nesting *) (* Distinguish keywords from identifiers: *) fun keyword s = case s of "else" => ELSE | "end" => END | "false" => CSTBOOL false | "if" => IF | "in" => IN | "let" => LET | "not" => NOT | "then" => THEN | "true" => CSTBOOL true | _ => NAME s; } rule Token = parse [` ` `\t` `\n` `\r`] { Token lexbuf } | `~`?[`0`-`9`]+ { case Int.fromString (getLexeme lexbuf) of NONE => lexerError lexbuf "internal error" | SOME i => CSTINT i } | [`a`-`z``A`-`Z`][`a`-`z``A`-`Z``0`-`9`]* { keyword (getLexeme lexbuf) } | "(*" { commentStart := getLexemeStart lexbuf; commentDepth := 1; SkipComment lexbuf; Token lexbuf } | `=` { EQ } | "<>" { NE } | `>` { GT } | `<` { LT } | ">=" { GE } | "<=" { LE } | `+` { PLUS } | `-` { MINUS } | `*` { TIMES } | `/` { DIV } | `%` { MOD } | `(` { LPAR } | `)` { RPAR } | eof { EOF } | _ { lexerError lexbuf "Illegal symbol in input" } and SkipComment = parse "*)" { commentDepth := !commentDepth - 1; if !commentDepth = 0 then () else SkipComment lexbuf } | "(*" { commentDepth := !commentDepth + 1; SkipComment lexbuf } | (eof | `\^Z`) { commentNotClosed lexbuf } | _ { SkipComment lexbuf } ;