// Parser combinators in Generic C# // This program requires .Net version 2.0 // Peter Sestoft (sestoft@itu.dk) * 2001-11-13, 2001-11-19 // Compile GCollections.cs with // csc /t:module GCollections.cs // Then compile this file with // csc /addmodule:GCollections.netmodule Parsers.cs // No backtracking in alternatives (Alt) so far. // A parser is a function that reads from a source (a stream). It // either succeeds in parsing a prefix of the stream and then returns // a result together with the rest of the stream, or fails. // A parser returning a result of type T is an object of type // Parser, which has a function Parse(ISource) that returns a // Result. A result is either Succ(result, rest of source) or Fail. using System; using System.Text; // For StringBuilder using GCollections; // For LinkedList // Parser results -------------------------------------------------- interface Result { bool Success { get; } T Value { get; } ISource Source { get; } } class Succ : Result { T t; ISource src; public Succ(T t, ISource src) { this.t = t; this.src = src; } public bool Success { get { return true; } } public T Value { get { return t; } } public ISource Source { get { return src; } } } class Fail : Result { public bool Success { get { return false; } } public T Value { get { throw new InvalidOperationException(); } } public ISource Source { get { throw new InvalidOperationException(); } } } // Results: pairs ------------------------------------------------- class Pair { T t; U u; public Pair(T t, U u) { this.t = t; this.u = u; } public T Fst { get { return t; } } public U Snd { get { return u; } } } // Results: option type -------------------------------------------- class Option { T val; bool ok; public Option() { this.ok = false; } public Option(T val) { this.ok = true; this.val = val; } public bool Ok { get { return ok; } } public T Value { get { if (ok) return val; else throw new InvalidOperationException(); } } } // Sources: functional streams ------------------------------------- interface ISource { ISource MoveNext(); // returns null if at end char Current { get; } } struct StringSource : ISource { // invariant: i < s.Length readonly string s; readonly int i; public StringSource(string s) { this.s = s; this.i = -1; } private StringSource(string s, int i) { this.s = s; this.i = i; } public char Current { get { if (i < 0) throw new InvalidOperationException(); else return s[i]; } } public ISource MoveNext() { if (i+1 < s.Length) return new StringSource(s, i+1); else return null; } } // Parsers --------------------------------------------------------- interface Parser { Result Parse(ISource src); } // Always succeeds class Success : Parser { T result; public Success(T result) { this.result = result; } public Result Parse(ISource src) { return new Succ(result, src); } } // Always fails class Failure : Parser { public Failure() { } public Result Parse(ISource src) { return new Fail(); } } // Parse a T, then a U, and return the pair (T,U) class Seq : Parser > { Parser tp; Parser up; public Seq(Parser tp, Parser up) { this.tp = tp; this.up = up; } public Result > Parse(ISource src) { Result tr = tp.Parse(src); if (tr.Success) { Result ur = up.Parse(tr.Source); if (ur.Success) return new Succ >(new Pair(tr.Value, ur.Value), ur.Source); } return new Fail >(); } } // Parse a T, then a U, and return the T class SeqFst : Parser { Parser tp; Parser up; public SeqFst(Parser tp, Parser up) { this.tp = tp; this.up = up; } public Result Parse(ISource src) { Result tr = tp.Parse(src); if (tr.Success) { Result ur = up.Parse(tr.Source); if (ur.Success) return new Succ(tr.Value, ur.Source); } return new Fail(); } } // Parse a T, then U, and return the U class SeqSnd : Parser { Parser tp; Parser up; public SeqSnd(Parser tp, Parser up) { this.tp = tp; this.up = up; } public Result Parse(ISource src) { Result tr = tp.Parse(src); if (tr.Success) { Result ur = up.Parse(tr.Source); if (ur.Success) return new Succ(ur.Value, ur.Source); } return new Fail(); } } // Parse a T and succeed with result, or else succeed without class Opt : Parser< Option > { Parser tp; public Opt(Parser tp) { this.tp = tp; } public Result< Option > Parse(ISource src) { Result tr = tp.Parse(src); if (tr.Success) return new Succ< Option >(new Option(tr.Value), tr.Source); else return new Succ< Option >(new Option(), src); } } // Parse a T in one of several ways class Alt : Parser { Parser[] ps; public Alt(Parser tp, Parser up) { this.ps = new Parser[] { tp, up }; } public Alt(Parser[] ps) { this.ps = ps; } public Result Parse(ISource src) { foreach (Parser p in ps) { Result res = p.Parse(src); if (res.Success) return new Succ(res.Value, res.Source); } return new Fail(); } } // Parse zero or more T's, return list class Star : Parser > { Parser tp; public Star(Parser tp) { this.tp = tp; } public Result > Parse(ISource src) { LinkedList res = new LinkedList(); Result tr = tp.Parse(src); while (tr.Success) { res.AddLast(tr.Value); src = tr.Source; tr = tp.Parse(src); } return new Succ >(res, src); } } // Parse one or more T's, return list class Plus : Star { public Plus(Parser tp) : base(tp) { } public new Result > Parse(ISource src) { Result > res = base.Parse(src); if (res is Succ > && res.Value.Count >= 1) return res; else return new Fail >(); } } // Parse a character satisfying Ok(c) abstract class ParseTestChar : Parser { public Result Parse(ISource src) { ISource src1 = src.MoveNext(); if (src1 != null) if (Ok(src1.Current)) return new Succ(src1.Current, src1); return new Fail(); } public abstract bool Ok(char c); } // Parse a digit / parse a comma class Digit : ParseTestChar { public override bool Ok(char c) { return System.Char.IsDigit(c); } } class Comma : ParseTestChar { public override bool Ok(char c) { return c == ','; } } // Check for a particular character class ParseChar : ParseTestChar { char c; public ParseChar(char c) { this.c = c; } public override bool Ok(char c) { return this.c == c; } } // Parse the longest (possibly empty) prefix of characters satisfying Ok abstract class ParseChars0 : Parser { public virtual Result Parse(ISource src) { StringBuilder sb = new StringBuilder(); ISource src1 = src; ISource src2 = src1.MoveNext(); while (src2 != null && Ok(src2.Current)) { sb.Append(src2.Current); src1 = src2; src2 = src1.MoveNext(); } return new Succ(sb.ToString(), src1); } public abstract bool Ok(char c); } // Parse zero or more whitespace characters class ParseWS : ParseChars0 { public override bool Ok(char c) { return System.Char.IsWhiteSpace(c); } } // Skip initial whitespace, then parse a T class SkipWS : SeqSnd { static readonly Parser parseWS = new ParseWS(); public SkipWS(Parser tp) : base(parseWS, tp) { } } // Parse the longest non-empty prefix of characters satisfying Ok abstract class ParseChars1 : ParseChars0 { public override Result Parse(ISource src) { Result res = base.Parse(src); if (res.Success && res.Value.Length > 0) return res; else return new Fail(); } } // Parse an integer; note the combination of ParseChars1 and Transform class ParseInt : Transform /* , Parser */ { public ParseInt() : base(new ParseIntString()) { } public override int Trans(string s) { return int.Parse(s); } private class ParseIntString : ParseChars1 { public override bool Ok(char c) { return System.Char.IsDigit(c); } } } // Parse the given string lit class ParseLit : Parser { string lit; public ParseLit(string lit) { this.lit = lit; } public Result Parse(ISource src) { ISource src1 = src; ISource src2 = src1.MoveNext(); int i=0; while (i < lit.Length && src2 != null && src2.Current == lit[i]) { src1 = src2; src2 = src1.MoveNext(); i++; } if (i >= lit.Length) return new Succ(lit, src1); else return new Fail(); } } // Parse a T and transform it to a U abstract class Transform : Parser { Parser tp; public Transform(Parser tp) { this.tp = tp; } public Result Parse(ISource src) { Result tr = tp.Parse(src); if (tr.Success) return new Succ(Trans(tr.Value), tr.Source); return new Fail(); } public abstract U Trans(T t); } // A protoparser, or recursive parser builder interface IProtoParser { Parser Build(Parser parser); } // Create a recursive parser from a protoparser by `tying the knot' class ParseRec : Parser { Parser tp; public ParseRec(IProtoParser pp) { tp = pp.Build(this); } public Result Parse(ISource src) { return tp.Parse(src); } } // Parse an int as a pair (digit, list of digits), and transform to int // Possible, but not the right way to do it. class ToInt : Transform >, int> { public ToInt(Parser > > p) : base(p) { } public override int Trans(Pair > p) { int res = p.Fst - '0'; IEnumerator rest = p.Snd.GetEnumerator(); while (rest.MoveNext()) res = 10 * res + rest.Current - '0'; return res; } } // ---------------------------------------------------------------------- public class TestParser { static void Main(string[] args) { if (args.Length != 1) Console.WriteLine("Usage: Parsers \n"); else TestTree(args[0]); // IntParse(args); } static void IntParse(string[] args) { // Seq(Digit, Star(Digit)) --> int Parser intparser = new ToInt(new Seq > (new Digit(), new Star(new Digit()))); ISource src1 = new StringSource(args[0]); Console.WriteLine(2 * intparser.Parse(src1).Value); Console.WriteLine(2 * new ParseInt().Parse(src1).Value); Parser > commaintparser = new Star(new SeqSnd(new Comma(), intparser)); ISource src2 = new StringSource(args[1]); LinkedList ints = commaintparser.Parse(src2).Value; int sum = 0; foreach (int i in ints) sum += i; Console.WriteLine(sum); } // Parsing Lisp trees: tree ::= atom | number | ( tree * ) static void TestTree(string s) { ISource src = new StringSource(s); Parser tparse = new ParseRec(new TreeProtoParser()); Result res = tparse.Parse(src); if (res.Success) Console.WriteLine(res.Value); else Console.WriteLine("Parse failure"); } } // Tree protoparser; we use ParseRec to make it recursive (in fact, to // find its fixed-point) class TreeProtoParser : IProtoParser { public Parser Build(Parser tparse) { Parser parseList = new SeqSnd( new ParseChar('('), new SeqFst(new ParseList(tparse), new SkipWS(new ParseChar(')')))); return new SkipWS( new Alt( new Parser[] { new ParseNumber(), new ParseAtom(), parseList })); } } // Parsing the three kinds of trees class ParseNumber : Transform { public ParseNumber() : base(new ParseInt()) { } public override Tree Trans(int i) { return new Number(i); } } class ParseAtom : Transform { public ParseAtom() : base(new ParseSymbol()) { } public override Tree Trans(string s) { return new Atom(s); } private class ParseSymbol : ParseChars1 { public override bool Ok(char c) { return System.Char.IsLetterOrDigit(c); } } } class ParseList : Transform, Tree> { public ParseList(Parser tparser) : base(new Star(tparser)) { } public override Tree Trans(LinkedList trees) { return new List(trees); } } // Representing trees abstract class Tree { } class Number : Tree { private readonly int v; public Number(int v) { this.v = v; } public override string ToString() { return v.ToString(); } } class Atom : Tree { private string atom; public Atom(string atom) { this.atom = atom; } public override string ToString() { return atom; } } class List : Tree { protected readonly LinkedList elems; public List(LinkedList elems) { this.elems = elems; } public override string ToString() { StringBuilder sb = new StringBuilder(); sb.Append("("); bool first = true; foreach (Tree t in elems) { if (!first) sb.Append(" "); first = false; sb.Append(t); } sb.Append(")"); return sb.ToString(); } }