// RegExp -> NFA -> DFA -> Graph in Generic C# // This program requires .Net version 2.0. // Peter Sestoft (sestoft@itu.dk) // Java 2000-10-07, GC# 2001-10-23, 2003-09-03, 2004-07-26, 2006-03-05 // This file contains, in order: // * A class Nfa for representing an NFA (a nondeterministic finite // automaton), and for converting it to a DFA (a deterministic // finite automaton). Most complexity is in this class. // * A class Dfa for representing a DFA, a deterministic finite // automaton, and for writing a dot input file representing the DFA. // * Classes for representing regular expressions, and for building an // NFA from a regular expression // * A test class that creates an NFA, a DFA, and a dot input file // for a number of small regular expressions. The DFAs are // not minimized. using System; using System.Text; using System.Collections; using System.Collections.Generic; using System.IO; // A set represented as the collection of keys of a Dictionary class Set : ICollection { // Only the keys matter; the type bool used for the value is arbitrary private Dictionary dict; public Set() { dict = new Dictionary(); } public Set(T x) : this() { Add(x); } public Set(IEnumerable coll) : this() { foreach (T x in coll) Add(x); } public Set(T[] arr) : this() { foreach (T x in arr) Add(x); } public bool Contains(T x) { return dict.ContainsKey(x); } public void Add(T x) { if (!Contains(x)) dict.Add(x, false); } public bool Remove(T x) { return dict.Remove(x); } public void Clear() { dict.Clear(); } public bool IsReadOnly { get { return false; } } public IEnumerator GetEnumerator() { return dict.Keys.GetEnumerator(); } IEnumerator IEnumerable.GetEnumerator() { return GetEnumerator(); } public int Count { get { return dict.Count; } } public void CopyTo(T[] arr, int i) { dict.Keys.CopyTo(arr, i); } // Is this set a subset of that? public bool Subset(Set that) { foreach (T x in this) if (!that.Contains(x)) return false; return true; } // Create new set as intersection of this and that public Set Intersection(Set that) { Set res = new Set(); foreach (T x in this) if (that.Contains(x)) res.Add(x); return res; } // Create new set as union of this and that public Set Union(Set that) { Set res = new Set(this); foreach (T x in that) res.Add(x); return res; } // Compute hash code -- should be cached for efficiency public override int GetHashCode() { int res = 0; foreach (T x in this) res ^= x.GetHashCode(); return res; } public override bool Equals(Object that) { if (that is Set) { Set thatSet = (Set)that; return thatSet.Count == this.Count && thatSet.Subset(this) && this.Subset(thatSet); } else return false; } public override String ToString() { StringBuilder res = new StringBuilder(); res.Append("{ "); bool first = true; foreach (T x in this) { if (!first) res.Append(", "); res.Append(x); first = false; } res.Append(" }"); return res.ToString(); } } // ---------------------------------------------------------------------- // Regular expressions, NFAs, DFAs, and dot graphs // sestoft@itu.dk // Java 2001-07-10 * C# 2001-10-22 * Gen C# 2001-10-23, 2003-09-03 // In the Generic C# 2.0 version we // use Queue and Queue> for worklists // use Set for pre-DFA states // use List for NFA transition relations // use Dictionary, Dictionary>> // and Dictionary> for DFA transition relations /* Class Nfa and conversion from NFA to DFA --------------------------- A nondeterministic finite automaton (NFA) is represented as a Map from state number (int) to a List of Transitions, a Transition being a pair of a label lab (a String, null meaning epsilon) and a target state (an int). A DFA is created from an NFA in two steps: (1) Construct a DFA whose each of whose states is composite, namely a set of NFA states (Set of int). This is done by methods CompositeDfaTrans and EpsilonClose. (2) Replace composite states (Set of int) by simple states (int). This is done by methods Rename and MkRenamer. Method CompositeDfaTrans works as follows: Create the epsilon-closure S0 (a Set of ints) of the start state s0, and put it in a worklist (a Queue). Create an empty DFA transition relation, which is a Map from a composite state (an epsilon-closed Set of ints) to a Map from a label (a non-null String) to a composite state. Repeatedly choose a composite state S from the worklist. If it is not already in the keyset of the DFA transition relation, compute for every non-epsilon label lab the set T of states reachable by that label from some state s in S. Compute the epsilon-closure Tclose of every such state T and put it on the worklist. Then add the transition S -lab-> Tclose to the DFA transition relation, for every lab. Method EpsilonClose works as follows: Given a set S of states. Put the states of S in a worklist. Repeatedly choose a state s from the worklist, and consider all epsilon-transitions s -eps-> s' from s. If s' is in S already, then do nothing; otherwise add s' to S and the worklist. When the worklist is empty, S is epsilon-closed; return S. Method MkRenamer works as follows: Given a Map from Set of int to something, create an injective Map from Set of int to int, by choosing a fresh int for every value of the map. Method Rename works as follows: Given a Map from Set of int to Map from String to Set of int, use the result of MkRenamer to replace all Sets of ints by ints. */ class Nfa { private readonly int startState; private readonly int exitState; // This is the unique accept state private readonly IDictionary> trans; public Nfa(int startState, int exitState) { this.startState = startState; this.exitState = exitState; trans = new Dictionary>(); if (!startState.Equals(exitState)) trans.Add(exitState, new List()); } public int Start { get { return startState; } } public int Exit { get { return exitState; } } public IDictionary> Trans { get { return trans; } } public void AddTrans(int s1, String lab, int s2) { List s1Trans; if (trans.ContainsKey(s1)) s1Trans = trans[s1]; else { s1Trans = new List(); trans.Add(s1, s1Trans); } s1Trans.Add(new Transition(lab, s2)); } public void AddTrans(KeyValuePair> tr) { // Assumption: if tr is in trans, it maps to an empty list (end state) trans.Remove(tr.Key); trans.Add(tr.Key, tr.Value); } public override String ToString() { return "NFA start=" + startState + " exit=" + exitState; } // Construct the transition relation of a composite-state DFA // from an NFA with start state s0 and transition relation // trans (a Map from int to List of Transition). The start // state of the constructed DFA is the epsilon closure of s0, // and its transition relation is a Map from a composite state // (a Set of ints) to a Map from label (a String) to a // composite state (a Set of ints). static IDictionary, IDictionary>> CompositeDfaTrans(int s0, IDictionary> trans) { Set S0 = EpsilonClose(new Set(s0), trans); Queue> worklist = new Queue>(); worklist.Enqueue(S0); // The transition relation of the DFA IDictionary, IDictionary>> res = new Dictionary, IDictionary>>(); while (worklist.Count != 0) { Set S = worklist.Dequeue(); if (!res.ContainsKey(S)) { // The S -lab-> T transition relation being constructed for a given S IDictionary> STrans = new Dictionary>(); // For all s in S, consider all transitions s -lab-> t foreach (int s in S) { // For all non-epsilon transitions s -lab-> t, add t to T foreach (Transition tr in trans[s]) { if (tr.lab != null) { // Already a transition on lab Set toState; if (STrans.ContainsKey(tr.lab)) toState = STrans[tr.lab]; else { // No transitions on lab yet toState = new Set(); STrans.Add(tr.lab, toState); } toState.Add(tr.target); } } } // Epsilon-close all T such that S -lab-> T, and put on worklist Dictionary> STransClosed = new Dictionary >(); foreach (KeyValuePair> entry in STrans) { Set Tclose = EpsilonClose(entry.Value, trans); STransClosed.Add(entry.Key, Tclose); worklist.Enqueue(Tclose); } res.Add(S, STransClosed); } } return res; } // Compute epsilon-closure of state set S in transition relation trans. static Set EpsilonClose(Set S, IDictionary> trans) { // The worklist initially contains all S members Queue worklist = new Queue(S); Set res = new Set(S); while (worklist.Count != 0) { int s = worklist.Dequeue(); foreach (Transition tr in trans[s]) { if (tr.lab == null && !res.Contains(tr.target)) { res.Add(tr.target); worklist.Enqueue(tr.target); } } } return res; } // Compute a renamer, which is a Map from Set of int to int static IDictionary, int> MkRenamer(ICollection> states) { IDictionary, int> renamer = new Dictionary, int>(); int count = 0; foreach (Set k in states) renamer.Add(k, count++); return renamer; } // Using a renamer (a Map from Set of int to int), replace // composite (Set of int) states with simple (int) states in // the transition relation trans, which is assumed to be a Map // from Set of int to Map from String to Set of int. The // result is a Map from int to Map from String to int. static IDictionary> Rename(IDictionary, int> renamer, IDictionary, IDictionary>> trans) { IDictionary> newtrans = new Dictionary>(); foreach (KeyValuePair, IDictionary>> entry in trans) { Set k = entry.Key; IDictionary newktrans = new Dictionary(); foreach (KeyValuePair> tr in entry.Value) newktrans.Add(tr.Key, renamer[tr.Value]); newtrans.Add(renamer[k], newktrans); } return newtrans; } static Set AcceptStates(ICollection> states, IDictionary, int> renamer, int exit) { Set acceptStates = new Set(); foreach (Set state in states) if (state.Contains(exit)) acceptStates.Add(renamer[state]); return acceptStates; } public Dfa ToDfa() { IDictionary, IDictionary>> cDfaTrans = CompositeDfaTrans(startState, trans); Set cDfaStart = EpsilonClose(new Set(startState), trans); ICollection> cDfaStates = cDfaTrans.Keys; IDictionary, int> renamer = MkRenamer(cDfaStates); IDictionary> simpleDfaTrans = Rename(renamer, cDfaTrans); int simpleDfaStart = renamer[cDfaStart]; Set simpleDfaAccept = AcceptStates(cDfaStates, renamer, exitState); return new Dfa(simpleDfaStart, simpleDfaAccept, simpleDfaTrans); } // Nested class for creating distinctly named states when constructing NFAs public class NameSource { private static int nextName = 0; public int next() { return nextName++; } } } // Class Transition, a transition from one state to another ---------- public class Transition { public String lab; public int target; public Transition(String lab, int target) { this.lab = lab; this.target = target; } public override String ToString() { return "-" + lab + "-> " + target; } } // Class Dfa, deterministic finite automata -------------------------- /* A deterministic finite automaton (DFA) is represented as a Map from state number (int) to a Map from label (a String, non-null) to a target state (an int). */ class Dfa { private readonly int startState; private readonly Set acceptStates; private readonly IDictionary> trans; public Dfa(int startState, Set acceptStates, IDictionary> trans) { this.startState = startState; this.acceptStates = acceptStates; this.trans = trans; } public int Start { get { return startState; } } public Set Accept { get { return acceptStates; } } public IDictionary> Trans { get { return trans; } } public override String ToString() { return "DFA start=" + startState + "\naccept=" + acceptStates; } // Write an input file for the dot program. You can find dot at // http://www.research.att.com/sw/tools/graphviz/ public void WriteDot(String filename) { TextWriter wr = new StreamWriter(new FileStream(filename, FileMode.Create, FileAccess.Write)); wr.WriteLine("// Format this file as a Postscript file with "); wr.WriteLine("// dot " + filename + " -Tps -o out.ps\n"); wr.WriteLine("digraph dfa {"); wr.WriteLine("size=\"11,8.25\";"); wr.WriteLine("rotate=90;"); wr.WriteLine("rankdir=LR;"); wr.WriteLine("n999999 [style=invis];"); // Invisible start node wr.WriteLine("n999999 -> n" + startState); // Edge into start state // Accept states are double circles foreach (int state in trans.Keys) if (acceptStates.Contains(state)) wr.WriteLine("n" + state + " [peripheries=2];"); // The transitions foreach (KeyValuePair> entry in trans) { int s1 = entry.Key; foreach (KeyValuePair s1Trans in entry.Value) { String lab = s1Trans.Key; int s2 = s1Trans.Value; wr.WriteLine("n" + s1 + " -> n" + s2 + " [label=\"" + lab + "\"];"); } } wr.WriteLine("}"); wr.Close(); } } // Regular expressions ---------------------------------------------- // // Abstract syntax of regular expressions // r ::= A | r1 r2 | (r1|r2) | r* // abstract class Regex { abstract public Nfa MkNfa(Nfa.NameSource names); } class Eps : Regex { // The resulting nfa0 has form s0s -eps-> s0e public override Nfa MkNfa(Nfa.NameSource names) { int s0s = names.next(); int s0e = names.next(); Nfa nfa0 = new Nfa(s0s, s0e); nfa0.AddTrans(s0s, null, s0e); return nfa0; } } class Sym : Regex { String sym; public Sym(String sym) { this.sym = sym; } // The resulting nfa0 has form s0s -sym-> s0e public override Nfa MkNfa(Nfa.NameSource names) { int s0s = names.next(); int s0e = names.next(); Nfa nfa0 = new Nfa(s0s, s0e); nfa0.AddTrans(s0s, sym, s0e); return nfa0; } } class Seq : Regex { Regex r1, r2; public Seq(Regex r1, Regex r2) { this.r1 = r1; this.r2 = r2; } // If nfa1 has form s1s ----> s1e // and nfa2 has form s2s ----> s2e // then nfa0 has form s1s ----> s1e -eps-> s2s ----> s2e public override Nfa MkNfa(Nfa.NameSource names) { Nfa nfa1 = r1.MkNfa(names); Nfa nfa2 = r2.MkNfa(names); Nfa nfa0 = new Nfa(nfa1.Start, nfa2.Exit); foreach (KeyValuePair> entry in nfa1.Trans) nfa0.AddTrans(entry); foreach (KeyValuePair> entry in nfa2.Trans) nfa0.AddTrans(entry); nfa0.AddTrans(nfa1.Exit, null, nfa2.Start); return nfa0; } } class Alt : Regex { Regex r1, r2; public Alt(Regex r1, Regex r2) { this.r1 = r1; this.r2 = r2; } // If nfa1 has form s1s ----> s1e // and nfa2 has form s2s ----> s2e // then nfa0 has form s0s -eps-> s1s ----> s1e -eps-> s0e // s0s -eps-> s2s ----> s2e -eps-> s0e public override Nfa MkNfa(Nfa.NameSource names) { Nfa nfa1 = r1.MkNfa(names); Nfa nfa2 = r2.MkNfa(names); int s0s = names.next(); int s0e = names.next(); Nfa nfa0 = new Nfa(s0s, s0e); foreach (KeyValuePair> entry in nfa1.Trans) nfa0.AddTrans(entry); foreach (KeyValuePair> entry in nfa2.Trans) nfa0.AddTrans(entry); nfa0.AddTrans(s0s, null, nfa1.Start); nfa0.AddTrans(s0s, null, nfa2.Start); nfa0.AddTrans(nfa1.Exit, null, s0e); nfa0.AddTrans(nfa2.Exit, null, s0e); return nfa0; } } class Star : Regex { Regex r; public Star(Regex r) { this.r = r; } // If nfa1 has form s1s ----> s1e // then nfa0 has form s0s ----> s0s // s0s -eps-> s1s // s1e -eps-> s0s public override Nfa MkNfa(Nfa.NameSource names) { Nfa nfa1 = r.MkNfa(names); int s0s = names.next(); Nfa nfa0 = new Nfa(s0s, s0s); foreach (KeyValuePair> entry in nfa1.Trans) nfa0.AddTrans(entry); nfa0.AddTrans(s0s, null, nfa1.Start); nfa0.AddTrans(nfa1.Exit, null, s0s); return nfa0; } } // Trying the RE->NFA->DFA translation on three regular expressions class TestNFA { public static void Main(String[] args) { Regex a = new Sym("A"); Regex b = new Sym("B"); Regex c = new Sym("C"); Regex abStar = new Star(new Alt(a, b)); Regex bb = new Seq(b, b); Regex r = new Seq(abStar, new Seq(a, b)); // The regular expression (a|b)*ab BuildAndShow("dfa1.dot", r); // The regular expression ((a|b)*ab)* BuildAndShow("dfa2.dot", new Star(r)); // The regular expression ((a|b)*ab)((a|b)*ab) BuildAndShow("dfa3.dot", new Seq(r, r)); // The regular expression (a|b)*abb, from ASU 1986 p 136 BuildAndShow("dfa4.dot", new Seq(abStar, new Seq(a, bb))); // SML reals: sign?((digit+(\.digit+)?))([eE]sign?digit+)? Regex d = new Sym("digit"); Regex dPlus = new Seq(d, new Star(d)); Regex s = new Sym("sign"); Regex sOpt = new Alt(s, new Eps()); Regex dot = new Sym("."); Regex dotDigOpt = new Alt(new Eps(), new Seq(dot, dPlus)); Regex mant = new Seq(sOpt, new Seq(dPlus, dotDigOpt)); Regex e = new Sym("e"); Regex exp = new Alt(new Eps(), new Seq(e, new Seq(sOpt, dPlus))); Regex smlReal = new Seq(mant, exp); BuildAndShow("dfa5.dot", smlReal); } public static void BuildAndShow(String filename, Regex r) { Nfa nfa = r.MkNfa(new Nfa.NameSource()); Console.WriteLine(nfa); Console.WriteLine("---"); Dfa dfa = nfa.ToDfa(); Console.WriteLine(dfa); Console.WriteLine("Writing DFA graph to file " + filename); dfa.WriteDot(filename); Console.WriteLine(); } }