// Example 98 from page 81 of Java Precisely edition 1 (The MIT Press 2002)
// Author: Peter Sestoft (sestoft@dina.kvl.dk)

import java.util.*;
import java.io.*;

// Regular expressions, NFAs, DFAs, and dot graphs

// This file contains, in order:
//   * A class Nfa for representing an NFA (a nondeterministic finite 
//     automaton), and for converting it to a DFA (a deterministic 
//     finite automaton).  Most complexity is in this class.
//   * A class Dfa for representing a DFA, a deterministic finite 
//     automaton, and for writing a dot input file representing the DFA.
//   * Classes for representing regular expressions, and for building an 
//     NFA from a regular expression
//   * A test class that creates an NFA, a DFA, and a dot input file 
//     for a number of small regular expressions.  The DFAs are 
//     not minimized.

/* Class Nfa and conversion from NFA to DFA ---------------------------

  A nondeterministic finite automaton (NFA) is represented as a HashMap
  from state number (Integer) to a List of Transitions, a Transition
  being a pair of a label lab (a String, null meaning epsilon) and a
  target state (an Integer).

  A DFA is created from an NFA in two steps:

    (1) Construct a DFA whose states are composite, namely sets of NFA
        states (Set of Integer).  This is done by methods 
        compositeDfaTrans and epsilonClose.

    (2) Replace composite states (Set of Integer) by simple states
        (Integer).  This is done by methods rename and mkRenamer.

  Method compositeDfaTrans works as follows: 

    Create the epsilon-closure S0 (a HashSet of Integers) of the start
    state s0, and put it in a worklist.  Create an empty DFA transition
    relation, which is a HashMap from a composite state (an 
    epsilon-closed HashSet of Integers) to a HashMap from a label 
    (a non-null String) to a composite state.

    Repeatedly choose a composite state S from the worklist.  If it is
    not already in the keyset of the DFA transition relation, compute
    for every non-epsilon label lab the set T of states reachable by
    that label from some state s in S.  Compute the epsilon-closure
    Tclose of every such state T and put it on the worklist.  Then add
    the transition S -lab-> Tclose to the DFA transition relation, for
    every lab.

  Method epsilonClosure works as follows: 

    Given a set S of states.  Put the states of S in a worklist.
    Repeatedly choose a state s from the worklist, and consider all
    epsilon-transitions s -eps-> s' from s.  If s' is in S already, then
    do nothing; otherwise add s' to S and the worklist.  When the
    worklist is empty, S is epsilon-closed; return S.

  Method mkRenamer works as follows: 

    Given a Map from Set of Integer to something, create an injective
    Map from Set of Integer to Integer, by choosing a fresh Integer for
    every value of the map.

  Method rename works as follows:

    Given a Map from Set of Integer to Map from String to Set of
    Integer, use the result of mkRenamer to replace all Sets of Integers
    by Integers.  

*/

class Nfa {
  private Integer startState;
  private Integer exitState;    // This is the unique accept state
  private Map /* from Integer to List of Transition */ trans;

  public Nfa(Integer startState, Integer exitState) {
    this.startState = startState; this.exitState = exitState;
    trans = new HashMap();
    if (!startState.equals(exitState))
      trans.put(exitState, new LinkedList());
  }

  public Integer getStart() { return startState; }

  public Integer getExit() { return exitState; }

  public Map getTrans() { return trans; }

  public void addTrans(Integer s1, String lab, Integer s2) {
    List /* of Transition */ s1Trans;
    if (trans.containsKey(s1)) 
      s1Trans = (List)trans.get(s1);
    else {
      s1Trans = new LinkedList();
      trans.put(s1, s1Trans);
    }
    s1Trans.add(new Transition(lab, s2));
  }

  public void addTrans(Map.Entry /* from Integer to List of Transition */ tr) {
    // assert !trans.containsKey((Integer)tr.getKey());
    trans.put((Integer)tr.getKey(), (List)tr.getValue());
  }

  public String toString() {
    return "NFA start=" + startState + " exit=" + exitState + "\n" + trans;
  }

  // Construct the transition relation of a composite-state DFA from
  // an NFA with start state s0 and transition relation trans (a
  // HashMap from Integer to List of Transition).  The start state of
  // the constructed DFA is the epsilon closure of s0, and its
  // transition relation is a HashMap from a composite state (a
  // HashSet of Integers) to a HashMap from label (a String) to a
  // composite state (a HashSet of Integers).

  static Map compositeDfaTrans(Integer s0, Map trans) {
    Set /* of Integer */ S0 = epsilonClose(Collections.singleton(s0), trans);
    LinkedList /* of Set of Integer */ worklist = new LinkedList();
    worklist.add(S0);
    // The transition relation of the DFA
    Map /* from Set of Integer to Map from String to Set of Integer */ res = 
      new HashMap(); 
    while (!worklist.isEmpty()) {
      Set /* of Integer */ S = (Set)worklist.removeFirst();
      if (!res.containsKey(S)) {
        // The S -lab-> T transition relation being constructed for a given S
        Map /* from String to Set of Integer */ STrans = new HashMap(); 
        // For all s in S, consider all transitions s -lab-> t
        Iterator SIter = S.iterator();
        while (SIter.hasNext()) {
          Integer s = (Integer)SIter.next();
          // For all non-epsilon transitions s -lab-> t, add t to T
          Iterator labIter = ((List)trans.get(s)).iterator();
          while (labIter.hasNext()) {
            Transition tr = (Transition)labIter.next();
            if (tr.lab != null) {       // Already a transition on lab
              HashSet /* of Integer */ toState; 
              if (STrans.containsKey(tr.lab)) 
                toState = (HashSet)STrans.get(tr.lab);
              else {                    // No transitions on lab yet
                toState = new HashSet();
                STrans.put(tr.lab, toState);
              }
              toState.add(tr.target);
            }
          }
        }
        // Epsilon-close all T such that S -lab-> T, and put on worklist
        HashMap /* from String to Set of Integer */ STransClosed = 
	  new HashMap();
        Iterator tIter = STrans.entrySet().iterator();
        while (tIter.hasNext()) {
          Map.Entry entry = (Map.Entry)tIter.next();
          Set /* of Integer */ Tclose = 
	    epsilonClose((Set)entry.getValue(), trans);
          STransClosed.put((String)entry.getKey(), Tclose);
          worklist.add(Tclose);
        }
        res.put(S, STransClosed);
      }
    }
    return res;
  }  

  // Compute epsilon-closure of state set S in transition relation trans.  
  // Parameter S is a Set of Integer.
  // Parameter trans is a HashMap from Integer to List of Transition.
  // The result is a Set of Integer.

  static Set epsilonClose(Set S, Map trans) {
    LinkedList /* of Integer */ worklist = new LinkedList(S);
    Set /* of Integer */ res = new HashSet(S);
    while (!worklist.isEmpty()) {
      Integer s = (Integer)worklist.removeFirst();
      Iterator sTrans = ((List)trans.get(s)).iterator();
      while (sTrans.hasNext()) {
        Transition tr = (Transition)sTrans.next();
        if (tr.lab == null && !res.contains(tr.target)) {
          res.add(tr.target);
          worklist.add(tr.target);
        }
      }
    }
    return res;
  }

  // Compute a renamer, which is a Map from Set of Integer to Integer,
  // provided parameter states is a Collection of Set of Integer.

  static Map mkRenamer(Collection states) {
    Map /* from Set of Integer to Integer */ renamer = new HashMap();
    Iterator iter = states.iterator();
    while (iter.hasNext()) {
      Set /* of Integer */ k = (Set)iter.next();
      renamer.put(k, new Integer(renamer.size()));
    }
    return renamer;
  }

  // Using a renamer (a Map from Set of Integer to Integer), replace
  // composite (Set of Integer) states with simple (Integer) states in
  // the transition relation trans, which is assumed to be a Map from
  // Set of Integer to Map from String to Set of Integer.  The result
  // is a Map from Integer to Map from String to Integer.

  static Map rename(Map renamer, Map trans) {
    Map /* from Integer to Map from String to Integer */ newtrans = 
      new HashMap();
    Iterator iter = trans.entrySet().iterator();
    while (iter.hasNext()) {
      Map.Entry entry = (Map.Entry)iter.next();
      Set /* of Integer */ k = (Set)entry.getKey();
      Map /* from String to Integer */ newktrans = new HashMap();
      Iterator ktrans = ((Map)entry.getValue()).entrySet().iterator();
      while (ktrans.hasNext()) {
        Map.Entry tr = (Map.Entry)ktrans.next();
        newktrans.put(tr.getKey(), (Integer)renamer.get(tr.getValue()));
      }
      newtrans.put(renamer.get(k), newktrans);
    }
    return newtrans;
  }

  static Set acceptStates(Set /* of Set of Integer */ states, 
                          Map /* from Set of Integer to Integer */ renamer,
                          Integer exit) {
    Set /* of Integer */ acceptStates = new HashSet();
    Iterator stateIter = states.iterator();
    while (stateIter.hasNext()) {
      Set /* of Integer */ state = (Set)stateIter.next();
      if (state.contains(exit)) 
        acceptStates.add((Integer)renamer.get(state));
    }
    return acceptStates;
  }

  public Dfa toDfa() {
    Map cDfaTrans = compositeDfaTrans(startState, trans);
    Set cDfaStart = epsilonClose(Collections.singleton(startState), trans);
    Set cDfaStates = cDfaTrans.keySet();
    Map renamer = mkRenamer(cDfaStates);
    Map simpleDfaTrans = rename(renamer, cDfaTrans);
    Integer simpleDfaStart = (Integer)(renamer.get(cDfaStart));
    Set simpleDfaAccept = acceptStates(cDfaStates, renamer, exitState);
    return new Dfa(simpleDfaStart, simpleDfaAccept, simpleDfaTrans);
  }

  // Nested class for creating distinctly named states when constructing NFAs

  static class NameSource {
    private int nextName = 0;
    public Integer next() { return new Integer(nextName++); }
  }

  // Nested class for representing a transition from one state to another

  static class Transition {
    String lab;
    Integer target;
    
    public Transition(String lab, Integer target) 
    { this.lab = lab; this.target = target; }
    
    public String toString() {
      return "-" + lab + "-> " + target;
    }
  }
}


// Class Dfa, deterministic finite automata --------------------------

/*
  A deterministic finite automaton (DFA) is represented as a HashMap
  from state number (Integer) to a HashMap from label (a String,
  non-null) to a target state (an Integer).
*/

class Dfa {
  private Integer startState;
  private Set /* of Integer */ acceptStates;
  private Map /* from Integer to Map from String to Integer */ trans;

  public Dfa(Integer startState, Set acceptStates, Map trans) {
    this.startState = startState; 
    this.acceptStates = acceptStates;
    this.trans = trans;
  }
  
  public Integer getStart() { return startState; }

  public Set getAccept() { return acceptStates; }

  public Map getTrans() { return trans; }

  public String toString() {
    return "DFA start=" + startState + "\naccept=" + acceptStates 
      + "\n" + trans;
  }

  // Write an input file for the dot program.  You can find dot at
  // http://www.research.att.com/sw/tools/graphviz/

  public void writeDot(String filename) throws IOException {
    PrintWriter out = new PrintWriter(new FileWriter(filename));
    out.println("// Format this file as a Postscript file with ");
    out.println("//    dot " + filename + " -Tps -o out.ps\n");
    out.println("digraph dfa {");
    out.println("size=\"11,8.25\";");
    out.println("rotate=90;");
    out.println("rankdir=LR;");
    out.println("n999999 [style=invis];");    // Invisible start node
    out.println("n999999 -> n" + startState); // Edge into start state
    
    // Accept states are double circles
    Iterator stateIter = trans.keySet().iterator();
    while (stateIter.hasNext()) {
      Integer state = (Integer)stateIter.next();
      if (acceptStates.contains(state))
        out.println("n" + state + " [peripheries=2];");
    }

    // The transitions 
    Iterator stateTransIter = trans.entrySet().iterator();
    while (stateTransIter.hasNext()) {
      Map.Entry entry = (Map.Entry)stateTransIter.next();
      Integer s1 = (Integer)entry.getKey();
      Iterator transIter = ((Map)entry.getValue()).entrySet().iterator();
      while (transIter.hasNext()) {
        Map.Entry s1Trans = (Map.Entry)transIter.next();
        String lab = (String) s1Trans.getKey();
        Integer s2 = (Integer)s1Trans.getValue();
        out.println("n" + s1 + " -> n" + s2 + " [label=" + lab + "];");
      }
    }
    out.println("}");
    out.close();
  }
}

// Regular expressions ----------------------------------------------
//
// Abstract syntax of regular expressions
//    r ::= A | r1 r2 | (r1|r2) | r*
//

abstract class Regex { 
  abstract public Nfa mkNfa(Nfa.NameSource names);
}

class Sym extends Regex {
  String sym;

  public Sym(String sym) { 
    this.sym = sym; 
  }

  // The resulting nfa0 has form s0s -sym-> s0e

  public Nfa mkNfa(Nfa.NameSource names) {
    Integer s0s = names.next();
    Integer s0e = names.next();
    Nfa nfa0 = new Nfa(s0s, s0e);
    nfa0.addTrans(s0s, sym, s0e);
    return nfa0;
  }
}

class Seq extends Regex {
  Regex r1, r2;

  public Seq(Regex r1, Regex r2) {
    this.r1 = r1; this.r2 = r2;
  }

  // If   nfa1 has form s1s ----> s1e 
  // and  nfa2 has form s2s ----> s2e 
  // then nfa0 has form s1s ----> s1e -eps-> s2s ----> s2e

  public Nfa mkNfa(Nfa.NameSource names) {
    Nfa nfa1 = r1.mkNfa(names);
    Nfa nfa2 = r2.mkNfa(names);
    Nfa nfa0 = new Nfa(nfa1.getStart(), nfa2.getExit());
    Iterator nfa1trans = nfa1.getTrans().entrySet().iterator();
    while (nfa1trans.hasNext()) 
      nfa0.addTrans((Map.Entry)nfa1trans.next());
    Iterator nfa2trans = nfa2.getTrans().entrySet().iterator();
    while (nfa2trans.hasNext()) 
      nfa0.addTrans((Map.Entry)nfa2trans.next());
    nfa0.addTrans(nfa1.getExit(), null, nfa2.getStart());
    return nfa0;
  }
}

class Alt extends Regex {
  Regex r1, r2;

  public Alt(Regex r1, Regex r2) {
    this.r1 = r1; this.r2 = r2;
  }

  // If   nfa1 has form s1s ----> s1e 
  // and  nfa2 has form s2s ----> s2e 
  // then nfa0 has form s0s -eps-> s1s ----> s1e -eps-> s0e
  //                    s0s -eps-> s2s ----> s2e -eps-> s0e

  public Nfa mkNfa(Nfa.NameSource names) {
    Nfa nfa1 = r1.mkNfa(names);
    Nfa nfa2 = r2.mkNfa(names);
    Integer s0s = names.next();
    Integer s0e = names.next();
    Nfa nfa0 = new Nfa(s0s, s0e);
    Iterator nfa1trans = nfa1.getTrans().entrySet().iterator();
    while (nfa1trans.hasNext()) 
      nfa0.addTrans((Map.Entry)nfa1trans.next());
    Iterator nfa2trans = nfa2.getTrans().entrySet().iterator();
    while (nfa2trans.hasNext()) 
      nfa0.addTrans((Map.Entry)nfa2trans.next());
    nfa0.addTrans(s0s, null, nfa1.getStart());
    nfa0.addTrans(s0s, null, nfa2.getStart());
    nfa0.addTrans(nfa1.getExit(), null, s0e);
    nfa0.addTrans(nfa2.getExit(), null, s0e);
    return nfa0;
  }
}

class Star extends Regex {
  Regex r;

  public Star(Regex r) {
    this.r = r; 
  }

  // If   nfa1 has form s1s ----> s1e 
  // then nfa0 has form s0s ----> s0s
  //                    s0s -eps-> s1s
  //                    s1e -eps-> s0s

  public Nfa mkNfa(Nfa.NameSource names) {
    Nfa nfa1 = r.mkNfa(names);
    Integer s0s = names.next();
    Nfa nfa0 = new Nfa(s0s, s0s);
    Iterator nfa1trans = nfa1.getTrans().entrySet().iterator();
    while (nfa1trans.hasNext()) 
      nfa0.addTrans((Map.Entry)nfa1trans.next());
    nfa0.addTrans(s0s, null, nfa1.getStart());
    nfa0.addTrans(nfa1.getExit(), null, s0s);
    return nfa0;
  }
}

// Trying the RE->NFA->DFA translation on three regular expressions

class Example98 {
  public static void main(String[] args) 
    throws IOException {
    Regex a = new Sym("A");
    Regex b = new Sym("B");
    Regex r = new Seq(new Star(new Alt(a, b)), new Seq(a, b));
    // The regular expression (a|b)*ab
    buildAndShow("dfa1.dot", r);
    // The regular expression ((a|b)*ab)*
    buildAndShow("dfa2.dot", new Star(r));
    // The regular expression ((a|b)*ab)((a|b)*ab)
    buildAndShow("dfa3.dot", new Seq(r, r));
  }

  public static void buildAndShow(String filename, Regex r) 
    throws IOException {
    Nfa nfa = r.mkNfa(new Nfa.NameSource());
    System.out.println(nfa);
    Dfa dfa = nfa.toDfa();
    System.out.println(dfa);
    System.out.println("Writing DFA graph to file " + filename);
    dfa.writeDot(filename);
    System.out.println();
  }
}

