// Converts a regular expression into an NFA.
package hjzgg.formal_ceremony_to_dfa;import java.util.ArrayList;class Edge{public int u, v;public char key;public Edge(int u, int v, char key) {super();this.u = u;this.v = v;this.key = key;}@Overridepublic String toString() {return u + "->" + v + " " + key;}@Overridepublic boolean equals(Object arg0) {Edge tmp = (Edge)arg0;return tmp.u==this.u && tmp.v==this.v && tmp.key==this.key;}@Overridepublic int hashCode() {return u+v+key;} }class NFA{public static final int MAX_NODE = 100;private boolean finalState[] = new boolean[MAX_NODE];//记录每一个节点是否为终态private String formal_ceremony;//正规式字符串private int cnt_node=1;//记录节点的个数private Map<Integer, Integer> endNode = new TreeMap<Integer, Integer>();//每一个开始节点对应的终端节点private ArrayList<Edge> nodeAl = new ArrayList<Edge>();private Vector<Pair>[] g = new Vector[MAX_NODE];//NFA图private Set<Character> st = new TreeSet<Character>();//正规式中出现的字符的集合public NFA(String formal_ceremony) {super();this.formal_ceremony = formal_ceremony;}private void addEdge(int u, int v, char ch){nodeAl.add(new Edge(u, v, ch));if(g[u] == null) g[u] = new Vector<Pair>();g[u].add(new Pair(v, ch));if(ch!='$')st.add(ch);}public boolean kernel_way(int fa, int ld, int rd, boolean isClosure){//fa表示区间的开始点,正规式的区间[ld, rd], isClosure表示这段区间查是否存在闭包 if(ld < 0 || rd >= formal_ceremony.length()){System.out.println("正规式不正确---发生数组越界!");return false;}int pre_node = fa;int inBracket = 0;//判断'|'是否在括弧内for(int i=ld; i<=rd; ++i){if(formal_ceremony.charAt(i)=='(') ++inBracket;else if(formal_ceremony.charAt(i)==')') --inBracket;else if(formal_ceremony.charAt(i)=='|' && 0==inBracket){if(!kernel_way(fa, ld, i-1, isClosure)) return false;if(!kernel_way(fa, i+1, rd, isClosure)) return false;return true;}}for(int i=ld; i<=rd; ++i){if(formal_ceremony.charAt(i)=='('){//又是一个子区间//寻找和 该 '('相匹配的')'int cntLeftBracket = 0;//统计遍历过程中'('出现的次数,遇到')'减去1int posRightBracket = -1;//记录相匹配的')'的位置int posLeftBracket = i;for(int j=i+1; j<=rd; ++j){if(formal_ceremony.charAt(j)=='(')++cntLeftBracket;else 
if(formal_ceremony.charAt(j)==')'){if(cntLeftBracket == 0){posRightBracket = j;break;}--cntLeftBracket;}}if(posRightBracket == -1){//出错System.out.println("正规式出错----括弧不匹配!");return false;}int nodeFather = 0;//括弧内正则式的开始节点if(posRightBracket+1 <= rd && formal_ceremony.charAt(posRightBracket+1)=='*'){i = posRightBracket+1;//过滤掉"()*"addEdge(pre_node, ++cnt_node, '$');//表示这一条边为空pre_node = cnt_node;nodeFather = cnt_node;addEdge(pre_node, ++cnt_node, '$');//表示这一条边为空pre_node = cnt_node;//处理()*括弧内的正规式if(!kernel_way(nodeFather, posLeftBracket+1, posRightBracket-1, true)) return false;} else {nodeFather = pre_node;if(!kernel_way(nodeFather, posLeftBracket+1, posRightBracket-1, false))//对于"(101)", 看成101return false;i = posRightBracket;}} else {//单个字符if(formal_ceremony.charAt(i)==')') continue;if(i+1 <= rd && formal_ceremony.charAt(i+1)=='*'){addEdge(pre_node, ++cnt_node, '$');//表示这一条边为空pre_node = cnt_node;addEdge(pre_node, pre_node, formal_ceremony.charAt(i));if(i+1==rd && isClosure)addEdge(pre_node, fa, '$');//表示这一条边为空并且是连接到父亲节点else{if(endNode.containsKey(fa))addEdge(pre_node, endNode.get(fa), '$');else{addEdge(pre_node, ++cnt_node, '$');//表示这一条边为空if(i==rd) endNode.put(fa, cnt_node);//记录非闭包状态下 第一个节点对应的最后一个节点 }}pre_node = cnt_node;++i;//过滤*} else {if(i==rd && isClosure){//是闭包的情况 addEdge(pre_node, fa, formal_ceremony.charAt(i));} else{if(endNode.containsKey(fa))addEdge(pre_node, endNode.get(fa), formal_ceremony.charAt(i));else{addEdge(pre_node, ++cnt_node, formal_ceremony.charAt(i));if(i==rd) endNode.put(fa, cnt_node);//记录非闭包状态下 第一个节点对应的最后一个节点 }}pre_node = cnt_node;}}}return true;}private void checkFinalState(){//检查哪一个节点是终态for(int i=1; i<=cnt_node; ++i){int cc = 0;if(g[i] == null){//表明是终态finalState[i] = true;continue;}for(int j=0; j<g[i].size(); ++j)if(g[i].elementAt(j).v != i)++cc;if(cc == 0)//表明是终态finalState[i] = true;}}public boolean[] getFinalState(){return finalState;}public Vector<Pair>[] getNFAGraphics(){if(kernel_way(1, 0, formal_ceremony.length()-1, false)){ // for(Edge 
e : nodeAl)//打印NFA // System.out.println(e); checkFinalState();return g;}return null;}public Set<Character> getCharacterSet(){return st;}public void outputNFA(){if(kernel_way(1, 0, formal_ceremony.length()-1, false)){checkFinalState();for(Edge e : nodeAl)System.out.println(e);}} }/** 将正规式转换成NFA* */ public class ToNFA {public static void main(String[] args){String formal_ceremony = "0*(100*)*0*"; // String formal_ceremony = "1(1010*|1(010)*1)*0"; // String formal_ceremony = "1(0|1)*101"; // String formal_ceremony = "0*1*(010)0*1*"; // String formal_ceremony = "(0|1|2)*"; // String formal_ceremony = "0|1"; // String formal_ceremony = "0|1|2|3"; // String formal_ceremony = "(0|1|6)|(2|3)|(4|5)"; // String formal_ceremony = "(0|1)*|(2|3)*"; // String formal_ceremony = "((10)|(01)*|(0|1))";NFA nfa = new NFA(formal_ceremony);nfa.outputNFA();} }
// Converts the NFA into a deterministic automaton (DFA).
package hjzgg.formal_ceremony_to_dfa;import java.util.ArrayList; import java.util.HashSet; import java.util.Iterator; import java.util.LinkedList; import java.util.Queue; import java.util.Set; import java.util.Vector;class Pair {public int v;public char ch;public Pair(int v, char ch) {super();this.v = v;this.ch = ch;}}class MyHashSet extends HashSet<Integer>{//重写 set 集合的 hashcode()和equals()方法private int state;public void setState(int state){this.state = state;}public int getState(){return state;}@Overridepublic boolean equals(Object arg0) {MyHashSet tmp = (MyHashSet)arg0;if(tmp.size() != this.size()) return false;Iterator<Integer> it = this.iterator();while(it.hasNext()){if(!tmp.contains(it.next())) return false;}return true;}@Overridepublic int hashCode() {int sum = 0;Iterator<Integer> it = this.iterator();while(it.hasNext())sum += (((java.lang.Integer)it.next()).intValue());return sum;} }class DefinedNFA{private int dfaNode = 0;//defined DFA节点的个数private boolean[] finalState = null;//表示NFA中哪一个节点是终态private boolean[] newFinalState = new boolean[NFA.MAX_NODE] ;private Vector<Pair>[] g = null;//NFA 图private Set<Edge>edgeSet = new HashSet<Edge>(); //标记图中的边是否被访问过private MyHashSet st = null; //集合,表示每一个子集状态private Queue<MyHashSet> queue = new LinkedList<MyHashSet>();//存放要执行的子集状态private Set<MyHashSet> sst = new HashSet<MyHashSet>();private Set<Character> characterSet = null;//正规式中的字符的集合private ArrayList<Edge> nodeAl = new ArrayList<Edge>();//NFA边的集合public DefinedNFA(Vector<Pair>[] g, Set<Character> characterSet, boolean[] finalState) {super();this.g = g;this.characterSet = characterSet;this.finalState = finalState;}public Set<Character> getCharacterSet(){return characterSet;}public int getDfaNode(){return dfaNode;}public boolean[] getNewFinalState(){return newFinalState;}public ArrayList<Edge> getNodeAl(){return nodeAl;}private void dfs(int u, char ch){if(g[u]==null) return ;int len = g[u].size();for(int i=0; i<len; ++i){Pair pair = g[u].elementAt(i);Edge edge = new 
Edge(u, pair.v, pair.ch);if(!edgeSet.contains(edge) && pair.ch==ch){edgeSet.add(edge);st.add(pair.v);dfs(pair.v, '$');}}}public void checkIsFinalState(Set<Integer> st, int state){Iterator<Integer> it = st.iterator();while(it.hasNext()){int val = it.next();if(finalState[val])newFinalState[state] = true;}}private void initFirstSet(){edgeSet.clear();st = new MyHashSet();st.add(1);st.setState(++dfaNode);dfs(1, '$');checkIsFinalState(st, dfaNode);sst.add(st);queue.add(st);}private void addEdge(int u, int v, char ch){nodeAl.add(new Edge(u, v, ch));}public void ToStateMatrix(){initFirstSet();while(!queue.isEmpty()){MyHashSet myset = queue.poll();for(Character ch : characterSet){st = new MyHashSet();for(Integer i : myset){edgeSet.clear();dfs(i, ch);}if(st.size()>0){if(!sst.contains(st)){sst.add(st);queue.add(st);st.setState(++dfaNode);checkIsFinalState(st, dfaNode);} else {Iterator<MyHashSet> it = sst.iterator();while(it.hasNext()){MyHashSet tmp = it.next();if(tmp.equals(st)){st = tmp;break;}}}addEdge(myset.getState(), st.getState(), ch);} }}}public void outputDFA(){ToStateMatrix();//有状态转换矩阵得到defined NFAfor(Edge e : nodeAl)System.out.println(e);}}public class ToDefinedDFA {public static void main(String[] args) { // String formal_ceremony = "((10)|(01)*|(0|1))"; // String formal_ceremony = "(0|1|6)|(2|3)|(4|5)"; // String formal_ceremony = "1(0|1)*101";String formal_ceremony = "0*(100*)*0*";NFA nfa = new NFA(formal_ceremony);DefinedNFA definedDFA = new DefinedNFA(nfa.getNFAGraphics(), nfa.getCharacterSet(), nfa.getFinalState());definedDFA.outputDFA();}}
// Minimises the deterministic automaton (DFA).
package hjzgg.formal_ceremony_to_dfa;import java.util.ArrayList; import java.util.HashMap; import java.util.HashSet; import java.util.Map; import java.util.Map.Entry; import java.util.Set;class MinimumDFA{private boolean[] newFinalState = null;//由确定化DFA得到private ArrayList<Edge> nodeAl = null;//由确定化DFA得到private int dfaNode;//确定化DFA节点的个数private Set<Character> characterSet = null;//正规式中的字符的集合private ArrayList<Set<Integer>> setList = new ArrayList<Set<Integer>>(); public MinimumDFA(boolean[] newFinalState, ArrayList<Edge> nodeAl, int dfaNode, Set<Character> characterSet) {super();this.newFinalState = newFinalState;this.nodeAl = nodeAl;this.dfaNode = dfaNode;this.characterSet = characterSet;}private void init(){//利用分割法将集合分成终态和非终态Set<Integer> finalStateSet = new HashSet<Integer>();Set<Integer> NofinalStateSet = new HashSet<Integer>();for(int i=1; i<=dfaNode; ++i)if(newFinalState[i])//终态 finalStateSet.add(i);else NofinalStateSet.add(i);setList.add(finalStateSet);setList.add(NofinalStateSet);}public void toMinimumDfa(){init();boolean flag = true;ArrayList<Set<Integer>> tmpSetList = new ArrayList<Set<Integer>>();while(flag){flag = false;hjzgg:for(int k=0; k<setList.size(); ++k){Set<Integer> st = setList.get(k);if(st.size()<=1) continue;for(Character ch : characterSet){Map<Integer, Integer> mp = new HashMap<Integer, Integer>();for(int i=0; i<nodeAl.size(); ++i){//st集合(也就是map的val值)在 ch这个点对应的集合 {st}a = {...}Edge edge = nodeAl.get(i);if(edge.key == ch && st.contains(edge.u))mp.put(edge.u, edge.v);}
for(Integer i : st)
if(!mp.containsKey(i))//表明i节点对应的是一条空边
mp.put(i, -1);
//将st集合拆分成两个不想交的集合Set<Integer> firstSet = new HashSet<Integer>();Set<Integer> secondSet = new HashSet<Integer>();for(int j=0; j<setList.size(); ++j){firstSet.clear();secondSet.clear();Set<Integer> tmpSt = setList.get(k);for(Entry<Integer, Integer> entry : mp.entrySet()){//返回此映射中包含的映射关系的 set 视图。返回的 set 中的每个元素都是一个 Map.Entryif(tmpSt.contains(entry.getValue()))firstSet.add(entry.getKey());else secondSet.add(entry.getKey());}if(firstSet.size()!=0 && secondSet.size()!=0){flag = true;//如果发现可以拆分的集合,则继续最顶层的while循环for(Integer i : tmpSt){//将firstSet 和 secondSet中都没有的元素添加到firstSet中if(!firstSet.contains(i) && !secondSet.contains(i))firstSet.add(i);}setList.remove(k);setList.add(firstSet);setList.add(secondSet);break hjzgg;}}}}} // for(int k=0; k<setList.size(); ++k)//输出最终的集合划分 // System.out.println(setList.get(k)); // System.out.println("&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&");for(int k=0; k<setList.size(); ++k){Set<Integer> st = setList.get(k);if(st.size() > 1){//看成是一个等价的状态,选择第一个元素当作代表int first=0;for(Integer i : st){//取得第一个元素first = i;break;}ArrayList<Edge> tmpList = new ArrayList<Edge>();for(int i=0; i<nodeAl.size(); ++i){//遍历所有的边,找到不是firstEdge edge = nodeAl.get(i);if(st.contains(edge.u) && edge.u!=first){nodeAl.remove(i);--i;} else if(st.contains(edge.v) && edge.v!=first){nodeAl.remove(i);--i;tmpList.add(new Edge(edge.u, first, edge.key));}}nodeAl.addAll(tmpList);}}}public void outputMinimumDFA(){ // for(int i=0; i<nodeAl.size(); ++i)//输出未确定化的DFA // System.out.println(nodeAl.get(i)); toMinimumDfa();for(int i=0; i<nodeAl.size(); ++i)System.out.println(nodeAl.get(i));} }public class ToMinimumDFA {public static void main(String[] args) { // String formal_ceremony = "1(0|1)*101";String formal_ceremony = "0*(100*)*0*";NFA nfa = new NFA(formal_ceremony);DefinedNFA definedDFA = new DefinedNFA(nfa.getNFAGraphics(), nfa.getCharacterSet(), nfa.getFinalState());definedDFA.ToStateMatrix();MinimumDFA minimumDFA = new MinimumDFA(definedDFA.getNewFinalState(), definedDFA.getNodeAl(), 
definedDFA.getDfaNode(), definedDFA.getCharacterSet());minimumDFA.outputMinimumDFA();}}