Princeton Algorithms: Part 2 [week 6: Regular Expressions]

前端之家收集整理的这篇文章主要介绍了Princeton Algorithms: Part 2 [week 6: Regular Expressions]前端之家小编觉得挺不错的,现在分享给大家,也给大家做个参考。

Exercise




Question Explanation

The correct answer is: 6 7 8

Here are the sets of reachable states after reading in the first i characters
(and following epsilon-transitions):

i substring set of reachable states
------------------------------------------------
0 0 1
1 B 2 3 5 6 7 8 11 12 13
2 B A 2 3 4 5 6 7 8 9 10 11 12 13
3 B A A 2 3 4 5 6 7 8 9 10 11 12 13
4 B A A A 2 3 4 5 6 7 8 9 10 11 12 13
5 B A A A C 6 7 8
6 B A A A C C 6 7 8



Here are all of the edges in the epsilon-transition digraph:
0-> 1
0-> 3
2->10
3-> 4
3-> 9
5-> 6
6-> 5
6-> 7
8-> 9
9-> 3
9->10
10->11


Interview Questions




/*************************************************************************
 *  Compilation:  javac NFA.java
 *  Execution:    java NFA regexp text
 *  Dependencies: Stack.java Bag.java Digraph.java DirectedDFS.java
 *
 *  % java NFA "(A*B|AC)D" AAAABD
 *  true
 *
 *  % java NFA "(A*B|AC)D" AAAAC
 *  false
 *
 *  % java NFA "(a|(bc)*d)*" abcbcd
 *  true
 *
 *  % java NFA "(a|(bc)*d)*" abcbcbcdaaaabcbcdaaaddd
 *  true
 *
 *  Remarks
 *  -----------
 *    - This version does not suport the + operator or multiway-or.
 *
 *    - This version does not handle character classes,*      Metacharacters (either in the text or pattern),capturing
 *      capabilities,greedy vs. relucantant modifier,and
 *      other features in industrial-strength implementations such
 *      as java.util.regexp.
 *
 *************************************************************************/

public class NFA { 

    private Digraph G;         // digraph of epsilon transitions
    private String regexp;     // regular expression
    private int M;             // number of characters in regular expression

    // Create the NFA for the given RE   
    public NFA(String regexp) {
        this.regexp = regexp;
        M = regexp.length();
        Stack<Integer> ops = new Stack<Integer>(); 
        G = new Digraph(M+1); 
        for (int i = 0; i < M; i++) { 
            int lp = i; 
            if (regexp.charAt(i) == '(' || regexp.charAt(i) == '|') 
                ops.push(i); 
            else if (regexp.charAt(i) == ')') {
                int or = ops.pop(); 

                // 2-way or operator
                if (regexp.charAt(or) == '|') { 
                    lp = ops.pop();
                    G.addEdge(lp,or+1);
                    G.addEdge(or,i);
                }
                else if (regexp.charAt(or) == '(')
                    lp = or;
                else assert false;
            } 

            // closure operator (uses 1-character lookahead)
            if (i < M-1 && regexp.charAt(i+1) == '*') { 
                G.addEdge(lp,i+1); 
                G.addEdge(i+1,lp); 
            } 
            if (regexp.charAt(i) == '(' || regexp.charAt(i) == '*' || regexp.charAt(i) == ')') 
                G.addEdge(i,i+1);
        } 
    } 

    // Does the NFA recognize txt? 
    public boolean recognizes(String txt) {
        DirectedDFS dfs = new DirectedDFS(G,0);
        Bag<Integer> pc = new Bag<Integer>();
        for (int v = 0; v < G.V(); v++)
            if (dfs.marked(v)) pc.add(v);

        // Compute possible NFA states for txt[i+1]
        for (int i = 0; i < txt.length(); i++) {
            Bag<Integer> match = new Bag<Integer>();
            for (int v : pc) {
                if (v == M) continue;
                if ((regexp.charAt(v) == txt.charAt(i)) || regexp.charAt(v) == '.')
                    match.add(v+1); 
            }
            dfs = new DirectedDFS(G,match); 
            pc = new Bag<Integer>();
            for (int v = 0; v < G.V(); v++)
                if (dfs.marked(v)) pc.add(v);

            // optimization if no states reachable
            if (pc.size() == 0) return false;
        }

        // check for accept state
        for (int v : pc)
            if (v == M) return true;
        return false;
    }


    public static void main(String[] args) {
        String regexp = "(" + args[0] + ")";
        String txt = args[1];
        if (txt.indexOf('|') >= 0) {
            throw new IllegalArgumentException("| character in text is not supported");
        }
        NFA nfa = new NFA(regexp);
        StdOut.println(nfa.recognizes(txt));
    }

} 
原文链接:https://www.f2er.com/regex/360948.html

猜你在找的正则表达式相关文章