JDK14/Java14源码在线阅读

/*
 * Permission is hereby granted, free of charge, to any person obtaining a copy of
 * this software and associated documentation files (the "Software"), to deal in
 * the Software without restriction, including without limitation the rights to
 * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
 * of the Software, and to permit persons to whom the Software is furnished to do
 * so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in all
 * copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
package jdk.nashorn.internal.runtime.regexp.joni;

import static jdk.nashorn.internal.runtime.regexp.joni.BitStatus.bsOnOff;
import static jdk.nashorn.internal.runtime.regexp.joni.Option.isDontCaptureGroup;
import static jdk.nashorn.internal.runtime.regexp.joni.Option.isIgnoreCase;
import jdk.nashorn.internal.runtime.regexp.joni.ast.AnchorNode;
import jdk.nashorn.internal.runtime.regexp.joni.ast.AnyCharNode;
import jdk.nashorn.internal.runtime.regexp.joni.ast.BackRefNode;
import jdk.nashorn.internal.runtime.regexp.joni.ast.CClassNode;
import jdk.nashorn.internal.runtime.regexp.joni.ast.CClassNode.CCStateArg;
import jdk.nashorn.internal.runtime.regexp.joni.ast.ConsAltNode;
import jdk.nashorn.internal.runtime.regexp.joni.ast.EncloseNode;
import jdk.nashorn.internal.runtime.regexp.joni.ast.Node;
import jdk.nashorn.internal.runtime.regexp.joni.ast.QuantifierNode;
import jdk.nashorn.internal.runtime.regexp.joni.ast.StringNode;
import jdk.nashorn.internal.runtime.regexp.joni.constants.AnchorType;
import jdk.nashorn.internal.runtime.regexp.joni.constants.CCSTATE;
import jdk.nashorn.internal.runtime.regexp.joni.constants.CCVALTYPE;
import jdk.nashorn.internal.runtime.regexp.joni.constants.EncloseType;
import jdk.nashorn.internal.runtime.regexp.joni.constants.NodeType;
import jdk.nashorn.internal.runtime.regexp.joni.constants.TokenType;
import jdk.nashorn.internal.runtime.regexp.joni.encoding.CharacterType;
import jdk.nashorn.internal.runtime.regexp.joni.exception.InternalException;
import jdk.nashorn.internal.runtime.regexp.joni.exception.SyntaxException;
import jdk.nashorn.internal.runtime.regexp.joni.exception.ValueException;

class Parser extends Lexer {

    protected final Regex regex;
    protected Node root;

    protected int returnCode; // return code used by parser methods (they itself return parsed nodes)
                              // this approach will not affect recursive calls

    protected Parser(final ScanEnvironment env, final char[] chars, final int p, final int end) {
        super(env, chars, p, end);
        regex = env.reg;
    }

    // onig_parse_make_tree
    protected final Node parse() {
        root = parseRegexp();
        regex.numMem = env.numMem;
        return root;
    }

    private boolean codeExistCheck(final int code, final boolean ignoreEscaped) {
        mark();

        boolean inEsc = false;
        while (left()) {
            if (ignoreEscaped && inEsc) {
                inEsc = false;
            } else {
                fetch();
                if (c == code) {
                    restore();
                    return true;
                }
                if (c == syntax.metaCharTable.esc) {
                    inEsc = true;
                }
            }
        }

        restore();
        return false;
    }

    private CClassNode parseCharClass() {
        fetchTokenInCC();

        final boolean neg;
        if (token.type == TokenType.CHAR && token.getC() == '^' && !token.escaped) {
            neg = true;
            fetchTokenInCC();
        } else {
            neg = false;
        }

        if (token.type == TokenType.CC_CLOSE) {
            if (!codeExistCheck(']', true)) {
                throw new SyntaxException(ERR_EMPTY_CHAR_CLASS);
            }
            env.ccEscWarn("]");
            token.type = TokenType.CHAR; /* allow []...] */
        }

        CClassNode cc = new CClassNode();
        CClassNode prevCC = null;
        CClassNode workCC = null;

        final CCStateArg arg = new CCStateArg();

        boolean andStart = false;
        arg.state = CCSTATE.START;

        while (token.type != TokenType.CC_CLOSE) {
            boolean fetched = false;

            switch (token.type) {

            case CHAR:
                if (token.getC() > 0xff) {
                    arg.inType = CCVALTYPE.CODE_POINT;
                } else {
                    arg.inType = CCVALTYPE.SB; // sb_char:
                }
                arg.v = token.getC();
                arg.vIsRaw = false;
                parseCharClassValEntry2(cc, arg); // goto val_entry2
                break;

            case RAW_BYTE:
                arg.v = token.getC();
                arg.inType = CCVALTYPE.SB; // raw_single:
                arg.vIsRaw = true;
                parseCharClassValEntry2(cc, arg); // goto val_entry2
                break;

            case CODE_POINT:
                arg.v = token.getCode();
                arg.vIsRaw = true;
                parseCharClassValEntry(cc, arg); // val_entry:, val_entry2
                break;

            case CHAR_TYPE:
                cc.addCType(token.getPropCType(), token.getPropNot(), env, this);
                cc.nextStateClass(arg, env); // next_class:
                break;

            case CC_RANGE:
                if (arg.state == CCSTATE.VALUE) {
                    fetchTokenInCC();
                    fetched = true;
                    if (token.type == TokenType.CC_CLOSE) { /* allow [x-] */
                        parseCharClassRangeEndVal(cc, arg); // range_end_val:, goto val_entry;
                        break;
                    } else if (token.type == TokenType.CC_AND) {
                        env.ccEscWarn("-");
                        parseCharClassRangeEndVal(cc, arg); // goto range_end_val
                        break;
                    }
                    arg.state = CCSTATE.RANGE;
                } else if (arg.state == CCSTATE.START) {
                    arg.v = token.getC(); /* [-xa] is allowed */
                    arg.vIsRaw = false;
                    fetchTokenInCC();
                    fetched = true;
                    if (token.type == TokenType.CC_RANGE || andStart) {
                        env.ccEscWarn("-"); /* [--x] or [a&&-x] is warned. */
                    }
                    parseCharClassValEntry(cc, arg); // goto val_entry
                    break;
                } else if (arg.state == CCSTATE.RANGE) {
                    env.ccEscWarn("-");
                    parseCharClassSbChar(cc, arg); // goto sb_char /* [!--x] is allowed */
                    break;
                } else { /* CCS_COMPLETE */
                    fetchTokenInCC();
                    fetched = true;
                    if (token.type == TokenType.CC_CLOSE) { /* allow [a-b-] */
                        parseCharClassRangeEndVal(cc, arg); // goto range_end_val
                        break;
                    } else if (token.type == TokenType.CC_AND) {
                        env.ccEscWarn("-");
                        parseCharClassRangeEndVal(cc, arg); // goto range_end_val
                        break;
                    }

                    if (syntax.allowDoubleRangeOpInCC()) {
                        env.ccEscWarn("-");
                        arg.inType = CCVALTYPE.SB;
                        arg.v = '-';
                        arg.vIsRaw = false;
                        parseCharClassValEntry2(cc, arg); // goto val_entry2 /* [0-9-a] is allowed as [0-9\-a] */
                        break;
                    }
                    throw new SyntaxException(ERR_UNMATCHED_RANGE_SPECIFIER_IN_CHAR_CLASS);
                }
                break;

            case CC_CC_OPEN: /* [ */
                final CClassNode acc = parseCharClass();
                cc.or(acc);
                break;

            case CC_AND:     /* && */
                if (arg.state == CCSTATE.VALUE) {
                    arg.v = 0; // ??? safe v ?
                    arg.vIsRaw = false;
                    cc.nextStateValue(arg, env);
                }
                /* initialize local variables */
                andStart = true;
                arg.state = CCSTATE.START;
                if (prevCC != null) {
                    prevCC.and(cc);
                } else {
                    prevCC = cc;
                    if (workCC == null) {
                        workCC = new CClassNode();
                    }
                    cc = workCC;
                }
                cc.clear();
                break;

            case EOT:
                throw new SyntaxException(ERR_PREMATURE_END_OF_CHAR_CLASS);

            default:
                throw new InternalException(ERR_PARSER_BUG);
            } // switch

            if (!fetched) {
                fetchTokenInCC();
            }

        } // while

        if (arg.state == CCSTATE.VALUE) {
            arg.v = 0; // ??? safe v ?
            arg.vIsRaw = false;
            cc.nextStateValue(arg, env);
        }

        if (prevCC != null) {
            prevCC.and(cc);
            cc = prevCC;
        }

        if (neg) {
            cc.setNot();
        } else {
            cc.clearNot();
        }

        if (cc.isNot() && syntax.notNewlineInNegativeCC()) {
            if (!cc.isEmpty()) {
                final int NEW_LINE = 0x0a;
                if (EncodingHelper.isNewLine(NEW_LINE)) {
                    cc.bs.set(NEW_LINE);
                }
            }
        }

        return cc;
    }

    private void parseCharClassSbChar(final CClassNode cc, final CCStateArg arg) {
        arg.inType = CCVALTYPE.SB;
        arg.v = token.getC();
        arg.vIsRaw = false;
        parseCharClassValEntry2(cc, arg); // goto val_entry2
    }

    private void parseCharClassRangeEndVal(final CClassNode cc, final CCStateArg arg) {
        arg.v = '-';
        arg.vIsRaw = false;
        parseCharClassValEntry(cc, arg); // goto val_entry
    }

    private void parseCharClassValEntry(final CClassNode cc, final CCStateArg arg) {
        arg.inType = arg.v <= 0xff ? CCVALTYPE.SB : CCVALTYPE.CODE_POINT;
        parseCharClassValEntry2(cc, arg); // val_entry2:
    }

    private void parseCharClassValEntry2(final CClassNode cc, final CCStateArg arg) {
        cc.nextStateValue(arg, env);
    }

    private Node parseEnclose(final TokenType term) {
        Node node = null;

        if (!left()) {
            throw new SyntaxException(ERR_END_PATTERN_WITH_UNMATCHED_PARENTHESIS);
        }

        int option = env.option;

        if (peekIs('?') && syntax.op2QMarkGroupEffect()) {
            inc();
            if (!left()) {
                throw new SyntaxException(ERR_END_PATTERN_IN_GROUP);
            }

            fetch();
            switch(c) {
            case ':':  /* (?:...) grouping only */
                fetchToken(); // group:
                node = parseSubExp(term);
                returnCode = 1; /* group */
                return node;
            case '=':
                node = new AnchorNode(AnchorType.PREC_READ);
                break;
            case '!':  /*         preceding read */
                node = new AnchorNode(AnchorType.PREC_READ_NOT);
                break;
            case '>':  /* (?>...) stop backtrack */
                node = new EncloseNode(EncloseType.STOP_BACKTRACK); // node_new_enclose
                break;
            case '\'':
                break;
            case '<':  /* look behind (?<=...), (?<!...) */
                fetch();
                if (c == '=') {
                    node = new AnchorNode(AnchorType.LOOK_BEHIND);
                } else if (c == '!') {
                    node = new AnchorNode(AnchorType.LOOK_BEHIND_NOT);
                } else {
                    throw new SyntaxException(ERR_UNDEFINED_GROUP_OPTION);
                }
                break;
            case '@':
                if (syntax.op2AtMarkCaptureHistory()) {
                    final EncloseNode en = new EncloseNode(); // node_new_enclose_memory
                    final int num = env.addMemEntry();
                    if (num >= BitStatus.BIT_STATUS_BITS_NUM) {
                        throw new ValueException(ERR_GROUP_NUMBER_OVER_FOR_CAPTURE_HISTORY);
                    }
                    en.regNum = num;
                    node = en;
                } else {
                    throw new SyntaxException(ERR_UNDEFINED_GROUP_OPTION);
                }
                break;

            // case 'p': #ifdef USE_POSIXLINE_OPTION
            case '-':
            case 'i':
            case 'm':
            case 's':
            case 'x':
                boolean neg = false;
                while (true) {
                    switch(c) {
                    case ':':
                    case ')':
                        break;
                    case '-':
                        neg = true;
                        break;
                    case 'x':
                        option = bsOnOff(option, Option.EXTEND, neg);
                        break;
                    case 'i':
                        option = bsOnOff(option, Option.IGNORECASE, neg);
                        break;
                    case 's':
                        if (syntax.op2OptionPerl()) {
                            option = bsOnOff(option, Option.MULTILINE, neg);
                        } else {
                            throw new SyntaxException(ERR_UNDEFINED_GROUP_OPTION);
                        }
                        break;
                    case 'm':
                        if (syntax.op2OptionPerl()) {
                            option = bsOnOff(option, Option.SINGLELINE, !neg);
                        } else if (syntax.op2OptionRuby()) {
                            option = bsOnOff(option, Option.MULTILINE, neg);
                        } else {
                            throw new SyntaxException(ERR_UNDEFINED_GROUP_OPTION);
                        }
                        break;
                    // case 'p': #ifdef USE_POSIXLINE_OPTION // not defined
                    // option = bsOnOff(option, Option.MULTILINE|Option.SINGLELINE, neg);
                    // break;

                    default:
                        throw new SyntaxException(ERR_UNDEFINED_GROUP_OPTION);
                    } // switch

                    if (c == ')') {
                        final EncloseNode en = new EncloseNode(option, 0); // node_new_option
                        node = en;
                        returnCode = 2; /* option only */
                        return node;
                    } else if (c == ':') {
                        final int prev = env.option;
                        env.option = option;
                        fetchToken();
                        final Node target = parseSubExp(term);
                        env.option = prev;
                        final EncloseNode en = new EncloseNode(option, 0); // node_new_option
                        en.setTarget(target);
                        node = en;
                        returnCode = 0;
                        return node;
                    }
                    if (!left()) {
                        throw new SyntaxException(ERR_END_PATTERN_IN_GROUP);
                    }
                    fetch();
                } // while

            default:
                throw new SyntaxException(ERR_UNDEFINED_GROUP_OPTION);
            } // switch

        } else {
            if (isDontCaptureGroup(env.option)) {
                fetchToken(); // goto group
                node = parseSubExp(term);
                returnCode = 1; /* group */
                return node;
            }
            final EncloseNode en = new EncloseNode(); // node_new_enclose_memory
            final int num = env.addMemEntry();
            en.regNum = num;
            node = en;
        }

        fetchToken();
        final Node target = parseSubExp(term);

        if (node.getType() == NodeType.ANCHOR) {
            final AnchorNode an = (AnchorNode) node;
            an.setTarget(target);
        } else {
            final EncloseNode en = (EncloseNode)node;
            en.setTarget(target);
            if (en.type == EncloseType.MEMORY) {
                /* Don't move this to previous of parse_subexp() */
                env.setMemNode(en.regNum, node);
            }
        }
        returnCode = 0;
        return node; // ??
    }

    private Node parseExp(final TokenType term) {
        if (token.type == term)
         {
            return StringNode.EMPTY; // goto end_of_token
        }

        Node node = null;
        boolean group = false;

        switch(token.type) {
        case ALT:
        case EOT:
            return StringNode.EMPTY; // end_of_token:, node_new_empty

        case SUBEXP_OPEN:
            node = parseEnclose(TokenType.SUBEXP_CLOSE);
            if (returnCode == 1) {
                group = true;
            } else if (returnCode == 2) { /* option only */
                final int prev = env.option;
                final EncloseNode en = (EncloseNode)node;
                env.option = en.option;
                fetchToken();
                final Node target = parseSubExp(term);
                env.option = prev;
                en.setTarget(target);
                return node;
            }
            break;
        case SUBEXP_CLOSE:
            if (!syntax.allowUnmatchedCloseSubexp()) {
                throw new SyntaxException(ERR_UNMATCHED_CLOSE_PARENTHESIS);
            }
            if (token.escaped) {
                return parseExpTkRawByte(group); // goto tk_raw_byte
            }
            return parseExpTkByte(group); // goto tk_byte
        case STRING:
            return parseExpTkByte(group); // tk_byte:

        case RAW_BYTE:
            return parseExpTkRawByte(group); // tk_raw_byte:
        case CODE_POINT:
            final char[] buf = new char[] {(char)token.getCode()};
            // #ifdef NUMBERED_CHAR_IS_NOT_CASE_AMBIG ... // setRaw() #else
            node = new StringNode(buf, 0, 1);
            break;

        case CHAR_TYPE:
            switch(token.getPropCType()) {
            case CharacterType.D:
            case CharacterType.S:
            case CharacterType.W:
                if (Config.NON_UNICODE_SDW) {
                    final CClassNode cc = new CClassNode();
                    cc.addCType(token.getPropCType(), false, env, this);
                    if (token.getPropNot()) {
                        cc.setNot();
                    }
                    node = cc;
                }
                break;

            case CharacterType.SPACE:
            case CharacterType.DIGIT:
            case CharacterType.XDIGIT:
                // #ifdef USE_SHARED_CCLASS_TABLE ... #endif
                final CClassNode ccn = new CClassNode();
                ccn.addCType(token.getPropCType(), false, env, this);
                if (token.getPropNot()) {
                    ccn.setNot();
                }
                node = ccn;
                break;

            default:
                throw new InternalException(ERR_PARSER_BUG);

            } // inner switch
            break;

        case CC_CC_OPEN:
            final CClassNode cc = parseCharClass();
            node = cc;
            if (isIgnoreCase(env.option)) {
                final ApplyCaseFoldArg arg = new ApplyCaseFoldArg(env, cc);
                EncodingHelper.applyAllCaseFold(env.caseFoldFlag, ApplyCaseFold.INSTANCE, arg);

                if (arg.altRoot != null) {
                    node = ConsAltNode.newAltNode(node, arg.altRoot);
                }
            }
            break;

        case ANYCHAR:
            node = new AnyCharNode();
            break;

        case ANYCHAR_ANYTIME:
            node = new AnyCharNode();
            final QuantifierNode qn = new QuantifierNode(0, QuantifierNode.REPEAT_INFINITE, false);
            qn.setTarget(node);
            node = qn;
            break;

        case BACKREF:
            final int backRef = token.getBackrefRef();
            node = new BackRefNode(backRef, env);
            break;

        case ANCHOR:
            node = new AnchorNode(token.getAnchor()); // possible bug in oniguruma
            break;

        case OP_REPEAT:
        case INTERVAL:
            if (syntax.contextIndepRepeatOps()) {
                if (syntax.contextInvalidRepeatOps()) {
                    throw new SyntaxException(ERR_TARGET_OF_REPEAT_OPERATOR_NOT_SPECIFIED);
                }
                node = StringNode.EMPTY; // node_new_empty
            } else {
                return parseExpTkByte(group); // goto tk_byte
            }
            break;

        default:
            throw new InternalException(ERR_PARSER_BUG);
        } //switch

        //targetp = node;

        fetchToken(); // re_entry:

        return parseExpRepeat(node, group); // repeat:
    }

    private Node parseExpTkByte(final boolean group) {
        final StringNode node = new StringNode(chars, token.backP, p); // tk_byte:
        while (true) {
            fetchToken();
            if (token.type != TokenType.STRING) {
                break;
            }

            if (token.backP == node.end) {
                node.end = p; // non escaped character, remain shared, just increase shared range
            } else {
                node.cat(chars, token.backP, p); // non continuous string stream, need to COW
            }
        }
        // targetp = node;
        return parseExpRepeat(node, group); // string_end:, goto repeat
    }

    private Node parseExpTkRawByte(final boolean group) {
        // tk_raw_byte:

        // important: we don't use 0xff mask here neither in the compiler
        // (in the template string) so we won't have to mask target
        // strings when comparing against them in the matcher
        final StringNode node = new StringNode((char)token.getC());
        node.setRaw();

        fetchToken();
        node.clearRaw();
        // !goto string_end;!
        return parseExpRepeat(node, group);
    }

    private Node parseExpRepeat(final Node targetp, final boolean group) {
        Node target = targetp;
        while (token.type == TokenType.OP_REPEAT || token.type == TokenType.INTERVAL) { // repeat:
            if (target.isInvalidQuantifier()) {
                throw new SyntaxException(ERR_TARGET_OF_REPEAT_OPERATOR_INVALID);
            }

            final QuantifierNode qtfr = new QuantifierNode(token.getRepeatLower(),
                                                     token.getRepeatUpper(),
                                                     token.type == TokenType.INTERVAL);

            qtfr.greedy = token.getRepeatGreedy();
            final int ret = qtfr.setQuantifier(target, group, env, chars, getBegin(), getEnd());
            Node qn = qtfr;

            if (token.getRepeatPossessive()) {
                final EncloseNode en = new EncloseNode(EncloseType.STOP_BACKTRACK); // node_new_enclose
                en.setTarget(qn);
                qn = en;
            }

            if (ret == 0) {
                target = qn;
            } else if (ret == 2) { /* split case: /abc+/ */
                target = ConsAltNode.newListNode(target, null);
                final ConsAltNode tmp = ((ConsAltNode)target).setCdr(ConsAltNode.newListNode(qn, null));

                fetchToken();
                return parseExpRepeatForCar(target, tmp, group);
            }
            fetchToken(); // goto re_entry
        }
        return target;
    }

    private Node parseExpRepeatForCar(final Node top, final ConsAltNode target, final boolean group) {
        while (token.type == TokenType.OP_REPEAT || token.type == TokenType.INTERVAL) { // repeat:
            if (target.car.isInvalidQuantifier()) {
                throw new SyntaxException(ERR_TARGET_OF_REPEAT_OPERATOR_INVALID);
            }

            final QuantifierNode qtfr = new QuantifierNode(token.getRepeatLower(),
                                                     token.getRepeatUpper(),

/**代码未完, 请加载全部代码(NowJava.com).**/
展开阅读全文

关注时代Java

关注时代Java