JDK14/Java14源码在线阅读

/*
 * Copyright (c) 2010, 2013, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.  Oracle designates this
 * particular file as subject to the "Classpath" exception as provided
 * by Oracle in the LICENSE file that accompanied this code.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 */

package jdk.nashorn.internal.parser;

import static java.lang.Character.DECIMAL_DIGIT_NUMBER;
import static java.lang.Character.LOWERCASE_LETTER;
import static java.lang.Character.OTHER_PUNCTUATION;
import static java.lang.Character.SPACE_SEPARATOR;
import static java.lang.Character.UPPERCASE_LETTER;

import java.util.HashMap;
import java.util.Locale;

/**
 * JavaScript date parser. This class first tries to parse a date string
 * according to the extended ISO 8601 format specified in ES5 15.9.1.15.
 * If that fails, it falls back to legacy mode in which it accepts a range
 * of different formats.
 *
 * <p>This class is neither thread-safe nor reusable. Calling the
 * <code>parse()</code> method more than once will yield undefined results.</p>
 */
public class DateParser {

    /** Constant for index position of parsed year value. */
    public final static int YEAR        = 0;
    /** Constant for index position of parsed month value. */
    public final static int MONTH       = 1;
    /** Constant for index position of parsed day value. */
    public final static int DAY         = 2;
    /** Constant for index position of parsed hour value. */
    public final static int HOUR        = 3;
    /** Constant for index position of parsed minute value. */
    public final static int MINUTE      = 4;
    /** Constant for index position of parsed second value. */
    public final static int SECOND      = 5;
    /** Constant for index position of parsed millisecond value. */
    public final static int MILLISECOND = 6;
    /** Constant for index position of parsed time zone offset value. */
    public final static int TIMEZONE    = 7;

    private enum Token {
        UNKNOWN, NUMBER, SEPARATOR, PARENTHESIS, NAME, SIGN, END
    }

    private final String string;
    private final int length;
    private final Integer[] fields;
    private int pos = 0;
    private Token token;
    private int tokenLength;
    private Name nameValue;
    private int numValue;
    private int currentField = YEAR;
    private int yearSign = 0;
    private boolean namedMonth = false;

    private final static HashMap<String,Name> names = new HashMap<>();

    static {
        addName("monday", Name.DAY_OF_WEEK, 0);
        addName("tuesday", Name.DAY_OF_WEEK, 0);
        addName("wednesday", Name.DAY_OF_WEEK, 0);
        addName("thursday", Name.DAY_OF_WEEK, 0);
        addName("friday", Name.DAY_OF_WEEK, 0);
        addName("saturday", Name.DAY_OF_WEEK, 0);
        addName("sunday", Name.DAY_OF_WEEK, 0);
        addName("january", Name.MONTH_NAME, 1);
        addName("february", Name.MONTH_NAME, 2);
        addName("march", Name.MONTH_NAME, 3);
        addName("april", Name.MONTH_NAME, 4);
        addName("may", Name.MONTH_NAME, 5);
        addName("june", Name.MONTH_NAME, 6);
        addName("july", Name.MONTH_NAME, 7);
        addName("august", Name.MONTH_NAME, 8);
        addName("september", Name.MONTH_NAME, 9);
        addName("october", Name.MONTH_NAME, 10);
        addName("november", Name.MONTH_NAME, 11);
        addName("december", Name.MONTH_NAME, 12);
        addName("am", Name.AM_PM, 0);
        addName("pm", Name.AM_PM, 12);
        addName("z", Name.TIMEZONE_ID, 0);
        addName("gmt", Name.TIMEZONE_ID, 0);
        addName("ut", Name.TIMEZONE_ID, 0);
        addName("utc", Name.TIMEZONE_ID, 0);
        addName("est", Name.TIMEZONE_ID, -5 * 60);
        addName("edt", Name.TIMEZONE_ID, -4 * 60);
        addName("cst", Name.TIMEZONE_ID, -6 * 60);
        addName("cdt", Name.TIMEZONE_ID, -5 * 60);
        addName("mst", Name.TIMEZONE_ID, -7 * 60);
        addName("mdt", Name.TIMEZONE_ID, -6 * 60);
        addName("pst", Name.TIMEZONE_ID, -8 * 60);
        addName("pdt", Name.TIMEZONE_ID, -7 * 60);
        addName("t", Name.TIME_SEPARATOR, 0);
    }

    /**
     * Construct a new <code>DateParser</code> instance for parsing the given string.
     * @param string the string to be parsed
     */
    public DateParser(final String string) {
        this.string = string;
        this.length = string.length();
        this.fields = new Integer[TIMEZONE + 1];
    }

    /**
     * Try parsing the given string as date according to the extended ISO 8601 format
     * specified in ES5 15.9.1.15. Fall back to legacy mode if that fails.
     * This method returns <code>true</code> if the string could be parsed.
     * @return true if the string could be parsed as date
     */
    public boolean parse() {
        return parseEcmaDate() || parseLegacyDate();
    }

    /**
     * Try parsing the date string according to the rules laid out in ES5 15.9.1.15.
     * The date string must conform to the following format:
     *
     * <pre>  [('-'|'+')yy]yyyy[-MM[-dd]][Thh:mm[:ss[.sss]][Z|(+|-)hh:mm]] </pre>
     *
     * <p>If the string does not contain a time zone offset, the <code>TIMEZONE</code> field
     * is set to <code>0</code> (GMT).</p>
     * @return true if string represents a valid ES5 date string.
     */
    public boolean parseEcmaDate() {

        if (token == null) {
            token = next();
        }

        while (token != Token.END) {

            switch (token) {
                case NUMBER:
                    if (currentField == YEAR && yearSign != 0) {
                        // 15.9.1.15.1 Extended year must have six digits
                        if (tokenLength != 6) {
                            return false;
                        }
                        numValue *= yearSign;
                    } else if (!checkEcmaField(currentField, numValue)) {
                        return false;
                    }
                    if (!skipEcmaDelimiter()) {
                        return false;
                    }
                    if (currentField < TIMEZONE) {
                        set(currentField++, numValue);
                    }
                    break;

                case NAME:
                    if (nameValue == null) {
                        return false;
                    }
                    switch (nameValue.type) {
                        case Name.TIME_SEPARATOR:
                            if (currentField == YEAR || currentField > HOUR) {
                                return false;
                            }
                            currentField = HOUR;
                            break;
                        case Name.TIMEZONE_ID:
                            if (!nameValue.key.equals("z") || !setTimezone(nameValue.value, false)) {
                                return false;
                            }
                            break;
                        default:
                            return false;
                    }
                    break;

                case SIGN:
                    if (peek() == -1) {
                        // END after sign - wrong!
                        return false;
                    }

                    if (currentField == YEAR) {
                        yearSign = numValue;
                    } else if (currentField < SECOND || !setTimezone(readTimeZoneOffset(), true)) {
                        // Note: Spidermonkey won't parse timezone unless time includes seconds and milliseconds
                        return false;
                    }
                    break;

                default:
                    return false;
            }
            token = next();
        }

        return patchResult(true);
    }

    /**
     * Try parsing the date using a fuzzy algorithm that can handle a variety of formats.
     *
     * <p>Numbers separated by <code>':'</code> are treated as time values, optionally followed by a
     * millisecond value separated by <code>'.'</code>. Other number values are treated as date values.
     * The exact sequence of day, month, and year values to apply is determined heuristically.</p>
     *
     * <p>English month names and selected time zone names as well as AM/PM markers are recognized
     * and handled properly. Additionally, numeric time zone offsets such as <code>(+|-)hh:mm</code> or
     * <code>(+|-)hhmm</code> are recognized. If the string does not contain a time zone offset
     * the <code>TIMEZONE</code>field is left undefined, meaning the local time zone should be applied.</p>
     *
     * <p>English weekday names are recognized but ignored. All text in parentheses is ignored as well.
     * All other text causes parsing to fail.</p>
     *
     * @return true if the string could be parsed
     */
    public boolean parseLegacyDate() {

        if (yearSign != 0 || currentField > DAY) {
            // we don't support signed years in legacy mode
            return false;
        }
        if (token == null) {
            token = next();
        }

        while (token != Token.END) {

            switch (token) {
                case NUMBER:
                    if (skipDelimiter(':')) {
                        // A number followed by ':' is parsed as time
                        if (!setTimeField(numValue)) {
                            return false;
                        }
                        // consume remaining time tokens
                        do {
                            token = next();
                            if (token != Token.NUMBER || !setTimeField(numValue)) {
                                return false;
                            }
                        } while (skipDelimiter(isSet(SECOND) ? '.' : ':'));

                    } else {
                        // Parse as date token
                        if (!setDateField(numValue)) {
                            return false;
                        }
                        skipDelimiter('-');
                    }
                    break;

                case NAME:
                    if (nameValue == null) {
                        return false;
                    }
                    switch (nameValue.type) {
                        case Name.AM_PM:
                            if (!setAmPm(nameValue.value)) {
                                return false;
                            }
                            break;
                        case Name.MONTH_NAME:
                            if (!setMonth(nameValue.value)) {
                                return false;
                            }
                            break;
                        case Name.TIMEZONE_ID:
                            if (!setTimezone(nameValue.value, false)) {
                                return false;
                            }
                            break;
                        case Name.TIME_SEPARATOR:
                            return false;
                        default:
                            break;
                    }
                    if (nameValue.type != Name.TIMEZONE_ID) {
                        skipDelimiter('-');
                    }
                    break;

                case SIGN:
                    if (peek() == -1) {
                        // END after sign - wrong!
                        return false;
                    }

                    if (!setTimezone(readTimeZoneOffset(), true)) {
                        return false;
                    }
                    break;

                case PARENTHESIS:
                    if (!skipParentheses()) {
                        return false;
                    }
                    break;

                case SEPARATOR:
                    break;

                default:
                    return false;
            }
            token = next();
        }

        return patchResult(false);
    }

    /**
     * Get the parsed date and time fields as an array of <code>Integers</code>.
     *
     * <p>If parsing was successful, all fields are guaranteed to be set except for the
     * <code>TIMEZONE</code> field which may be <code>null</code>, meaning that local time zone
     * offset should be applied.</p>
     *
     * @return the parsed date fields
     */
    public Integer[] getDateFields() {
        return fields;
    }

    private boolean isSet(final int field) {
        return fields[field] != null;
    }

    private Integer get(final int field) {
        return fields[field];
    }

    private void set(final int field, final int value) {
        fields[field] = value;
    }

    private int peek() {
        return pos < length ? string.charAt(pos) : -1;
    }

    // Skip delimiter if followed by a number. Used for ISO 8601 formatted dates
    private boolean skipNumberDelimiter(final char c) {
        if (pos < length - 1 && string.charAt(pos) == c
                && Character.getType(string.charAt(pos + 1)) == DECIMAL_DIGIT_NUMBER) {
            token = null;
            pos++;
            return true;
        }
        return false;
    }

    private boolean skipDelimiter(final char c) {
        if (pos < length && string.charAt(pos) == c) {
            token = null;
            pos++;
            return true;
        }
        return false;
    }

    private Token next() {
        if (pos >= length) {
            tokenLength = 0;
            return Token.END;
        }

        final char c = string.charAt(pos);

        if (c > 0x80) {
            tokenLength = 1;
            pos++;
            return Token.UNKNOWN; // We only deal with ASCII here
        }

        final int type = Character.getType(c);
        switch (type) {
            case DECIMAL_DIGIT_NUMBER:
                numValue = readNumber(6);
                return Token.NUMBER;
            case SPACE_SEPARATOR :
            case OTHER_PUNCTUATION:
                tokenLength = 1;
                pos++;
                return Token.SEPARATOR;
            case UPPERCASE_LETTER:
            case LOWERCASE_LETTER:
                nameValue = readName();
                return Token.NAME;
            default:
                tokenLength = 1;
                pos++;
                switch (c) {
                    case '(':
                        return Token.PARENTHESIS;
                    case '-':
                    case '+':
                        numValue = c == '-' ? -1 : 1;
                        return Token.SIGN;
                    default:
                        return Token.UNKNOWN;
                }
        }
    }

    private static boolean checkLegacyField(final int field, final int value) {
        switch (field) {
            case HOUR:
                return isHour(value);
            case MINUTE:
            case SECOND:
                return isMinuteOrSecond(value);
            case MILLISECOND:
                return isMillisecond(value);
            default:
                // skip validation on other legacy fields as we don't know what's what
                return true;
        }
    }

    private boolean checkEcmaField(final int field, final int value) {
        switch (field) {
            case YEAR:
                return tokenLength == 4;
            case MONTH:
                return tokenLength == 2 && isMonth(value);
            case DAY:
                return tokenLength == 2 && isDay(value);
            case HOUR:
                return tokenLength == 2 && isHour(value);
            case MINUTE:
            case SECOND:
                return tokenLength == 2 && isMinuteOrSecond(value);
            case MILLISECOND:
                // we allow millisecond to be less than 3 digits
                return tokenLength < 4 && isMillisecond(value);
            default:
                return true;
        }
    }

    private boolean skipEcmaDelimiter() {
        switch (currentField) {
            case YEAR:
            case MONTH:
                return skipNumberDelimiter('-') || peek() == 'T' || peek() == -1;
            case DAY:
                return peek() == 'T' || peek() == -1;
            case HOUR:
            case MINUTE:
                return skipNumberDelimiter(':') || endOfTime();
            case SECOND:
                return skipNumberDelimiter('.') || endOfTime();
            default:
                return true;
        }
    }

    private boolean endOfTime() {
        final int c = peek();
        return c == -1 || c == 'Z' || c == '-' || c == '+' || c == ' ';
    }

    private static boolean isAsciiLetter(final char ch) {
        return ('A' <= ch && ch <= 'Z') || ('a' <= ch && ch <= 'z');
    }

    private static boolean isAsciiDigit(final char ch) {
        return '0' <= ch && ch <= '9';
    }

    private int readNumber(final int maxDigits) {
        final int start = pos;
        int n = 0;
        final int max = Math.min(length, pos + maxDigits);
        while (pos < max && isAsciiDigit(string.charAt(pos))) {
            n = n * 10 + string.charAt(pos++) - '0';
        }
        tokenLength = pos - start;
        return n;
    }

    private Name readName() {
        final int start = pos;
        final int limit = Math.min(pos + 3, length);

        // first read up to the key length
        while (pos < limit && isAsciiLetter(string.charAt(pos))) {
            pos++;
        }
        final String key = string.substring(start, pos).toLowerCase(Locale.ENGLISH);
        final Name name = names.get(key);
        // then advance to end of name
        while (pos < length && isAsciiLetter(string.charAt(pos))) {
            pos++;
        }

        tokenLength = pos - start;
        // make sure we have the full name or a prefix
        if (name != null && name.matches(string, start, tokenLength)) {
            return name;
        }
        return null;
    }

    private int readTimeZoneOffset() {
        final int sign = string.charAt(pos - 1) == '+' ? 1 : -1;
        int offset = readNumber(2);
        skipDelimiter(':');
        offset = offset * 60 + readNumber(2);
        return sign * offset;
    }

    private boolean skipParentheses() {
        int parenCount = 1;
        while (pos < length && parenCount != 0) {
            final char c = string.charAt(pos++);
            if (c == '(') {
                parenCount++;
            } else if (c == ')') {
                parenCount--;
            }
        }
        return true;
    }

    private static int getDefaultValue(final int field) {
        switch (field) {
            case MONTH:
            case DAY:
                return 1;
            default:
                return 0;
        }
    }

    private static boolean isDay(final int n) {
        return 1 <= n && n <= 31;
    }

    private static boolean isMonth(final int n) {
        return 1 <= n && n <= 12;
    }

    private static boolean isHour(final int n) {
        return 0 <= n && n <= 24;
    }

    private static boolean isMinuteOrSecond(final int n) {
        return 0 <= n && n < 60;
    }

    private static boolean isMillisecond(final int n) {
        return 0<= n && n < 1000;
    }

    private boolean setMonth(final int m) {
        if (!isSet(MONTH)) {
            namedMonth = true;
            set(MONTH, m);
            return true;
        }
        return false;
    }

    private boolean setDateField(final int n) {
        for (int field = YEAR; field != HOUR; field++) {
            if (!isSet(field)) {
                // no validation on legacy date fields
                set(field, n);
                return true;
            }
        }
        return false;
    }

    private boolean setTimeField(final int n) {
        for (int field = HOUR; field != TIMEZONE; field++) {
            if (!isSet(field)) {
                if (checkLegacyField(field, n)) {
                    set(field, n);
                    return true;
                }
                return false;
            }
        }
        return false;
    }

    private boolean setTimezone(final int offset, final boolean asNumericOffset) {
        if (!isSet(TIMEZONE) || (asNumericOffset && get(TIMEZONE) == 0)) {
            set(TIMEZONE, offset);
            return true;
        }
        return false;
    }

    private boolean setAmPm(final int offset) {
        if (!isSet(HOUR)) {
            return false;
        }
        final int hour = get(HOUR);
        if (hour >= 0 && hour <= 12) {
            set(HOUR, hour + offset);
        }
        return true;
    }

    private boolean patchResult(final boolean strict) {
        // sanity checks - make sure we have something
        if (!isSet(YEAR) && !isSet(HOUR)) {
            return false;
        }
        if (isSet(HOUR) && !isSet(MINUTE)) {
            return false;
        }
        // fill in default values for unset fields except timezone
        for (int field = YEAR; field <= TIMEZONE; field++) {
            if (get(field) == null) {
                if (field == TIMEZONE && !strict) {
                    // We only use UTC as default timezone for dates parsed complying with
                    // the format specified in ES5 15.9.1.15. Otherwise the slot is left empty
                    // and local timezone is used.
                    continue;
                }
                final int value = getDefaultValue(field);
                set(field, value);
            }

/**代码未完, 请加载全部代码(NowJava.com).**/
展开阅读全文

关注时代Java

关注时代Java