|
- 'use strict';
-
- var TokenType = require('./const.js').TokenType;
-
// Character codes the scanner compares against.
var TAB = 9;                  // '\t'
var N = 10;                   // '\n'
var F = 12;                   // '\f'
var R = 13;                   // '\r'
var SPACE = 32;               // ' '
var DOUBLE_QUOTE = 34;        // '"'
var QUOTE = 39;               // '\''
var RIGHT_PARENTHESIS = 41;   // ')'
var STAR = 42;                // '*'
var SLASH = 47;               // '/'
var BACK_SLASH = 92;          // '\\'
var UNDERSCORE = 95;          // '_'
var LEFT_CURLY_BRACE = 123;   // '{'
var RIGHT_CURLY_BRACE = 125;  // '}'

// Values stored in SYMBOL_CATEGORY; 0 means "none of these", which
// getToken() reads as the start of an identifier.
var WHITESPACE = 1;
var PUNCTUATOR = 2;
var DIGIT = 3;
var STRING = 4;

// Token type emitted for every single-character punctuator, keyed by char code.
var PUNCTUATION = {
    9: TokenType.Tab,                  // '\t'
    10: TokenType.Newline,             // '\n'
    13: TokenType.Newline,             // '\r'
    32: TokenType.Space,               // ' '
    33: TokenType.ExclamationMark,     // '!'
    34: TokenType.QuotationMark,       // '"'
    35: TokenType.NumberSign,          // '#'
    36: TokenType.DollarSign,          // '$'
    37: TokenType.PercentSign,         // '%'
    38: TokenType.Ampersand,           // '&'
    39: TokenType.Apostrophe,          // '\''
    40: TokenType.LeftParenthesis,     // '('
    41: TokenType.RightParenthesis,    // ')'
    42: TokenType.Asterisk,            // '*'
    43: TokenType.PlusSign,            // '+'
    44: TokenType.Comma,               // ','
    45: TokenType.HyphenMinus,         // '-'
    46: TokenType.FullStop,            // '.'
    47: TokenType.Solidus,             // '/'
    58: TokenType.Colon,               // ':'
    59: TokenType.Semicolon,           // ';'
    60: TokenType.LessThanSign,        // '<'
    61: TokenType.EqualsSign,          // '='
    62: TokenType.GreaterThanSign,     // '>'
    63: TokenType.QuestionMark,        // '?'
    64: TokenType.CommercialAt,        // '@'
    91: TokenType.LeftSquareBracket,   // '['
    93: TokenType.RightSquareBracket,  // ']'
    94: TokenType.CircumflexAccent,    // '^'
    95: TokenType.LowLine,             // '_'
    123: TokenType.LeftCurlyBracket,   // '{'
    124: TokenType.VerticalLine,       // '|'
    125: TokenType.RightCurlyBracket,  // '}'
    126: TokenType.Tilde               // '~'
};

var PUNCTUATION_CODES = Object.keys(PUNCTUATION).map(Number);

// Both lookup tables cover char codes 0 .. highest punctuator code; callers
// check `code < SYMBOL_CATEGORY_LENGTH` before indexing.
var SYMBOL_CATEGORY_LENGTH = Math.max.apply(null, PUNCTUATION_CODES) + 1;
// SYMBOL_CATEGORY[code] -> how getToken() starts a token on that character
var SYMBOL_CATEGORY = new Uint32Array(SYMBOL_CATEGORY_LENGTH);
// IS_PUNCTUATOR[code] === PUNCTUATOR -> the character ends an identifier
var IS_PUNCTUATOR = new Uint32Array(SYMBOL_CATEGORY_LENGTH);

// fill categories
PUNCTUATION_CODES.forEach(function(code) {
    SYMBOL_CATEGORY[code] = PUNCTUATOR;
    IS_PUNCTUATOR[code] = PUNCTUATOR;
});

// don't treat as punctuator: an underscore may appear inside an identifier
IS_PUNCTUATOR[UNDERSCORE] = 0;

// form feed has no entry in PUNCTUATION, yet it is whitespace and a line
// break for the scanner, so it has to end an identifier like '\n' and '\r' do
IS_PUNCTUATOR[F] = PUNCTUATOR;

// 0 .. 9
for (var i = 48; i <= 57; i++) {
    SYMBOL_CATEGORY[i] = DIGIT;
}

// whitespace and quotes start their own token kinds instead of a punctuator
[SPACE, TAB, N, R, F].forEach(function(code) {
    SYMBOL_CATEGORY[code] = WHITESPACE;
});

[QUOTE, DOUBLE_QUOTE].forEach(function(code) {
    SYMBOL_CATEGORY[code] = STRING;
});
- //
- // scanner
- //
-
/**
 * Creates a scanner that produces tokens from `source` on demand.
 *
 * @param {string} source text to tokenize; a leading byte order mark is skipped
 * @param {boolean} [initBlockMode] when truthy, scanning starts as if already
 *     inside one `{ ... }` block, and the block depth never drops below that
 * @param {number} [initLine] line number reported for the first token (default 1)
 * @param {number} [initColumn] column reported for the first character (default 1)
 */
var Scanner = function(source, initBlockMode, initLine, initColumn) {
    var startLine = typeof initLine === 'undefined' ? 1 : initLine;
    var startColumn = typeof initColumn === 'undefined' ? 1 : initColumn;

    this.source = source;

    // skip a leading byte order mark
    this.pos = source.charCodeAt(0) === 0xFEFF ? 1 : 0;
    this.eof = this.pos === this.source.length;
    this.line = startLine;
    // A token's column is computed as `pos - lineStartPos`. Anchoring on the
    // real start position keeps the first visible character at `startColumn`
    // even when a BOM was skipped (it used to come out one column too far).
    this.lineStartPos = this.pos - startColumn;

    // depth of `{ ... }` nesting; never goes below minBlockMode
    this.minBlockMode = initBlockMode ? 1 : 0;
    this.blockMode = this.minBlockMode;
    // set once a `url` identifier is seen, cleared by the next ')'
    this.urlMode = false;

    this.prevToken = null;  // token returned by the next() call before the last one
    this.token = null;      // token returned by the last next() call
    this.buffer = [];       // tokens already scanned by lookup() but not consumed yet
};
-
Scanner.prototype = {
    /**
     * Peeks at a token without consuming it.
     *
     * @param {number} offset 0 for the current token, 1 for the token the
     *     next call to next() will return, 2 for the one after that, ...
     * @returns {Object|null} the token, or null when the input ends before
     *     `offset` tokens are available (for 0: before the first next() call)
     */
    lookup: function(offset) {
        if (offset === 0) {
            return this.token;
        }

        // scan ahead lazily, only as far as requested
        for (var i = this.buffer.length; !this.eof && i < offset; i++) {
            this.buffer.push(this.getToken());
        }

        return offset <= this.buffer.length ? this.buffer[offset - 1] : null;
    },

    /**
     * Tells whether the token at `offset` (see lookup()) exists and has the
     * given type.
     *
     * @param {number} offset
     * @param {*} type a TokenType value
     * @returns {boolean}
     */
    lookupType: function(offset, type) {
        var token = this.lookup(offset);

        return token !== null && token.type === type;
    },

    /**
     * Consumes one token: the previous current token moves to `prevToken`
     * and the new one becomes `token`.
     *
     * @returns {Object|null} the new current token, or null at end of input
     */
    next: function() {
        var newToken = null;

        // tokens scanned ahead by lookup() go first
        if (this.buffer.length !== 0) {
            newToken = this.buffer.shift();
        } else if (!this.eof) {
            newToken = this.getToken();
        }

        this.prevToken = this.token;
        this.token = newToken;

        return newToken;
    },

    /**
     * Scans everything from the current position to the end of the source.
     *
     * @returns {Object[]} the tokens, in source order
     */
    tokenize: function() {
        var tokens = [];

        // getToken() leaves this.pos right behind the token it returns, so
        // the loop must not advance the position itself (doing so dropped
        // the character following every token)
        while (this.pos < this.source.length) {
            tokens.push(this.getToken());
        }

        return tokens;
    },

    /**
     * Scans the token that starts at the current position and moves the
     * position behind it. Also maintains `blockMode`, `urlMode` and `eof`.
     *
     * @returns {{type: *, value: string, offset: number, line: number, column: number}}
     *     `offset` is the index into the source, `line`/`column` locate the
     *     first character of the token
     */
    getToken: function() {
        var code = this.source.charCodeAt(this.pos);
        var line = this.line;
        var column = this.pos - this.lineStartPos;
        var offset = this.pos;
        var next;
        var skip;
        var type;
        var value;

        // characters beyond the category table can only belong to an identifier
        switch (code < SYMBOL_CATEGORY_LENGTH ? SYMBOL_CATEGORY[code] : 0) {
            case DIGIT:
                type = TokenType.DecimalNumber;
                value = this.readDecimalNumber();
                break;

            case STRING:
                type = TokenType.String;
                value = this.readString(code);
                break;

            case WHITESPACE:
                type = TokenType.Space;
                value = this.readSpaces();
                break;

            case PUNCTUATOR:
                if (code === SLASH) {
                    next = this.pos + 1 < this.source.length ? this.source.charCodeAt(this.pos + 1) : 0;

                    if (next === STAR) { // /*
                        type = TokenType.Comment;
                        value = this.readComment();
                        break;
                    } else if (next === SLASH && !this.urlMode) { // //
                        if (this.blockMode > 0) {
                            // inside a block a run of slashes is read as the
                            // start of an identifier
                            skip = 2;

                            // count the slashes; the index has to follow
                            // `skip`, otherwise a third slash never lets
                            // the loop end
                            while (this.source.charCodeAt(this.pos + skip) === SLASH) {
                                skip++;
                            }

                            type = TokenType.Identifier;
                            value = this.readIdentifier(skip);

                            this.urlMode = this.urlMode || value === 'url';
                        } else {
                            // outside of blocks the rest of the line is one
                            // Unknown token
                            type = TokenType.Unknown;
                            value = this.readUnknown();
                        }
                        break;
                    }
                }

                type = PUNCTUATION[code];
                value = String.fromCharCode(code);
                this.pos++;

                if (code === RIGHT_PARENTHESIS) {
                    this.urlMode = false;
                } else if (code === LEFT_CURLY_BRACE) {
                    this.blockMode++;
                } else if (code === RIGHT_CURLY_BRACE) {
                    if (this.blockMode > this.minBlockMode) {
                        this.blockMode--;
                    }
                }

                break;

            default:
                type = TokenType.Identifier;
                value = this.readIdentifier(0);

                this.urlMode = this.urlMode || value === 'url';
        }

        // a backslash that is the very last character of the input makes
        // readString()/readIdentifier() step one position past the end;
        // clamp it so that the end of input is still detected
        if (this.pos > this.source.length) {
            this.pos = this.source.length;
        }

        this.eof = this.pos === this.source.length;

        return {
            type: type,
            value: value,

            offset: offset,
            line: line,
            column: column
        };
    },

    /**
     * Tests whether `code` (the character at the current position) is a line
     * break and, if so, accounts for it: bumps the line counter, records the
     * line start and, for "\r\n", moves the position onto the "\n".
     *
     * @param {number} code char code at this.pos
     * @returns {boolean} true when a line break was registered
     */
    isNewline: function(code) {
        if (code === N || code === F || code === R) {
            // treat \r\n as a single line break
            if (code === R && this.pos + 1 < this.source.length && this.source.charCodeAt(this.pos + 1) === N) {
                this.pos++;
            }

            this.line++;
            this.lineStartPos = this.pos;
            return true;
        }

        return false;
    },

    /**
     * Reads a run of spaces, tabs and line breaks.
     *
     * @returns {string} the whitespace that was read
     */
    readSpaces: function() {
        var start = this.pos;

        for (; this.pos < this.source.length; this.pos++) {
            var code = this.source.charCodeAt(this.pos);

            if (!this.isNewline(code) && code !== SPACE && code !== TAB) {
                break;
            }
        }

        return this.source.substring(start, this.pos);
    },

    /**
     * Reads a comment; the position must be on its opening slash. An
     * unterminated comment extends to the end of the source.
     *
     * @returns {string} the comment including its delimiters
     */
    readComment: function() {
        var start = this.pos;

        // start behind the opening "/*"
        for (this.pos += 2; this.pos < this.source.length; this.pos++) {
            var code = this.source.charCodeAt(this.pos);

            if (code === STAR) { // */
                if (this.source.charCodeAt(this.pos + 1) === SLASH) {
                    this.pos += 2;
                    break;
                }
            } else {
                // keep line/column tracking correct across multi-line comments
                this.isNewline(code);
            }
        }

        return this.source.substring(start, this.pos);
    },

    /**
     * Reads a "//" sequence and everything that follows it on the same line.
     *
     * @returns {string} the text up to, but not including, the line break
     */
    readUnknown: function() {
        var start = this.pos;

        for (this.pos += 2; this.pos < this.source.length; this.pos++) {
            var code = this.source.charCodeAt(this.pos);

            // Stop in front of the line break without going through
            // isNewline(): the break stays in the input for readSpaces(),
            // which registers it. Registering it here as well counted the
            // same line twice.
            if (code === N || code === F || code === R) {
                break;
            }
        }

        return this.source.substring(start, this.pos);
    },

    /**
     * Reads a quoted string; the position must be on the opening quote. An
     * unterminated string extends to the end of the source.
     *
     * @param {number} quote char code of the opening quote
     * @returns {string} the string including its quotes, with every
     *     backslash + line break pair removed
     */
    readString: function(quote) {
        var start = this.pos;
        var res = '';

        for (this.pos++; this.pos < this.source.length; this.pos++) {
            var code = this.source.charCodeAt(this.pos);

            if (code === BACK_SLASH) {
                // step onto the escaped character so that an escaped quote
                // does not end the string
                var end = this.pos++;

                if (this.isNewline(this.source.charCodeAt(this.pos))) {
                    // escaped line break: drop both the backslash and the break
                    res += this.source.substring(start, end);
                    start = this.pos + 1;
                }
            } else if (code === quote) {
                this.pos++;
                break;
            }
        }

        return res + this.source.substring(start, this.pos);
    },

    /**
     * Reads a run of decimal digits; the position must be on the first digit.
     *
     * @returns {string} the digits
     */
    readDecimalNumber: function() {
        var start = this.pos;
        var code;

        for (this.pos++; this.pos < this.source.length; this.pos++) {
            code = this.source.charCodeAt(this.pos);

            if (code < 48 || code > 57) { // 0 .. 9
                break;
            }
        }

        return this.source.substring(start, this.pos);
    },

    /**
     * Reads an identifier, including backslash escapes, up to the next
     * character flagged in IS_PUNCTUATOR.
     *
     * @param {number} skip number of leading characters to accept without
     *     looking at them
     * @returns {string} the identifier text exactly as written in the source
     */
    readIdentifier: function(skip) {
        var start = this.pos;

        for (this.pos += skip; this.pos < this.source.length; this.pos++) {
            var code = this.source.charCodeAt(this.pos);

            if (code === BACK_SLASH) {
                this.pos++;

                // skip escaped unicode sequence that can end with a space
                // [0-9a-f]{1,6}(\r\n|[ \n\r\t\f])?
                for (var i = 0; i < 7 && this.pos + i < this.source.length; i++) {
                    code = this.source.charCodeAt(this.pos + i);

                    // at most six hex digits; the seventh character can only
                    // be the optional trailing whitespace
                    if (i !== 6) {
                        if ((code >= 48 && code <= 57) || // 0 .. 9
                            (code >= 65 && code <= 70) || // A .. F
                            (code >= 97 && code <= 102)) { // a .. f
                            continue;
                        }
                    }

                    if (i > 0) {
                        // move onto the last hex digit, then over the one
                        // whitespace character that may close the escape;
                        // the outer loop's increment steps past it
                        this.pos += i - 1;
                        if (code === SPACE || code === TAB || this.isNewline(code)) {
                            this.pos++;
                        }
                    }

                    break;
                }
            } else if (code < SYMBOL_CATEGORY_LENGTH &&
                       IS_PUNCTUATOR[code] === PUNCTUATOR) {
                break;
            }
        }

        return this.source.substring(start, this.pos);
    }
};
-
- // warm up the tokenizer to eliminate code branches that never execute and
- // to fix soft deoptimizations (insufficient type feedback)
- new Scanner('\n\r\r\n\f//""\'\'/**/1a;.{url(a)}').lookup(1e3); // scans the sample to its end; the tokens are discarded
-
- module.exports = Scanner; // the constructor is the module's only export
|