You cannot select more than 25 topics
			Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
		
		
		
		
		
			
		
			
				
					
					
						
							282 lines
						
					
					
						
							6.4 KiB
						
					
					
				
			
		
		
	
	
							282 lines
						
					
					
						
							6.4 KiB
						
					
					
				var util      = require('./util'); | 
						|
var types     = require('./types'); | 
						|
var sets      = require('./sets'); | 
						|
var positions = require('./positions'); | 
						|
 | 
						|
 | 
						|
module.exports = function(regexpStr) { | 
						|
  var i = 0, l, c, | 
						|
      start = { type: types.ROOT, stack: []}, | 
						|
 | 
						|
      // Keep track of last clause/group and stack. | 
						|
      lastGroup = start, | 
						|
      last = start.stack, | 
						|
      groupStack = []; | 
						|
 | 
						|
 | 
						|
  var repeatErr = function(i) { | 
						|
    util.error(regexpStr, 'Nothing to repeat at column ' + (i - 1)); | 
						|
  }; | 
						|
 | 
						|
  // Decode a few escaped characters. | 
						|
  var str = util.strToChars(regexpStr); | 
						|
  l = str.length; | 
						|
 | 
						|
  // Iterate through each character in string. | 
						|
  while (i < l) { | 
						|
    c = str[i++]; | 
						|
 | 
						|
    switch (c) { | 
						|
      // Handle escaped characters, inclues a few sets. | 
						|
      case '\\': | 
						|
        c = str[i++]; | 
						|
 | 
						|
        switch (c) { | 
						|
          case 'b': | 
						|
            last.push(positions.wordBoundary()); | 
						|
            break; | 
						|
 | 
						|
          case 'B': | 
						|
            last.push(positions.nonWordBoundary()); | 
						|
            break; | 
						|
 | 
						|
          case 'w': | 
						|
            last.push(sets.words()); | 
						|
            break; | 
						|
 | 
						|
          case 'W': | 
						|
            last.push(sets.notWords()); | 
						|
            break; | 
						|
 | 
						|
          case 'd': | 
						|
            last.push(sets.ints()); | 
						|
            break; | 
						|
 | 
						|
          case 'D': | 
						|
            last.push(sets.notInts()); | 
						|
            break; | 
						|
 | 
						|
          case 's': | 
						|
            last.push(sets.whitespace()); | 
						|
            break; | 
						|
 | 
						|
          case 'S': | 
						|
            last.push(sets.notWhitespace()); | 
						|
            break; | 
						|
 | 
						|
          default: | 
						|
            // Check if c is integer. | 
						|
            // In which case it's a reference. | 
						|
            if (/\d/.test(c)) { | 
						|
              last.push({ type: types.REFERENCE, value: parseInt(c, 10) }); | 
						|
 | 
						|
            // Escaped character. | 
						|
            } else { | 
						|
              last.push({ type: types.CHAR, value: c.charCodeAt(0) }); | 
						|
            } | 
						|
        } | 
						|
 | 
						|
        break; | 
						|
 | 
						|
 | 
						|
      // Positionals. | 
						|
      case '^': | 
						|
          last.push(positions.begin()); | 
						|
        break; | 
						|
 | 
						|
      case '$': | 
						|
          last.push(positions.end()); | 
						|
        break; | 
						|
 | 
						|
 | 
						|
      // Handle custom sets. | 
						|
      case '[': | 
						|
        // Check if this class is 'anti' i.e. [^abc]. | 
						|
        var not; | 
						|
        if (str[i] === '^') { | 
						|
          not = true; | 
						|
          i++; | 
						|
        } else { | 
						|
          not = false; | 
						|
        } | 
						|
 | 
						|
        // Get all the characters in class. | 
						|
        var classTokens = util.tokenizeClass(str.slice(i), regexpStr); | 
						|
 | 
						|
        // Increase index by length of class. | 
						|
        i += classTokens[1]; | 
						|
        last.push({ | 
						|
          type: types.SET, | 
						|
          set: classTokens[0], | 
						|
          not: not, | 
						|
        }); | 
						|
 | 
						|
        break; | 
						|
 | 
						|
 | 
						|
      // Class of any character except \n. | 
						|
      case '.': | 
						|
        last.push(sets.anyChar()); | 
						|
        break; | 
						|
 | 
						|
 | 
						|
      // Push group onto stack. | 
						|
      case '(': | 
						|
        // Create group. | 
						|
        var group = { | 
						|
          type: types.GROUP, | 
						|
          stack: [], | 
						|
          remember: true, | 
						|
        }; | 
						|
 | 
						|
        c = str[i]; | 
						|
 | 
						|
        // If if this is a special kind of group. | 
						|
        if (c === '?') { | 
						|
          c = str[i + 1]; | 
						|
          i += 2; | 
						|
 | 
						|
          // Match if followed by. | 
						|
          if (c === '=') { | 
						|
            group.followedBy = true; | 
						|
 | 
						|
          // Match if not followed by. | 
						|
          } else if (c === '!') { | 
						|
            group.notFollowedBy = true; | 
						|
 | 
						|
          } else if (c !== ':') { | 
						|
            util.error(regexpStr, | 
						|
              'Invalid group, character \'' + c + | 
						|
              '\' after \'?\' at column ' + (i - 1)); | 
						|
          } | 
						|
 | 
						|
          group.remember = false; | 
						|
        } | 
						|
 | 
						|
        // Insert subgroup into current group stack. | 
						|
        last.push(group); | 
						|
 | 
						|
        // Remember the current group for when the group closes. | 
						|
        groupStack.push(lastGroup); | 
						|
 | 
						|
        // Make this new group the current group. | 
						|
        lastGroup = group; | 
						|
        last = group.stack; | 
						|
        break; | 
						|
 | 
						|
 | 
						|
      // Pop group out of stack. | 
						|
      case ')': | 
						|
        if (groupStack.length === 0) { | 
						|
          util.error(regexpStr, 'Unmatched ) at column ' + (i - 1)); | 
						|
        } | 
						|
        lastGroup = groupStack.pop(); | 
						|
 | 
						|
        // Check if this group has a PIPE. | 
						|
        // To get back the correct last stack. | 
						|
        last = lastGroup.options ? | 
						|
          lastGroup.options[lastGroup.options.length - 1] : lastGroup.stack; | 
						|
        break; | 
						|
 | 
						|
 | 
						|
      // Use pipe character to give more choices. | 
						|
      case '|': | 
						|
        // Create array where options are if this is the first PIPE | 
						|
        // in this clause. | 
						|
        if (!lastGroup.options) { | 
						|
          lastGroup.options = [lastGroup.stack]; | 
						|
          delete lastGroup.stack; | 
						|
        } | 
						|
 | 
						|
        // Create a new stack and add to options for rest of clause. | 
						|
        var stack = []; | 
						|
        lastGroup.options.push(stack); | 
						|
        last = stack; | 
						|
        break; | 
						|
 | 
						|
 | 
						|
      // Repetition. | 
						|
      // For every repetition, remove last element from last stack | 
						|
      // then insert back a RANGE object. | 
						|
      // This design is chosen because there could be more than | 
						|
      // one repetition symbols in a regex i.e. `a?+{2,3}`. | 
						|
      case '{': | 
						|
        var rs = /^(\d+)(,(\d+)?)?\}/.exec(str.slice(i)), min, max; | 
						|
        if (rs !== null) { | 
						|
          if (last.length === 0) { | 
						|
            repeatErr(i); | 
						|
          } | 
						|
          min = parseInt(rs[1], 10); | 
						|
          max = rs[2] ? rs[3] ? parseInt(rs[3], 10) : Infinity : min; | 
						|
          i += rs[0].length; | 
						|
 | 
						|
          last.push({ | 
						|
            type: types.REPETITION, | 
						|
            min: min, | 
						|
            max: max, | 
						|
            value: last.pop(), | 
						|
          }); | 
						|
        } else { | 
						|
          last.push({ | 
						|
            type: types.CHAR, | 
						|
            value: 123, | 
						|
          }); | 
						|
        } | 
						|
        break; | 
						|
 | 
						|
      case '?': | 
						|
        if (last.length === 0) { | 
						|
          repeatErr(i); | 
						|
        } | 
						|
        last.push({ | 
						|
          type: types.REPETITION, | 
						|
          min: 0, | 
						|
          max: 1, | 
						|
          value: last.pop(), | 
						|
        }); | 
						|
        break; | 
						|
 | 
						|
      case '+': | 
						|
        if (last.length === 0) { | 
						|
          repeatErr(i); | 
						|
        } | 
						|
        last.push({ | 
						|
          type: types.REPETITION, | 
						|
          min: 1, | 
						|
          max: Infinity, | 
						|
          value: last.pop(), | 
						|
        }); | 
						|
        break; | 
						|
 | 
						|
      case '*': | 
						|
        if (last.length === 0) { | 
						|
          repeatErr(i); | 
						|
        } | 
						|
        last.push({ | 
						|
          type: types.REPETITION, | 
						|
          min: 0, | 
						|
          max: Infinity, | 
						|
          value: last.pop(), | 
						|
        }); | 
						|
        break; | 
						|
 | 
						|
 | 
						|
      // Default is a character that is not `\[](){}?+*^$`. | 
						|
      default: | 
						|
        last.push({ | 
						|
          type: types.CHAR, | 
						|
          value: c.charCodeAt(0), | 
						|
        }); | 
						|
    } | 
						|
 | 
						|
  } | 
						|
 | 
						|
  // Check if any groups have not been closed. | 
						|
  if (groupStack.length !== 0) { | 
						|
    util.error(regexpStr, 'Unterminated group'); | 
						|
  } | 
						|
 | 
						|
  return start; | 
						|
}; | 
						|
 | 
						|
// Expose the `types` module on the exported tokenizer function as `.types`.
module.exports.types = types;
 | 
						|
 |