xregexp.js (65740B)
1 /*! 2 * XRegExp 3.0.0-pre 3 * <http://xregexp.com/> 4 * Steven Levithan © 2007-2012 MIT License 5 */ 6 7 /** 8 * XRegExp provides augmented, extensible regular expressions. You get new syntax, flags, and 9 * methods beyond what browsers support natively. XRegExp is also a regex utility belt with tools 10 * to make your client-side grepping simpler and more powerful, while freeing you from worrying 11 * about pesky cross-browser inconsistencies and the dubious `lastIndex` property. 12 */ 13 var XRegExp = (function(undefined) { 14 'use strict'; 15 16 /* ============================== 17 * Private variables 18 * ============================== */ 19 20 var // ... 21 22 // Property name used for extended regex instance data 23 REGEX_DATA = 'xregexp', 24 25 // Internal reference to the `XRegExp` object 26 self, 27 28 // Optional features that can be installed and uninstalled 29 features = { 30 astral: false, 31 natives: false 32 }, 33 34 // Store native methods to use and restore ('native' is an ES3 reserved keyword) 35 nativ = { 36 exec: RegExp.prototype.exec, 37 test: RegExp.prototype.test, 38 match: String.prototype.match, 39 replace: String.prototype.replace, 40 split: String.prototype.split 41 }, 42 43 // Storage for fixed/extended native methods 44 fixed = {}, 45 46 // Storage for regexes cached by `XRegExp.cache` 47 cache = {}, 48 49 // Storage for pattern details cached by the `XRegExp` constructor 50 patternCache = {}, 51 52 // Storage for regex syntax tokens added internally or by `XRegExp.addToken` 53 tokens = [], 54 55 // Token scopes 56 defaultScope = 'default', 57 classScope = 'class', 58 59 // Regexes that match native regex syntax, including octals 60 nativeTokens = { 61 // Any native multicharacter token in default scope, or any single character 62 'default': /\\(?:0(?:[0-3][0-7]{0,2}|[4-7][0-7]?)?|[1-9]\d*|x[\dA-Fa-f]{2}|u[\dA-Fa-f]{4}|c[A-Za-z]|[\s\S])|\(\?[:=!]|[?*+]\?|{\d+(?:,\d*)?}\??|[\s\S]/, 63 // Any native multicharacter token in character 
class scope, or any single character 64 'class': /\\(?:[0-3][0-7]{0,2}|[4-7][0-7]?|x[\dA-Fa-f]{2}|u[\dA-Fa-f]{4}|c[A-Za-z]|[\s\S])|[\s\S]/ 65 }, 66 67 // Any backreference or dollar-prefixed character in replacement strings 68 replacementToken = /\$(?:{([\w$]+)}|(\d\d?|[\s\S]))/g, 69 70 // Check for correct `exec` handling of nonparticipating capturing groups 71 correctExecNpcg = nativ.exec.call(/()??/, '')[1] === undefined, 72 73 // Check for flag y support 74 hasNativeY = RegExp.prototype.sticky !== undefined, 75 76 // Tracker for known flags, including addon flags 77 registeredFlags = { 78 g: true, 79 i: true, 80 m: true, 81 y: hasNativeY 82 }, 83 84 // Shortcut to `Object.prototype.toString` 85 toString = {}.toString, 86 87 // Shortcut to `XRegExp.addToken` 88 add; 89 90 /* ============================== 91 * Private functions 92 * ============================== */ 93 94 /** 95 * Attaches named capture data and `XRegExp.prototype` properties to a regex object. 96 * @private 97 * @param {RegExp} regex Regex to augment. 98 * @param {Array} captureNames Array with capture names, or `null`. 99 * @param {Boolean} [addProto=false] Whether to attach `XRegExp.prototype` properties. Not 100 * attaching properties avoids a minor performance penalty. 101 * @returns {RegExp} Augmented regex. 
102 */ 103 function augment(regex, captureNames, addProto) { 104 var p; 105 106 if (addProto) { 107 // Can't auto-inherit these since the XRegExp constructor returns a nonprimitive value 108 if (regex.__proto__) { 109 regex.__proto__ = self.prototype; 110 } else { 111 for (p in self.prototype) { 112 // A `self.prototype.hasOwnProperty(p)` check wouldn't be worth it here, since 113 // this is performance sensitive, and enumerable `Object.prototype` or 114 // `RegExp.prototype` extensions exist on `regex.prototype` anyway 115 regex[p] = self.prototype[p]; 116 } 117 } 118 } 119 120 regex[REGEX_DATA] = {captureNames: captureNames}; 121 122 return regex; 123 } 124 125 /** 126 * Removes any duplicate characters from the provided string. 127 * @private 128 * @param {String} str String to remove duplicate characters from. 129 * @returns {String} String with any duplicate characters removed. 130 */ 131 function clipDuplicates(str) { 132 return nativ.replace.call(str, /([\s\S])(?=[\s\S]*\1)/g, ''); 133 } 134 135 /** 136 * Copies a regex object while preserving special properties for named capture and augmenting with 137 * `XRegExp.prototype` methods. The copy has a fresh `lastIndex` property (set to zero). Allows 138 * adding and removing native flags while copying the regex. 139 * @private 140 * @param {RegExp} regex Regex to copy. 141 * @param {Object} [options] Allows specifying native flags to add or remove while copying the 142 * regex, and whether to attach `XRegExp.prototype` properties. 143 * @returns {RegExp} Copy of the provided regex, possibly with modified flags. 
144 */ 145 function copy(regex, options) { 146 if (!self.isRegExp(regex)) { 147 throw new TypeError('Type RegExp expected'); 148 } 149 150 // Get native flags in use 151 var flags = nativ.exec.call(/\/([a-z]*)$/i, String(regex))[1]; 152 options = options || {}; 153 154 if (options.add) { 155 flags = clipDuplicates(flags + options.add); 156 } 157 158 if (options.remove) { 159 // Would need to escape `options.remove` if this was public 160 flags = nativ.replace.call(flags, new RegExp('[' + options.remove + ']+', 'g'), ''); 161 } 162 163 // Augment with `XRegExp.prototype` methods, but use the native `RegExp` constructor and 164 // avoid searching for special tokens. That would be wrong for regexes constructed by 165 // `RegExp`, and unnecessary for regexes constructed by `XRegExp` because the regex has 166 // already undergone the translation to native regex syntax 167 regex = augment( 168 new RegExp(regex.source, flags), 169 hasNamedCapture(regex) ? regex[REGEX_DATA].captureNames.slice(0) : null, 170 options.addProto 171 ); 172 173 return regex; 174 } 175 176 /** 177 * Returns a new copy of the object used to hold extended regex instance data, tailored for a 178 * native nonaugmented regex. 179 * @private 180 * @returns {Object} Object with base regex instance data. 181 */ 182 function getBaseProps() { 183 return {captureNames: null}; 184 } 185 186 /** 187 * Determines whether a regex has extended instance data used to track capture names. 188 * @private 189 * @param {RegExp} regex Regex to check. 190 * @returns {Boolean} Whether the regex uses named capture. 191 */ 192 function hasNamedCapture(regex) { 193 return !!(regex[REGEX_DATA] && regex[REGEX_DATA].captureNames); 194 } 195 196 /** 197 * Returns the first index at which a given value can be found in an array. 198 * @private 199 * @param {Array} array Array to search. 200 * @param {*} value Value to locate in the array. 201 * @returns {Number} Zero-based index at which the item is found, or -1. 
202 */ 203 function indexOf(array, value) { 204 // Use the native array method, if available 205 if (Array.prototype.indexOf) { 206 return array.indexOf(value); 207 } 208 209 var len = array.length, i; 210 211 // Not a very good shim, but good enough for XRegExp's use of it 212 for (i = 0; i < len; ++i) { 213 if (array[i] === value) { 214 return i; 215 } 216 } 217 218 return -1; 219 } 220 221 /** 222 * Determines whether a value is of the specified type, by resolving its internal [[Class]]. 223 * @private 224 * @param {*} value Object to check. 225 * @param {String} type Type to check for, in TitleCase. 226 * @returns {Boolean} Whether the object matches the type. 227 */ 228 function isType(value, type) { 229 return toString.call(value) === '[object ' + type + ']'; 230 } 231 232 /** 233 * Checks whether the next nonignorable token after the specified position is a quantifier. 234 * @private 235 * @param {String} pattern Pattern to search within. 236 * @param {Number} pos Index in `pattern` to search at. 237 * @param {String} flags Flags used by the pattern. 238 * @returns {Boolean} Whether the next token is a quantifier. 239 */ 240 function isQuantifierNext(pattern, pos, flags) { 241 return nativ.test.call( 242 flags.indexOf('x') > -1 ? 243 // Ignore any leading whitespace, line comments, and inline comments 244 /^(?:\s+|#.*|\(\?#[^)]*\))*(?:[?*+]|{\d+(?:,\d*)?})/ : 245 // Ignore any leading inline comments 246 /^(?:\(\?#[^)]*\))*(?:[?*+]|{\d+(?:,\d*)?})/, 247 pattern.slice(pos) 248 ); 249 } 250 251 /** 252 * Checks for flag-related errors, and strips/applies flags in a leading mode modifier. Offloads 253 * the flag preparation logic from the `XRegExp` constructor. 254 * @private 255 * @param {String} pattern Regex pattern, possibly with a leading mode modifier. 256 * @param {String} flags Any combination of flags. 257 * @returns {Object} Object with properties `pattern` and `flags`. 
258 */ 259 function prepareFlags(pattern, flags) { 260 var i; 261 262 // Recent browsers throw on duplicate flags, so copy this behavior for nonnative flags 263 if (clipDuplicates(flags) !== flags) { 264 throw new SyntaxError('Invalid duplicate regex flag ' + flags); 265 } 266 267 // Strip and apply a leading mode modifier with any combination of flags except g or y 268 pattern = nativ.replace.call(pattern, /^\(\?([\w$]+)\)/, function($0, $1) { 269 if (nativ.test.call(/[gy]/, $1)) { 270 throw new SyntaxError('Cannot use flag g or y in mode modifier ' + $0); 271 } 272 // Allow duplicate flags within the mode modifier 273 flags = clipDuplicates(flags + $1); 274 return ''; 275 }); 276 277 // Throw on unknown native or nonnative flags 278 for (i = 0; i < flags.length; ++i) { 279 if (!registeredFlags[flags.charAt(i)]) { 280 throw new SyntaxError('Unknown regex flag ' + flags.charAt(i)); 281 } 282 } 283 284 return { 285 pattern: pattern, 286 flags: flags 287 }; 288 } 289 290 /** 291 * Prepares an options object from the given value. 292 * @private 293 * @param {String|Object} value Value to convert to an options object. 294 * @returns {Object} Options object. 295 */ 296 function prepareOptions(value) { 297 value = value || {}; 298 299 if (isType(value, 'String')) { 300 value = self.forEach(value, /[^\s,]+/, function(match) { 301 this[match] = true; 302 }, {}); 303 } 304 305 return value; 306 } 307 308 /** 309 * Registers a flag so it doesn't throw an 'unknown flag' error. 310 * @private 311 * @param {String} flag Single-character flag to register. 312 */ 313 function registerFlag(flag) { 314 if (!/^[\w$]$/.test(flag)) { 315 throw new Error('Flag must be a single character A-Za-z0-9_$'); 316 } 317 318 registeredFlags[flag] = true; 319 } 320 321 /** 322 * Runs built-in and custom regex syntax tokens in reverse insertion order at the specified 323 * position, until a match is found. 
324 * @private 325 * @param {String} pattern Original pattern from which an XRegExp object is being built. 326 * @param {String} flags Flags being used to construct the regex. 327 * @param {Number} pos Position to search for tokens within `pattern`. 328 * @param {Number} scope Regex scope to apply: 'default' or 'class'. 329 * @param {Object} context Context object to use for token handler functions. 330 * @returns {Object} Object with properties `matchLength`, `output`, and `reparse`; or `null`. 331 */ 332 function runTokens(pattern, flags, pos, scope, context) { 333 var i = tokens.length, 334 result = null, 335 match, 336 t; 337 338 // Run in reverse insertion order 339 while (i--) { 340 t = tokens[i]; 341 if ( 342 (t.scope === scope || t.scope === 'all') && 343 (!t.flag || flags.indexOf(t.flag) > -1) 344 ) { 345 match = self.exec(pattern, t.regex, pos, 'sticky'); 346 if (match) { 347 result = { 348 matchLength: match[0].length, 349 output: t.handler.call(context, match, scope, flags), 350 reparse: t.reparse 351 }; 352 // Finished with token tests 353 break; 354 } 355 } 356 } 357 358 return result; 359 } 360 361 /** 362 * Enables or disables implicit astral mode opt-in. 363 * @private 364 * @param {Boolean} on `true` to enable; `false` to disable. 365 */ 366 function setAstral(on) { 367 // Reset the pattern cache used by the `XRegExp` constructor, since the same pattern and 368 // flags might now produce different results 369 self.cache.flush('patterns'); 370 371 features.astral = on; 372 } 373 374 /** 375 * Enables or disables native method overrides. 376 * @private 377 * @param {Boolean} on `true` to enable; `false` to disable. 378 */ 379 function setNatives(on) { 380 RegExp.prototype.exec = (on ? fixed : nativ).exec; 381 RegExp.prototype.test = (on ? fixed : nativ).test; 382 String.prototype.match = (on ? fixed : nativ).match; 383 String.prototype.replace = (on ? fixed : nativ).replace; 384 String.prototype.split = (on ? 
fixed : nativ).split; 385 386 features.natives = on; 387 } 388 389 /** 390 * Returns the object, or throws an error if it is `null` or `undefined`. This is used to follow 391 * the ES5 abstract operation `ToObject`. 392 * @private 393 * @param {*} value Object to check and return. 394 * @returns {*} The provided object. 395 */ 396 function toObject(value) { 397 // This matches both `null` and `undefined` 398 if (value == null) { 399 throw new TypeError('Cannot convert null or undefined to object'); 400 } 401 402 return value; 403 } 404 405 /* ============================== 406 * Constructor 407 * ============================== */ 408 409 /** 410 * Creates an extended regular expression object for matching text with a pattern. Differs from a 411 * native regular expression in that additional syntax and flags are supported. The returned object 412 * is in fact a native `RegExp` and works with all native methods. 413 * @class XRegExp 414 * @constructor 415 * @param {String|RegExp} pattern Regex pattern string, or an existing regex object to copy. 416 * @param {String} [flags] Any combination of flags. 417 * Native flags: 418 * <li>`g` - global 419 * <li>`i` - ignore case 420 * <li>`m` - multiline anchors 421 * <li>`y` - sticky (Firefox 3+) 422 * Additional XRegExp flags: 423 * <li>`n` - explicit capture 424 * <li>`s` - dot matches all (aka singleline) 425 * <li>`x` - free-spacing and line comments (aka extended) 426 * <li>`A` - astral (requires the Unicode Base addon) 427 * Flags cannot be provided when constructing one `RegExp` from another. 428 * @returns {RegExp} Extended regular expression object. 429 * @example 430 * 431 * // With named capture and flag x 432 * XRegExp('(?<year> [0-9]{4} ) -? # year \n\ 433 * (?<month> [0-9]{2} ) -? # month \n\ 434 * (?<day> [0-9]{2} ) # day ', 'x'); 435 * 436 * // Providing a regex object copies it. Native regexes are recompiled using native (not XRegExp) 437 * // syntax. 
Copies maintain special properties for named capture, are augmented with 438 * // `XRegExp.prototype` methods, and have fresh `lastIndex` properties (set to zero). 439 * XRegExp(/regex/); 440 */ 441 self = function(pattern, flags) { 442 var context = { 443 hasNamedCapture: false, 444 captureNames: [] 445 }, 446 scope = defaultScope, 447 output = '', 448 pos = 0, 449 result, 450 token, 451 key; 452 453 if (self.isRegExp(pattern)) { 454 if (flags !== undefined) { 455 throw new TypeError('Cannot supply flags when copying a RegExp'); 456 } 457 return copy(pattern, {addProto: true}); 458 } 459 460 // Copy the argument behavior of `RegExp` 461 pattern = pattern === undefined ? '' : String(pattern); 462 flags = flags === undefined ? '' : String(flags); 463 464 // Cache-lookup key; intentionally using an invalid regex sequence as the separator 465 key = pattern + '***' + flags; 466 467 if (!patternCache[key]) { 468 // Check for flag-related errors, and strip/apply flags in a leading mode modifier 469 result = prepareFlags(pattern, flags); 470 pattern = result.pattern; 471 flags = result.flags; 472 473 // Use XRegExp's syntax tokens to translate the pattern to a native regex pattern... 
474 // `pattern.length` may change on each iteration, if tokens use the `reparse` option 475 while (pos < pattern.length) { 476 do { 477 // Check for custom tokens at the current position 478 result = runTokens(pattern, flags, pos, scope, context); 479 // If the matched token used the `reparse` option, splice its output into the 480 // pattern before running tokens again at the same position 481 if (result && result.reparse) { 482 pattern = pattern.slice(0, pos) + 483 result.output + 484 pattern.slice(pos + result.matchLength); 485 } 486 } while (result && result.reparse); 487 488 if (result) { 489 output += result.output; 490 pos += (result.matchLength || 1); 491 } else { 492 // Get the native token at the current position 493 token = self.exec(pattern, nativeTokens[scope], pos, 'sticky')[0]; 494 output += token; 495 pos += token.length; 496 if (token === '[' && scope === defaultScope) { 497 scope = classScope; 498 } else if (token === ']' && scope === classScope) { 499 scope = defaultScope; 500 } 501 } 502 } 503 504 patternCache[key] = { 505 // Cleanup token cruft: repeated `(?:)(?:)` and leading/trailing `(?:)` 506 pattern: nativ.replace.call(output, /\(\?:\)(?=\(\?:\))|^\(\?:\)|\(\?:\)$/g, ''), 507 // Strip all but native flags 508 flags: nativ.replace.call(flags, /[^gimy]+/g, ''), 509 // `context.captureNames` has an item for each capturing group, even if unnamed 510 captures: context.hasNamedCapture ? context.captureNames : null 511 } 512 } 513 514 key = patternCache[key]; 515 return augment(new RegExp(key.pattern, key.flags), key.captures, /*addProto*/ true); 516 }; 517 518 // Add `RegExp.prototype` to the prototype chain 519 self.prototype = new RegExp; 520 521 /* ============================== 522 * Public properties 523 * ============================== */ 524 525 /** 526 * The XRegExp version number. 
527 * @static 528 * @memberOf XRegExp 529 * @type String 530 */ 531 self.version = '3.0.0-pre'; 532 533 /* ============================== 534 * Public methods 535 * ============================== */ 536 537 /** 538 * Extends XRegExp syntax and allows custom flags. This is used internally and can be used to 539 * create XRegExp addons. If more than one token can match the same string, the last added wins. 540 * @memberOf XRegExp 541 * @param {RegExp} regex Regex object that matches the new token. 542 * @param {Function} handler Function that returns a new pattern string (using native regex syntax) 543 * to replace the matched token within all future XRegExp regexes. Has access to persistent 544 * properties of the regex being built, through `this`. Invoked with three arguments: 545 * <li>The match array, with named backreference properties. 546 * <li>The regex scope where the match was found: 'default' or 'class'. 547 * <li>The flags used by the regex, including any flags in a leading mode modifier. 548 * The handler function becomes part of the XRegExp construction process, so be careful not to 549 * construct XRegExps within the function or you will trigger infinite recursion. 550 * @param {Object} [options] Options object with optional properties: 551 * <li>`scope` {String} Scope where the token applies: 'default', 'class', or 'all'. 552 * <li>`flag` {String} Single-character flag that triggers the token. This also registers the 553 * flag, which prevents XRegExp from throwing an 'unknown flag' error when the flag is used. 554 * <li>`optionalFlags` {String} Any custom flags checked for within the token `handler` that are 555 * not required to trigger the token. This registers the flags, to prevent XRegExp from 556 * throwing an 'unknown flag' error when any of the flags are used. 557 * <li>`reparse` {Boolean} Whether the `handler` function's output should not be treated as 558 * final, and instead be reparseable by other tokens (including the current token). 
Allows 559 * token chaining or deferring. 560 * @example 561 * 562 * // Basic usage: Add \a for the ALERT control code 563 * XRegExp.addToken( 564 * /\\a/, 565 * function() {return '\\x07';}, 566 * {scope: 'all'} 567 * ); 568 * XRegExp('\\a[\\a-\\n]+').test('\x07\n\x07'); // -> true 569 * 570 * // Add the U (ungreedy) flag from PCRE and RE2, which reverses greedy and lazy quantifiers 571 * XRegExp.addToken( 572 * /([?*+]|{\d+(?:,\d*)?})(\??)/, 573 * function(match) {return match[1] + (match[2] ? '' : '?');}, 574 * {flag: 'U'} 575 * ); 576 * XRegExp('a+', 'U').exec('aaa')[0]; // -> 'a' 577 * XRegExp('a+?', 'U').exec('aaa')[0]; // -> 'aaa' 578 */ 579 self.addToken = function(regex, handler, options) { 580 options = options || {}; 581 var optionalFlags = options.optionalFlags, i; 582 583 if (options.flag) { 584 registerFlag(options.flag); 585 } 586 587 if (optionalFlags) { 588 optionalFlags = nativ.split.call(optionalFlags, ''); 589 for (i = 0; i < optionalFlags.length; ++i) { 590 registerFlag(optionalFlags[i]); 591 } 592 } 593 594 // Add to the private list of syntax tokens 595 tokens.push({ 596 regex: copy(regex, {add: 'g' + (hasNativeY ? 'y' : '')}), 597 handler: handler, 598 scope: options.scope || defaultScope, 599 flag: options.flag, 600 reparse: options.reparse 601 }); 602 603 // Reset the pattern cache used by the `XRegExp` constructor, since the same pattern and 604 // flags might now produce different results 605 self.cache.flush('patterns'); 606 }; 607 608 /** 609 * Caches and returns the result of calling `XRegExp(pattern, flags)`. On any subsequent call with 610 * the same pattern and flag combination, the cached copy of the regex is returned. 611 * @memberOf XRegExp 612 * @param {String} pattern Regex pattern string. 613 * @param {String} [flags] Any combination of XRegExp flags. 614 * @returns {RegExp} Cached XRegExp object. 
615 * @example 616 * 617 * while (match = XRegExp.cache('.', 'gs').exec(str)) { 618 * // The regex is compiled once only 619 * } 620 */ 621 self.cache = function(pattern, flags) { 622 var key = pattern + '***' + (flags || ''); 623 return cache[key] || (cache[key] = self(pattern, flags)); 624 }; 625 626 // Intentionally undocumented 627 self.cache.flush = function(cacheName) { 628 if (cacheName === 'patterns') { 629 // Flush the pattern cache used by the `XRegExp` constructor 630 patternCache = {}; 631 } else { 632 // Flush the regex object cache populated by `XRegExp.cache` 633 cache = {}; 634 } 635 }; 636 637 /** 638 * Escapes any regular expression metacharacters, for use when matching literal strings. The result 639 * can safely be used at any point within a regex that uses any flags. 640 * @memberOf XRegExp 641 * @param {String} str String to escape. 642 * @returns {String} String with regex metacharacters escaped. 643 * @example 644 * 645 * XRegExp.escape('Escaped? <.>'); 646 * // -> 'Escaped\?\ <\.>' 647 */ 648 self.escape = function(str) { 649 return nativ.replace.call(toObject(str), /[-[\]{}()*+?.,\\^$|#\s]/g, '\\$&'); 650 }; 651 652 /** 653 * Executes a regex search in a specified string. Returns a match array or `null`. If the provided 654 * regex uses named capture, named backreference properties are included on the match array. 655 * Optional `pos` and `sticky` arguments specify the search start position, and whether the match 656 * must start at the specified position only. The `lastIndex` property of the provided regex is not 657 * used, but is updated for compatibility. Also fixes browser bugs compared to the native 658 * `RegExp.prototype.exec` and can be used reliably cross-browser. 659 * @memberOf XRegExp 660 * @param {String} str String to search. 661 * @param {RegExp} regex Regex to search with. 662 * @param {Number} [pos=0] Zero-based index at which to start the search. 
663 * @param {Boolean|String} [sticky=false] Whether the match must start at the specified position 664 * only. The string `'sticky'` is accepted as an alternative to `true`. 665 * @returns {Array} Match array with named backreference properties, or `null`. 666 * @example 667 * 668 * // Basic use, with named backreference 669 * var match = XRegExp.exec('U+2620', XRegExp('U\\+(?<hex>[0-9A-F]{4})')); 670 * match.hex; // -> '2620' 671 * 672 * // With pos and sticky, in a loop 673 * var pos = 2, result = [], match; 674 * while (match = XRegExp.exec('<1><2><3><4>5<6>', /<(\d)>/, pos, 'sticky')) { 675 * result.push(match[1]); 676 * pos = match.index + match[0].length; 677 * } 678 * // result -> ['2', '3', '4'] 679 */ 680 self.exec = function(str, regex, pos, sticky) { 681 var cacheFlags = 'g', match, r2; 682 683 if (hasNativeY && (sticky || (regex.sticky && sticky !== false))) { 684 cacheFlags += 'y'; 685 } 686 687 regex[REGEX_DATA] = regex[REGEX_DATA] || getBaseProps(); 688 689 // Shares cached copies with `XRegExp.match`/`replace` 690 r2 = regex[REGEX_DATA][cacheFlags] || ( 691 regex[REGEX_DATA][cacheFlags] = copy(regex, { 692 add: cacheFlags, 693 remove: sticky === false ? 'y' : '' 694 }) 695 ); 696 697 r2.lastIndex = pos = pos || 0; 698 699 // Fixed `exec` required for `lastIndex` fix, named backreferences, etc. 700 match = fixed.exec.call(r2, str); 701 702 if (sticky && match && match.index !== pos) { 703 match = null; 704 } 705 706 if (regex.global) { 707 regex.lastIndex = match ? r2.lastIndex : 0; 708 } 709 710 return match; 711 }; 712 713 /** 714 * Executes a provided function once per regex match. 715 * @memberOf XRegExp 716 * @param {String} str String to search. 717 * @param {RegExp} regex Regex to search with. 718 * @param {Function} callback Function to execute for each match. Invoked with four arguments: 719 * <li>The match array, with named backreference properties. 720 * <li>The zero-based match index. 721 * <li>The string being traversed. 
722 * <li>The regex object being used to traverse the string. 723 * @param {*} [context] Object to use as `this` when executing `callback`. 724 * @returns {*} Provided `context` object. 725 * @example 726 * 727 * // Extracts every other digit from a string 728 * XRegExp.forEach('1a2345', /\d/, function(match, i) { 729 * if (i % 2) this.push(+match[0]); 730 * }, []); 731 * // -> [2, 4] 732 */ 733 self.forEach = function(str, regex, callback, context) { 734 var pos = 0, 735 i = -1, 736 match; 737 738 while ((match = self.exec(str, regex, pos))) { 739 // Because `regex` is provided to `callback`, the function can use the deprecated/ 740 // nonstandard `RegExp.prototype.compile` to mutate the regex. However, since 741 // `XRegExp.exec` doesn't use `lastIndex` to set the search position, this can't lead 742 // to an infinite loop, at least. Actually, because of the way `XRegExp.exec` caches 743 // globalized versions of regexes, mutating the regex will not have any effect on the 744 // iteration or matched strings, which is a nice side effect that brings extra safety 745 callback.call(context, match, ++i, str, regex); 746 747 pos = match.index + (match[0].length || 1); 748 } 749 750 return context; 751 }; 752 753 /** 754 * Copies a regex object and adds flag `g`. The copy maintains special properties for named 755 * capture, is augmented with `XRegExp.prototype` methods, and has a fresh `lastIndex` property 756 * (set to zero). Native regexes are not recompiled using XRegExp syntax. 757 * @memberOf XRegExp 758 * @param {RegExp} regex Regex to globalize. 759 * @returns {RegExp} Copy of the provided regex with flag `g` added. 760 * @example 761 * 762 * var globalCopy = XRegExp.globalize(/regex/); 763 * globalCopy.global; // -> true 764 */ 765 self.globalize = function(regex) { 766 return copy(regex, {add: 'g', addProto: true}); 767 }; 768 769 /** 770 * Installs optional features according to the specified options. Can be undone using 771 * {@link #XRegExp.uninstall}. 
772 * @memberOf XRegExp 773 * @param {Object|String} options Options object or string. 774 * @example 775 * 776 * // With an options object 777 * XRegExp.install({ 778 * // Enables support for astral code points in Unicode addons (implicitly sets flag A) 779 * astral: true, 780 * 781 * // Overrides native regex methods with fixed/extended versions that support named 782 * // backreferences and fix numerous cross-browser bugs 783 * natives: true 784 * }); 785 * 786 * // With an options string 787 * XRegExp.install('astral natives'); 788 */ 789 self.install = function(options) { 790 options = prepareOptions(options); 791 792 if (!features.astral && options.astral) { 793 setAstral(true); 794 } 795 796 if (!features.natives && options.natives) { 797 setNatives(true); 798 } 799 }; 800 801 /** 802 * Checks whether an individual optional feature is installed. 803 * @memberOf XRegExp 804 * @param {String} feature Name of the feature to check. One of: 805 * <li>`natives` 806 * <li>`astral` 807 * @returns {Boolean} Whether the feature is installed. 808 * @example 809 * 810 * XRegExp.isInstalled('natives'); 811 */ 812 self.isInstalled = function(feature) { 813 return !!(features[feature]); 814 }; 815 816 /** 817 * Returns `true` if an object is a regex; `false` if it isn't. This works correctly for regexes 818 * created in another frame, when `instanceof` and `constructor` checks would fail. 819 * @memberOf XRegExp 820 * @param {*} value Object to check. 821 * @returns {Boolean} Whether the object is a `RegExp` object. 
822 * @example 823 * 824 * XRegExp.isRegExp('string'); // -> false 825 * XRegExp.isRegExp(/regex/i); // -> true 826 * XRegExp.isRegExp(RegExp('^', 'm')); // -> true 827 * XRegExp.isRegExp(XRegExp('(?s).')); // -> true 828 */ 829 self.isRegExp = function(value) { 830 return toString.call(value) === '[object RegExp]'; 831 //return isType(value, 'RegExp'); 832 }; 833 834 /** 835 * Returns the first matched string, or in global mode, an array containing all matched strings. 836 * This is essentially a more convenient re-implementation of `String.prototype.match` that gives 837 * the result types you actually want (string instead of `exec`-style array in match-first mode, 838 * and an empty array instead of `null` when no matches are found in match-all mode). It also lets 839 * you override flag g and ignore `lastIndex`, and fixes browser bugs. 840 * @memberOf XRegExp 841 * @param {String} str String to search. 842 * @param {RegExp} regex Regex to search with. 843 * @param {String} [scope='one'] Use 'one' to return the first match as a string. Use 'all' to 844 * return an array of all matched strings. If not explicitly specified and `regex` uses flag g, 845 * `scope` is 'all'. 846 * @returns {String|Array} In match-first mode: First match as a string, or `null`. In match-all 847 * mode: Array of all matched strings, or an empty array. 848 * @example 849 * 850 * // Match first 851 * XRegExp.match('abc', /\w/); // -> 'a' 852 * XRegExp.match('abc', /\w/g, 'one'); // -> 'a' 853 * XRegExp.match('abc', /x/g, 'one'); // -> null 854 * 855 * // Match all 856 * XRegExp.match('abc', /\w/g); // -> ['a', 'b', 'c'] 857 * XRegExp.match('abc', /\w/, 'all'); // -> ['a', 'b', 'c'] 858 * XRegExp.match('abc', /x/, 'all'); // -> [] 859 */ 860 self.match = function(str, regex, scope) { 861 var global = (regex.global && scope !== 'one') || scope === 'all', 862 cacheFlags = (global ? 'g' : '') + (regex.sticky ? 
'y' : ''), 863 result, 864 r2; 865 866 regex[REGEX_DATA] = regex[REGEX_DATA] || getBaseProps(); 867 868 // Shares cached copies with `XRegExp.exec`/`replace` 869 r2 = regex[REGEX_DATA][cacheFlags || 'noGY'] || ( 870 regex[REGEX_DATA][cacheFlags || 'noGY'] = copy(regex, { 871 add: cacheFlags, 872 remove: scope === 'one' ? 'g' : '' 873 }) 874 ); 875 876 result = nativ.match.call(toObject(str), r2); 877 878 if (regex.global) { 879 regex.lastIndex = ( 880 (scope === 'one' && result) ? 881 // Can't use `r2.lastIndex` since `r2` is nonglobal in this case 882 (result.index + result[0].length) : 0 883 ); 884 } 885 886 return global ? (result || []) : (result && result[0]); 887 }; 888 889 /** 890 * Retrieves the matches from searching a string using a chain of regexes that successively search 891 * within previous matches. The provided `chain` array can contain regexes and objects with `regex` 892 * and `backref` properties. When a backreference is specified, the named or numbered backreference 893 * is passed forward to the next regex or returned. 894 * @memberOf XRegExp 895 * @param {String} str String to search. 896 * @param {Array} chain Regexes that each search for matches within preceding results. 897 * @returns {Array} Matches by the last regex in the chain, or an empty array. 
/**
 * Retrieves the matches from searching a string using a chain of regexes that successively search
 * within previous matches. The provided `chain` array can contain regexes and objects with `regex`
 * and `backref` properties. When a backreference is specified, the named or numbered backreference
 * is passed forward to the next regex or returned.
 * @memberOf XRegExp
 * @param {String} str String to search.
 * @param {Array} chain Regexes that each search for matches within preceding results.
 * @returns {Array} Matches by the last regex in the chain, or an empty array.
 * @throws {ReferenceError} If an item's `backref` does not name or number a group of its regex.
 * @example
 *
 * // Basic usage; matches numbers within <b> tags
 * XRegExp.matchChain('1 <b>2</b> 3 <b>4 a 56</b>', [
 *   XRegExp('(?is)<b>.*?</b>'),
 *   /\d+/
 * ]);
 * // -> ['2', '4', '56']
 *
 * // Passing forward and returning specific backreferences
 * html = '<a href="http://xregexp.com/api/">XRegExp</a>\
 *         <a href="http://www.google.com/">Google</a>';
 * XRegExp.matchChain(html, [
 *   {regex: /<a href="([^"]+)">/i, backref: 1},
 *   {regex: XRegExp('(?i)^https?://(?<domain>[^/?#]+)'), backref: 'domain'}
 * ]);
 * // -> ['xregexp.com', 'www.google.com']
 */
self.matchChain = function(str, chain) {
    // Match arrays carry named captures as own properties, so a group named `hasOwnProperty`
    // would shadow the inherited method on the array. Always call the prototype's version
    var hasOwn = Object.prototype.hasOwnProperty;

    return (function recurseChain(values, level) {
        // Normalize bare regexes to the `{regex: ..}` item form
        var item = chain[level].regex ? chain[level] : {regex: chain[level]},
            matches = [],
            addMatch = function(match) {
                if (item.backref) {
                    /* Safari 4.0.5 (but not 5.0.5+) inappropriately uses sparse arrays to hold
                     * the `undefined`s for backreferences to nonparticipating capturing
                     * groups. In such cases, an own-property check on its own would
                     * inappropriately throw the exception, so also check if the backreference
                     * is a number that is within the bounds of the array.
                     */
                    if (!(hasOwn.call(match, item.backref) || +item.backref < match.length)) {
                        throw new ReferenceError('Backreference to undefined group: ' + item.backref);
                    }

                    // Nonparticipating groups are passed forward as empty strings
                    matches.push(match[item.backref] || '');
                } else {
                    matches.push(match[0]);
                }
            },
            i;

        for (i = 0; i < values.length; ++i) {
            self.forEach(values[i], item.regex, addMatch);
        }

        // Stop at the last chain level, or as soon as there is nothing left to search within
        return ((level === chain.length - 1) || !matches.length) ?
            matches :
            recurseChain(matches, level + 1);
    }([str], 0));
};
/**
 * Returns a new string with one or all matches of a pattern replaced. The pattern can be a string
 * or regex, and the replacement can be a string or a function to be called for each match. To
 * perform a global search and replace, use the optional `scope` argument or include flag g if
 * using a regex. Replacement strings can use `${n}` for named and numbered backreferences.
 * Replacement functions can use named backreferences via `arguments[0].name`.
 * @memberOf XRegExp
 * @param {String} str String to search.
 * @param {RegExp|String} search Search pattern to be replaced.
 * @param {String|Function} replacement Replacement string or a function invoked to create it.
 *   Replacement strings can include special replacement syntax:
 *     <li>$$ - Inserts a literal $ character.
 *     <li>$&, $0 - Inserts the matched substring.
 *     <li>$` - Inserts the string that precedes the matched substring (left context).
 *     <li>$' - Inserts the string that follows the matched substring (right context).
 *     <li>$n, $nn - Where n/nn are digits referencing an existent capturing group, inserts
 *       backreference n/nn.
 *     <li>${n} - Where n is a name or any number of digits that reference an existent capturing
 *       group, inserts backreference n.
 *   Replacement functions are invoked with three or more arguments:
 *     <li>The matched substring (corresponds to $& above). Named backreferences are accessible as
 *       properties of this first argument.
 *     <li>0..n arguments, one for each backreference (corresponding to $1, $2, etc. above).
 *     <li>The zero-based index of the match within the total search string.
 *     <li>The total string being searched.
 * @param {String} [scope='one'] Use 'one' to replace the first match only, or 'all'. If not
 *   explicitly specified and using a regex with flag g, `scope` is 'all'.
 * @returns {String} New string with one or all matches replaced.
 * @example
 *
 * // Regex search, using named backreferences in replacement string
 * var name = XRegExp('(?<first>\\w+) (?<last>\\w+)');
 * XRegExp.replace('John Smith', name, '${last}, ${first}');
 * // -> 'Smith, John'
 *
 * // Regex search, using named backreferences in replacement function
 * XRegExp.replace('John Smith', name, function(match) {
 *   return match.last + ', ' + match.first;
 * });
 * // -> 'Smith, John'
 *
 * // String search, with replace-all
 * XRegExp.replace('RegExp builds RegExps', 'RegExp', 'XRegExp', 'all');
 * // -> 'XRegExp builds XRegExps'
 */
self.replace = function(str, search, replacement, scope) {
    var isRegex = self.isRegExp(search),
        replaceAll = (search.global && scope !== 'one') || scope === 'all',
        cacheFlags = (replaceAll ? 'g' : '') + (search.sticky ? 'y' : ''),
        cacheKey = cacheFlags || 'noGY',
        pattern = search,
        data,
        output;

    if (isRegex) {
        search[REGEX_DATA] = search[REGEX_DATA] || getBaseProps();
        data = search[REGEX_DATA];

        // The cached copies are shared with `XRegExp.exec`/`match`. Because a copy does the
        // searching, `search`'s own `lastIndex` isn't updated *during* replacement iterations
        pattern = data[cacheKey];
        if (!pattern) {
            pattern = data[cacheKey] = copy(search, {
                add: cacheFlags,
                remove: scope === 'one' ? 'g' : ''
            });
        }
    } else if (replaceAll) {
        // Replace-all with a nonregex search value: build a global regex that matches the
        // value literally
        pattern = new RegExp(self.escape(String(search)), 'g');
    }

    // The fixed `replace` is required for named backreferences, etc.
    output = fixed.replace.call(toObject(str), pattern, replacement);

    if (isRegex && search.global) {
        // Fixes IE, Safari bug (last tested IE 9, Safari 5.1)
        search.lastIndex = 0;
    }

    return output;
};
/**
 * Performs batch processing of string replacements. Used like {@link #XRegExp.replace}, but
 * accepts an array of replacement details. Later replacements operate on the output of earlier
 * replacements. Each detail is itself an array holding a regex or string to search for, the
 * replacement string or function, and an optional scope of 'one' or 'all'. Every entry is passed
 * to `XRegExp.replace`, so the XRegExp replacement text syntax (including `${name}`) applies.
 * @memberOf XRegExp
 * @param {String} str String to search.
 * @param {Array} replacements Array of replacement detail arrays.
 * @returns {String} New string with all replacements.
 * @example
 *
 * str = XRegExp.replaceEach(str, [
 *   [XRegExp('(?<name>a)'), 'z${name}'],
 *   [/b/gi, 'y'],
 *   [/c/g, 'x', 'one'], // scope 'one' overrides /g
 *   [/d/, 'w', 'all'],  // scope 'all' overrides lack of /g
 *   ['e', 'v', 'all'],  // scope 'all' allows replace-all for strings
 *   [/f/g, function($0) {
 *     return $0.toUpperCase();
 *   }]
 * ]);
 */
self.replaceEach = function(str, replacements) {
    var output = str,
        index = 0,
        details;

    // Feed the output of each replacement into the next one, in array order
    while (index < replacements.length) {
        details = replacements[index];
        output = self.replace(output, details[0], details[1], details[2]);
        ++index;
    }

    return output;
};
/**
 * Splits a string into an array of strings using a regex or string separator. Matches of the
 * separator are not included in the result array. However, if `separator` is a regex that contains
 * capturing groups, backreferences are spliced into the result each time `separator` is matched.
 * Delegates to the fixed `split` implementation, so it can be used reliably cross-browser.
 * @memberOf XRegExp
 * @param {String} str String to split.
 * @param {RegExp|String} separator Regex or string to use for separating the string.
 * @param {Number} [limit] Maximum number of items to include in the result array.
 * @returns {Array} Array of substrings.
 * @example
 *
 * // Basic use
 * XRegExp.split('a b c', ' ');
 * // -> ['a', 'b', 'c']
 *
 * // With limit
 * XRegExp.split('a b c', ' ', 2);
 * // -> ['a', 'b']
 *
 * // Backreferences in result array
 * XRegExp.split('..word1..', /([a-z]+)(\d+)/i);
 * // -> ['..', 'word', '1', '..']
 */
self.split = function(str, separator, limit) {
    var parts = fixed.split.call(toObject(str), separator, limit);

    return parts;
};

/**
 * Executes a regex search in a specified string. Returns `true` or `false`. Optional `pos` and
 * `sticky` arguments specify the search start position, and whether the match must start at the
 * specified position only. This is a thin wrapper that converts the result of `XRegExp.exec`
 * to a boolean, so `lastIndex` handling is whatever `XRegExp.exec` provides.
 * @memberOf XRegExp
 * @param {String} str String to search.
 * @param {RegExp} regex Regex to search with.
 * @param {Number} [pos=0] Zero-based index at which to start the search.
 * @param {Boolean|String} [sticky=false] Whether the match must start at the specified position
 *   only. The string `'sticky'` is accepted as an alternative to `true`.
 * @returns {Boolean} Whether the regex matched the provided value.
 * @example
 *
 * // Basic use
 * XRegExp.test('abc', /c/); // -> true
 *
 * // With pos and sticky
 * XRegExp.test('abc', /c/, 0, 'sticky'); // -> false
 */
self.test = function(str, regex, pos, sticky) {
    var match = self.exec(str, regex, pos, sticky);

    // `exec` yields a match array or `null`; callers only want to know which
    return !!match;
};
/**
 * Uninstalls optional features according to the specified options. All optional features start out
 * uninstalled, so this is used to undo the actions of {@link #XRegExp.install}. A feature is only
 * switched off when it is both currently installed and named in `options`.
 * @memberOf XRegExp
 * @param {Object|String} options Options object or string.
 * @example
 *
 * // With an options object
 * XRegExp.uninstall({
 *   // Disables support for astral code points in Unicode addons
 *   astral: true,
 *
 *   // Restores native regex methods
 *   natives: true
 * });
 *
 * // With an options string
 * XRegExp.uninstall('astral natives');
 */
self.uninstall = function(options) {
    // Normalize the string/object forms into a single options object
    var requested = prepareOptions(options);

    if (features.astral && requested.astral) {
        setAstral(false);
    }

    if (features.natives && requested.natives) {
        setNatives(false);
    }
};
/**
 * Returns an XRegExp object that is the union of the given patterns. Patterns can be provided as
 * regex objects or strings. Metacharacters are escaped in patterns provided as strings.
 * Backreferences in provided regex objects are automatically renumbered to work correctly within
 * the larger combined pattern. Native flags used by provided regexes are ignored in favor of the
 * `flags` argument.
 * @memberOf XRegExp
 * @param {Array} patterns Regexes and strings to combine.
 * @param {String} [flags] Any combination of XRegExp flags.
 * @returns {RegExp} Union of the provided regexes and strings.
 * @throws {TypeError} If `patterns` is not a nonempty array.
 * @example
 *
 * XRegExp.union(['a+b*c', /(dogs)\1/, /(cats)\1/], 'i');
 * // -> /a\+b\*c|(dogs)\1|(cats)\2/i
 */
self.union = function(patterns, flags) {
    // Matches, in order: a capturing group opener, a numbered backreference, any other escaped
    // character, or a whole character class (so parens inside classes are skipped)
    var parts = /(\()(?!\?)|\\([1-9]\d*)|\\[\s\S]|\[(?:[^\\\]]|\\[\s\S])*]/g,
        output = [],
        numCaptures = 0,
        numPriorCaptures,
        captureNames,
        pattern,
        i;

    // Replacement callback applied to every `parts` match within one regex's source
    function rewrite(match, paren, backref) {
        // Name (if any) of the group being opened, relative to the current pattern. Must be
        // looked up before `numCaptures` is advanced
        var name = captureNames[numCaptures - numPriorCaptures];

        if (paren) {
            numCaptures += 1;
            // If the current capture has a name, preserve the name
            return name ? '(?<' + name + '>' : match;
        }

        if (backref) {
            // Shift the backreference by the number of groups contributed by earlier patterns
            return '\\' + (+backref + numPriorCaptures);
        }

        return match;
    }

    if (!isType(patterns, 'Array') || !patterns.length) {
        throw new TypeError('Must provide a nonempty array of patterns to merge');
    }

    for (i = 0; i < patterns.length; ++i) {
        pattern = patterns[i];

        if (!self.isRegExp(pattern)) {
            output.push(self.escape(pattern));
            continue;
        }

        numPriorCaptures = numCaptures;
        captureNames = (pattern[REGEX_DATA] && pattern[REGEX_DATA].captureNames) || [];

        // Rewrite backreferences. Passing to XRegExp dies on octals and ensures patterns
        // are independently valid; helps keep this simple. Named captures are put back
        output.push(nativ.replace.call(self(pattern.source).source, parts, rewrite));
    }

    return self(output.join('|'), flags);
};

/* ==============================
 * Fixed/extended native methods
 * ============================== */
/**
 * Adds named capture support (with backreferences returned as `result.name`), and fixes browser
 * bugs in the native `RegExp.prototype.exec`. Calling `XRegExp.install('natives')` uses this to
 * override the native method. Use via `XRegExp.exec` without overriding natives.
 * @private
 * @param {String} str String to search.
 * @returns {Array} Match array with named backreference properties, or `null`.
 */
fixed.exec = function(str) {
    var origLastIndex = this.lastIndex,
        match = nativ.exec.apply(this, arguments),
        names,
        nonglobal,
        name,
        i;

    if (match) {
        // Fix browsers whose `exec` methods don't return `undefined` for nonparticipating
        // capturing groups. This fixes IE 5.5-8, but not IE 9's quirks mode or emulation of
        // older IEs. IE 9 in standards mode follows the spec
        if (!correctExecNpcg && match.length > 1 && indexOf(match, '') > -1) {
            nonglobal = copy(this, {remove: 'g'});
            // Search `str.slice(match.index)` rather than `match[0]`, in case lookahead allowed
            // matching due to characters outside the match
            nativ.replace.call(String(str).slice(match.index), nonglobal, function() {
                // Arguments are: match, groups.., index, string. Visit the groups only
                var groupEnd = arguments.length - 2,
                    k;
                for (k = 1; k < groupEnd; ++k) {
                    if (arguments[k] === undefined) {
                        match[k] = undefined;
                    }
                }
            });
        }

        // Attach named capture properties
        names = this[REGEX_DATA] && this[REGEX_DATA].captureNames;
        if (names) {
            // Index 0 is the whole match, so group `i` maps to `names[i - 1]`
            for (i = 1; i < match.length; ++i) {
                name = names[i - 1];
                if (name) {
                    match[name] = match[i];
                }
            }
        }

        // Fix browsers that increment `lastIndex` after zero-length matches
        if (this.global && !match[0].length && (this.lastIndex > match.index)) {
            this.lastIndex = match.index;
        }
    }

    if (!this.global) {
        // Fixes IE, Opera bug (last tested IE 9, Opera 11.6)
        this.lastIndex = origLastIndex;
    }

    return match;
};
/**
 * Fixes browser bugs in the native `RegExp.prototype.test`. Calling `XRegExp.install('natives')`
 * uses this to override the native method. Implemented by coercing the result of the fixed
 * `exec` to a boolean.
 * @private
 * @param {String} str String to search.
 * @returns {Boolean} Whether the regex matched the provided value.
 */
fixed.test = function(str) {
    var match = fixed.exec.call(this, str);

    return !!match;
};

/**
 * Adds named capture support (with backreferences returned as `result.name`), and fixes browser
 * bugs in the native `String.prototype.match`. Calling `XRegExp.install('natives')` uses this to
 * override the native method.
 * @private
 * @param {RegExp|*} regex Regex to search with. If not a regex object, it is passed to `RegExp`.
 * @returns {Array} If `regex` uses flag g, an array of match strings or `null`. Without flag g,
 *   the result of calling `regex.exec(this)`.
 */
fixed.match = function(regex) {
    var matches;

    if (self.isRegExp(regex)) {
        if (regex.global) {
            // Match-all is handled natively; only `lastIndex` needs repair afterward
            matches = nativ.match.apply(this, arguments);
            // Fixes IE bug
            regex.lastIndex = 0;

            return matches;
        }
    } else {
        // Use the native `RegExp` rather than `XRegExp`
        regex = new RegExp(regex);
    }

    // Nonglobal search: defer to the fixed `exec` so named captures are attached
    return fixed.exec.call(regex, toObject(this));
};
/**
 * Adds support for `${n}` tokens for named and numbered backreferences in replacement text, and
 * provides named backreferences to replacement functions as `arguments[0].name`. Also fixes
 * browser bugs in replacement text syntax when performing a replacement using a nonregex search
 * value, and the value of a replacement regex's `lastIndex` property during replacement iterations
 * and upon completion. Note that this doesn't support SpiderMonkey's proprietary third (`flags`)
 * argument. Calling `XRegExp.install('natives')` uses this to override the native method. Use via
 * `XRegExp.replace` without overriding natives.
 * @private
 * @param {RegExp|String} search Search pattern to be replaced.
 * @param {String|Function} replacement Replacement string or a function invoked to create it.
 * @returns {String} New string with one or all matches replaced.
 * @throws {SyntaxError} If replacement text references an undefined group or uses an invalid
 *   token.
 */
fixed.replace = function(search, replacement) {
    var isRegex = self.isRegExp(search),
        origLastIndex,
        captureNames,
        result;

    // Per-match callback used when `replacement` is a function
    function invokeReplacementFunction() {
        var args = arguments,
            wholeMatch,
            i;

        if (captureNames) {
            // Swap the `arguments[0]` string primitive for a `String` object that can store
            // properties. This really does need to use `String` as a constructor
            wholeMatch = new String(args[0]);
            // Store named backreferences on the first argument
            for (i = 0; i < captureNames.length; ++i) {
                if (captureNames[i]) {
                    wholeMatch[captureNames[i]] = args[i + 1];
                }
            }
            args[0] = wholeMatch;
        }

        // Update `lastIndex` before calling `replacement`. Fixes IE, Chrome, Firefox,
        // Safari bug (last tested IE 9, Chrome 17, Firefox 11, Safari 5.1)
        if (isRegex && search.global) {
            search.lastIndex = args[args.length - 2] + args[0].length;
        }

        // Should pass `undefined` as context; see
        // <https://bugs.ecmascript.org/show_bug.cgi?id=154>
        return replacement.apply(undefined, args);
    }

    // Per-match callback used when `replacement` is replacement text
    function expandReplacementText() {
        // Keep this function's `arguments` available to the token handler through closure.
        // Layout is: match, groups.., index, string
        var args = arguments,
            groupCount = args.length - 3,
            matchIndex = args[args.length - 2],
            subject = args[args.length - 1];

        return nativ.replace.call(String(replacement), replacementToken, function(token, braced, bare) {
            var n;

            // Named or numbered backreference with curly braces
            if (braced) {
                /* XRegExp behavior for `${n}`:
                 * 1. Backreference to numbered capture, if `n` is an integer. Use `0` for
                 *    the entire match. Any number of leading zeros may be used.
                 * 2. Backreference to named capture `n`, if it exists and is not an
                 *    integer overridden by numbered capture. In practice, this does not
                 *    overlap with numbered capture since XRegExp does not allow named
                 *    capture to use a bare integer as the name.
                 * 3. If the name or number does not refer to an existing capturing group,
                 *    it's an error.
                 */
                n = +braced; // Type-convert; drop leading zeros
                if (n <= groupCount) {
                    return args[n] || '';
                }
                // Groups with the same name is an error, else would need `lastIndexOf`
                n = captureNames ? indexOf(captureNames, braced) : -1;
                if (n < 0) {
                    throw new SyntaxError('Backreference to undefined group ' + token);
                }
                return args[n + 1] || '';
            }

            // Else, special variable or numbered backreference without curly braces
            if (bare === '$') { // $$
                return '$';
            }
            if (bare === '&' || +bare === 0) { // $&, $0 (not followed by 1-9), $00
                return args[0];
            }
            if (bare === '`') { // $` (left context)
                return subject.slice(0, matchIndex);
            }
            if (bare === "'") { // $' (right context)
                return subject.slice(matchIndex + args[0].length);
            }

            // Else, numbered backreference without curly braces
            n = +bare; // Type-convert; drop leading zero
            /* XRegExp behavior for `$n` and `$nn`:
             * - Backrefs end after 1 or 2 digits. Use `${..}` for more digits.
             * - `$1` is an error if no capturing groups.
             * - `$10` is an error if less than 10 capturing groups. Use `${1}0` instead.
             * - `$01` is `$1` if at least one capturing group, else it's an error.
             * - `$0` (not followed by 1-9) and `$00` are the entire match.
             * Native behavior, for comparison:
             * - Backrefs end after 1 or 2 digits. Cannot reference capturing group 100+.
             * - `$1` is a literal `$1` if no capturing groups.
             * - `$10` is `$1` followed by a literal `0` if less than 10 capturing groups.
             * - `$01` is `$1` if at least one capturing group, else it's a literal `$01`.
             * - `$0` is a literal `$0`.
             */
            if (!isNaN(n)) {
                if (n > groupCount) {
                    throw new SyntaxError('Backreference to undefined group ' + token);
                }
                return args[n] || '';
            }

            throw new SyntaxError('Invalid token ' + token);
        });
    }

    if (isRegex) {
        if (search[REGEX_DATA]) {
            captureNames = search[REGEX_DATA].captureNames;
        }
        // Only needed if `search` is nonglobal
        origLastIndex = search.lastIndex;
    } else {
        search = search + ''; // Type-convert
    }

    // Don't use `typeof`; some older browsers return 'function' for regex objects
    if (isType(replacement, 'Function')) {
        // Stringifying `this` fixes a bug in IE < 9 where the last argument in replacement
        // functions isn't type-converted to a string
        result = nativ.replace.call(String(this), search, invokeReplacementFunction);
    } else {
        // Ensure that the last value of `args` will be a string when given nonstring `this`,
        // while still throwing on `null` or `undefined` context
        result = nativ.replace.call(this == null ? this : String(this), search, expandReplacementText);
    }

    if (isRegex) {
        // Global: fixes IE, Safari bug (last tested IE 9, Safari 5.1)
        // Nonglobal: fixes IE, Opera bug (last tested IE 9, Opera 11.6)
        search.lastIndex = search.global ? 0 : origLastIndex;
    }

    return result;
};

/**
 * Fixes browser bugs in the native `String.prototype.split`. Calling `XRegExp.install('natives')`
 * uses this to override the native method. Use via `XRegExp.split` without overriding natives.
 * @private
 * @param {RegExp|String} separator Regex or string to use for separating the string.
 * @param {Number} [limit] Maximum number of items to include in the result array.
 * @returns {Array} Array of substrings.
 */
fixed.split = function(separator, limit) {
    var str,
        output,
        origLastIndex,
        lastLastIndex,
        lastLength,
        maxItems;

    if (!self.isRegExp(separator)) {
        // Browsers handle nonregex split correctly, so use the faster native method
        return nativ.split.apply(this, arguments);
    }

    str = String(this);
    output = [];
    origLastIndex = separator.lastIndex;
    lastLastIndex = 0;

    /* Values for `limit`, per the spec:
     * If undefined: pow(2,32) - 1
     * If 0, Infinity, or NaN: 0
     * If positive number: limit = floor(limit); if (limit >= pow(2,32)) limit -= pow(2,32);
     * If negative number: pow(2,32) - floor(abs(limit))
     * If other: Type-convert, then use the above rules
     */
    // This line fails in very strange ways for some values of `limit` in Opera 10.5-10.63,
    // unless Opera Dragonfly is open (go figure). It works in at least Opera 9.5-10.1 and 11+
    maxItems = (limit === undefined ? -1 : limit) >>> 0;

    self.forEach(str, separator, function(match) {
        var matchEnd = match.index + match[0].length;

        // This condition is not the same as `if (match[0].length)`
        if (matchEnd > lastLastIndex) {
            output.push(str.slice(lastLastIndex, match.index));
            // Splice in the backreferences, unless the match sits at the very end of `str`
            if (match.length > 1 && match.index < str.length) {
                Array.prototype.push.apply(output, match.slice(1));
            }
            lastLength = match[0].length;
            lastLastIndex = matchEnd;
        }
    });

    if (lastLastIndex !== str.length) {
        // Remainder after the final separator match
        output.push(str.slice(lastLastIndex));
    } else if (!nativ.test.call(separator, '') || lastLength) {
        // The string ended on a separator match: add the trailing empty item
        output.push('');
    }

    separator.lastIndex = origLastIndex;

    return output.length > maxItems ? output.slice(0, maxItems) : output;
};

/* ==============================
 * Built-in syntax/flag tokens
 * ============================== */

// Short alias used for the token registrations that follow
add = self.addToken;

/* Letter identity escapes that natively match literal characters: `\a`, `\A`, etc. These should be
 * SyntaxErrors but are allowed in web reality. XRegExp makes them errors for cross-browser
 * consistency and to reserve their syntax, but lets them be superseded by addons.
 */
add(
    /\\([ABCE-RTUVXYZaeg-mopqyz]|c(?![A-Za-z])|u(?![\dA-Fa-f]{4})|x(?![\dA-Fa-f]{2}))/,
    function(match, scope) {
        // \B is allowed in default scope only
        var isAllowed = match[1] === 'B' && scope === defaultScope;

        if (!isAllowed) {
            throw new SyntaxError('Invalid escape ' + match[0]);
        }

        return match[0];
    },
    {scope: 'all'}
);
/* Empty character class: `[]` or `[^]`. This fixes a critical cross-browser syntax inconsistency.
 * Unless this is standardized (per the ES spec), regex syntax can't be accurately parsed because
 * character class endings can't be determined.
 */
add(
    /\[(\^?)]/,
    function(match) {
        // For cross-browser compatibility with ES3, convert [] to \b\B and [^] to [\s\S].
        // (?!) should work like \b\B, but is unreliable in some versions of Firefox
        if (match[1]) {
            return '[\\s\\S]';
        }

        return '\\b\\B';
    }
);

/* Comment pattern: `(?# )`. Inline comments are an alternative to the line comments allowed in
 * free-spacing mode (flag x).
 */
add(
    /\(\?#[^)]*\)/,
    function(match, scope, flags) {
        var tokenEnd = match.index + match[0].length;

        // Keep tokens separated unless the following token is a quantifier
        if (isQuantifierNext(match.input, tokenEnd, flags)) {
            return '';
        }

        return '(?:)';
    }
);

/* Whitespace and line comments, in free-spacing mode (aka extended mode, flag x) only.
 */
add(
    /\s+|#.*/,
    function(match, scope, flags) {
        var tokenEnd = match.index + match[0].length;

        // Keep tokens separated unless the following token is a quantifier
        if (isQuantifierNext(match.input, tokenEnd, flags)) {
            return '';
        }

        return '(?:)';
    },
    {flag: 'x'}
);

/* Dot, in dotall mode (aka singleline mode, flag s) only.
 */
add(
    /\./,
    function() {
        return '[\\s\\S]';
    },
    {flag: 's'}
);

/* Named backreference: `\k<name>`. Backreference names can use the characters A-Z, a-z, 0-9, _,
 * and $ only. Also allows numbered backreferences as `\k<n>`.
 */
add(
    /\\k<([\w$]+)>/,
    function(match) {
        var endIndex = match.index + match[0].length,
            separator,
            index;

        if (isNaN(match[1])) {
            // Groups with the same name is an error, else would need `lastIndexOf`
            index = indexOf(this.captureNames, match[1]) + 1;
        } else {
            index = +match[1];
        }

        if (!index || index > this.captureNames.length) {
            throw new SyntaxError('Backreference to undefined group ' + match[0]);
        }

        // Keep backreferences separate from subsequent literal numbers
        if (endIndex === match.input.length || isNaN(match.input.charAt(endIndex))) {
            separator = '';
        } else {
            separator = '(?:)';
        }

        return '\\' + index + separator;
    }
);

/* Numbered backreference or octal, plus any following digits: `\0`, `\11`, etc. Octals except `\0`
 * not followed by 0-9 and backreferences to unopened capture groups throw an error. Other matches
 * are returned unaltered. IE < 9 doesn't support backreferences above `\99` in regex syntax.
 */
add(
    /\\(\d+)/,
    function(match, scope) {
        // A usable backreference is in default scope, has no leading zero, and points at a
        // group that has already been opened
        var isValidBackref = scope === defaultScope &&
                /^[1-9]/.test(match[1]) &&
                +match[1] <= this.captureNames.length;

        if (!isValidBackref && match[1] !== '0') {
            throw new SyntaxError('Cannot use octal escape or backreference to undefined group ' +
                match[0]);
        }

        return match[0];
    },
    {scope: 'all'}
);
1623 */ 1624 add( 1625 /\(\?P?<([\w$]+)>/, 1626 function(match) { 1627 // Disallow bare integers as names because named backreferences are added to match 1628 // arrays and therefore numeric properties may lead to incorrect lookups 1629 if (!isNaN(match[1])) { 1630 throw new SyntaxError('Cannot use integer as capture name ' + match[0]); 1631 } 1632 if (match[1] === 'length' || match[1] === '__proto__') { 1633 throw new SyntaxError('Cannot use reserved word as capture name ' + match[0]); 1634 } 1635 if (indexOf(this.captureNames, match[1]) > -1) { 1636 throw new SyntaxError('Cannot use same name for multiple groups ' + match[0]); 1637 } 1638 this.captureNames.push(match[1]); 1639 this.hasNamedCapture = true; 1640 return '('; 1641 } 1642 ); 1643 1644 /* Capturing group; match the opening parenthesis only. Required for support of named capturing 1645 * groups. Also adds explicit capture mode (flag n). 1646 */ 1647 add( 1648 /\((?!\?)/, 1649 function(match, scope, flags) { 1650 if (flags.indexOf('n') > -1) { 1651 return '(?:'; 1652 } 1653 this.captureNames.push(null); 1654 return '('; 1655 }, 1656 {optionalFlags: 'n'} 1657 ); 1658 1659 /* ============================== 1660 * Expose XRegExp 1661 * ============================== */ 1662 1663 return self; 1664 1665 }());