matchrecursive.js (7216B)
/*!
 * XRegExp.matchRecursive 3.0.0-pre
 * <http://xregexp.com/>
 * Steven Levithan © 2009-2012 MIT License
 */

(function(XRegExp) {
    'use strict';

    /**
     * Returns a match detail object composed of the provided values.
     * @private
     * @param {String} name Label for this part, taken from `options.valueNames`.
     * @param {String} value Substring of the searched string covered by this part.
     * @param {Number} start Index in the searched string where `value` begins.
     * @param {Number} end Index in the searched string just past the end of `value`.
     * @returns {Object} Object with `name`, `value`, `start`, and `end` properties.
     */
    function row(name, value, start, end) {
        return {
            name: name,
            value: value,
            start: start,
            end: end
        };
    }

    /**
     * Returns an array of match strings between outermost left and right delimiters, or an array of
     * objects with detailed match parts and position data. An error is thrown if delimiters are
     * unbalanced within the data.
     *
     * Without flag `g`, the search stops after the first completed outermost match. With flag `y`,
     * the search stops as soon as a delimiter is found that does not start where the previous
     * outermost match ended (or at the start of the string for the first match).
     *
     * @memberOf XRegExp
     * @param {String} str String to search.
     * @param {String} left Left delimiter as an XRegExp pattern.
     * @param {String} right Right delimiter as an XRegExp pattern.
     * @param {String} [flags] Any native or XRegExp flags, used for the left and right delimiters.
     * @param {Object} [options] Lets you specify `valueNames` and `escapeChar` options.
     *   `valueNames` is an array of four names used, in order, for: the text between outermost
     *   matches, the left delimiter, the text inside the delimiters, and the right delimiter. A
     *   falsy name omits that part from the result. When `valueNames` is provided, the result is
     *   an array of detail objects rather than strings.
     *   `escapeChar` is a single character; any character following it is skipped rather than
     *   treated as the start of a delimiter.
     * @returns {Array} Array of matches, or an empty array.
     * @throws {Error} If `escapeChar` is longer than one character.
     * @throws {Error} If a right delimiter appears with no open left delimiter, or the string ends
     *   while a left delimiter is still open.
     * @example
     *
     * // Basic usage
     * var str = '(t((e))s)t()(ing)';
     * XRegExp.matchRecursive(str, '\\(', '\\)', 'g');
     * // -> ['t((e))s', '', 'ing']
     *
     * // Extended information mode with valueNames
     * str = 'Here is <div> <div>an</div></div> example';
     * XRegExp.matchRecursive(str, '<div\\s*>', '</div>', 'gi', {
     *   valueNames: ['between', 'left', 'match', 'right']
     * });
     * // -> [
     * // {name: 'between', value: 'Here is ',       start: 0,  end: 8},
     * // {name: 'left',    value: '<div>',          start: 8,  end: 13},
     * // {name: 'match',   value: ' <div>an</div>', start: 13, end: 27},
     * // {name: 'right',   value: '</div>',         start: 27, end: 33},
     * // {name: 'between', value: ' example',       start: 33, end: 41}
     * // ]
     *
     * // Omitting unneeded parts with null valueNames, and using escapeChar
     * str = '...{1}\\{{function(x,y){return y+x;}}';
     * XRegExp.matchRecursive(str, '{', '}', 'g', {
     *   valueNames: ['literal', null, 'value', null],
     *   escapeChar: '\\'
     * });
     * // -> [
     * // {name: 'literal', value: '...', start: 0, end: 3},
     * // {name: 'value',   value: '1',   start: 4, end: 5},
     * // {name: 'literal', value: '\\{', start: 6, end: 8},
     * // {name: 'value',   value: 'function(x,y){return y+x;}', start: 9, end: 35}
     * // ]
     *
     * // Sticky mode via flag y
     * str = '<1><<<2>>><3>4<5>';
     * XRegExp.matchRecursive(str, '<', '>', 'gy');
     * // -> ['1', '<<2>>', '3']
     */
    XRegExp.matchRecursive = function(str, left, right, flags, options) {
        flags = flags || '';
        options = options || {};
        var global = flags.indexOf('g') > -1,
            sticky = flags.indexOf('y') > -1,
            // Flag `y` is controlled internally
            basicFlags = flags.replace(/y/g, ''),
            escapeChar = options.escapeChar,
            vN = options.valueNames,
            output = [],
            // Current nesting depth of unclosed left delimiters
            openTokens = 0,
            // Bounds of the delimiter matched most recently
            delimStart = 0,
            delimEnd = 0,
            // Index just past the right delimiter of the last completed outermost match
            lastOuterEnd = 0,
            // Start of the outermost left delimiter, and of the text that follows it
            outerStart,
            innerStart,
            leftMatch,
            rightMatch,
            esc;
        left = XRegExp(left, basicFlags);
        right = XRegExp(right, basicFlags);

        if (escapeChar) {
            if (escapeChar.length > 1) {
                throw new Error('Cannot use more than one escape character');
            }
            escapeChar = XRegExp.escape(escapeChar);
            // Matches a run of escaped characters and of characters that neither start a
            // delimiter nor are the escape character itself.
            // Using `XRegExp.union` safely rewrites backreferences in `left` and `right`
            esc = new RegExp(
                '(?:' + escapeChar + '[\\S\\s]|(?:(?!' +
                    XRegExp.union([left, right]).source +
                    ')[^' + escapeChar + '])+)+',
                // Flags `gy` not needed here. Flag `u` must be kept: the delimiter sources
                // embedded above were compiled with it, and without it they would be
                // interpreted differently here than in `left` and `right`.
                flags.replace(/[^imu]+/g, '')
            );
        }

        while (true) {
            // If using an escape character, advance to the delimiter's next starting position,
            // skipping any escaped characters in between
            if (escapeChar) {
                delimEnd += (XRegExp.exec(str, esc, delimEnd, 'sticky') || [''])[0].length;
            }
            leftMatch = XRegExp.exec(str, left, delimEnd);
            rightMatch = XRegExp.exec(str, right, delimEnd);
            // Keep the leftmost match only
            if (leftMatch && rightMatch) {
                if (leftMatch.index <= rightMatch.index) {
                    rightMatch = null;
                } else {
                    leftMatch = null;
                }
            }
            /* Paths (LM: leftMatch, RM: rightMatch, OT: openTokens):
             * LM | RM | OT | Result
             * 1  | 0  | 1  | loop
             * 1  | 0  | 0  | loop
             * 0  | 1  | 1  | loop
             * 0  | 1  | 0  | throw
             * 0  | 0  | 1  | throw
             * 0  | 0  | 0  | break
             * Doesn't include the sticky mode special case. The loop ends after the first
             * completed match if not `global`.
             */
            if (leftMatch || rightMatch) {
                delimStart = (leftMatch || rightMatch).index;
                delimEnd = delimStart + (leftMatch || rightMatch)[0].length;
            } else if (!openTokens) {
                break;
            }
            // Sticky mode: outside of any match, a delimiter must begin exactly where the
            // previous outermost match ended
            if (sticky && !openTokens && delimStart > lastOuterEnd) {
                break;
            }
            if (leftMatch) {
                // Only the outermost left delimiter defines the bounds of the reported match
                if (!openTokens) {
                    outerStart = delimStart;
                    innerStart = delimEnd;
                }
                ++openTokens;
            } else if (rightMatch && openTokens) {
                // Closing the outermost delimiter completes a match
                if (!--openTokens) {
                    if (vN) {
                        if (vN[0] && outerStart > lastOuterEnd) {
                            output.push(row(vN[0], str.slice(lastOuterEnd, outerStart), lastOuterEnd, outerStart));
                        }
                        if (vN[1]) {
                            output.push(row(vN[1], str.slice(outerStart, innerStart), outerStart, innerStart));
                        }
                        if (vN[2]) {
                            output.push(row(vN[2], str.slice(innerStart, delimStart), innerStart, delimStart));
                        }
                        if (vN[3]) {
                            output.push(row(vN[3], str.slice(delimStart, delimEnd), delimStart, delimEnd));
                        }
                    } else {
                        output.push(str.slice(innerStart, delimStart));
                    }
                    lastOuterEnd = delimEnd;
                    if (!global) {
                        break;
                    }
                }
            } else {
                throw new Error('Unbalanced delimiter found in string');
            }
            // If the delimiter matched an empty string, avoid an infinite loop
            if (delimStart === delimEnd) {
                ++delimEnd;
            }
        }

        // Report any text after the last outermost match as a final "between" part
        if (global && !sticky && vN && vN[0] && str.length > lastOuterEnd) {
            output.push(row(vN[0], str.slice(lastOuterEnd), lastOuterEnd, str.length));
        }

        return output;
    };

}(XRegExp));