compileBooleanMatcher.js 8.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313
  1. /*
  2. MIT License http://www.opensource.org/licenses/mit-license.php
  3. Author Tobias Koppers @sokra
  4. */
  5. "use strict";
  6. /**
  7. * @param {string} str string
  8. * @returns {string} quoted meta
  9. */
  10. const quoteMeta = (str) => str.replace(/[-[\]\\/{}()*+?.^$|]/g, "\\$&");
  11. /**
  12. * @param {string} char character to escape for use in character class
  13. * @returns {string} escaped character
  14. */
  15. const quoteMetaInCharClass = (char) => {
  16. // In character class, only these need escaping: ] \ ^ -
  17. if (char === "]" || char === "\\" || char === "^" || char === "-") {
  18. return `\\${char}`;
  19. }
  20. return char;
  21. };
  22. /**
  23. * Converts an array of single characters into an optimized character class string
  24. * using ranges where possible. E.g., ["1","2","3","4","a"] => "1-4a"
  25. * @param {string[]} chars array of single characters (should be sorted)
  26. * @returns {string} optimized character class content (without the brackets)
  27. */
  28. const charsToCharClassContent = (chars) => {
  29. if (chars.length === 0) return "";
  30. if (chars.length === 1) return quoteMetaInCharClass(chars[0]);
  31. // Sort by char code
  32. const sorted = [...chars].sort((a, b) => a.charCodeAt(0) - b.charCodeAt(0));
  33. /** @type {string[]} */
  34. const parts = [];
  35. let rangeStart = sorted[0];
  36. let rangeEnd = sorted[0];
  37. for (let i = 1; i < sorted.length; i++) {
  38. const char = sorted[i];
  39. const prevCode = rangeEnd.charCodeAt(0);
  40. const currCode = char.charCodeAt(0);
  41. if (currCode === prevCode + 1) {
  42. // Extend the range
  43. rangeEnd = char;
  44. } else {
  45. // Flush the current range
  46. parts.push(formatRange(rangeStart, rangeEnd));
  47. rangeStart = char;
  48. rangeEnd = char;
  49. }
  50. }
  51. // Flush the last range
  52. parts.push(formatRange(rangeStart, rangeEnd));
  53. return parts.join("");
  54. };
  55. /**
  56. * Formats a range of characters for use in a character class
  57. * @param {string} start start character
  58. * @param {string} end end character
  59. * @returns {string} formatted range
  60. */
  61. const formatRange = (start, end) => {
  62. const startCode = start.charCodeAt(0);
  63. const endCode = end.charCodeAt(0);
  64. const length = endCode - startCode + 1;
  65. if (length === 1) {
  66. return quoteMetaInCharClass(start);
  67. }
  68. if (length === 2) {
  69. // For 2 chars, just list them (e.g., "ab" instead of "a-b")
  70. return quoteMetaInCharClass(start) + quoteMetaInCharClass(end);
  71. }
  72. // For 3+ chars, use range notation
  73. return `${quoteMetaInCharClass(start)}-${quoteMetaInCharClass(end)}`;
  74. };
  75. /**
  76. * @param {string} str string
  77. * @returns {string} string
  78. */
  79. const toSimpleString = (str) => {
  80. if (`${Number(str)}` === str) {
  81. return str;
  82. }
  83. return JSON.stringify(str);
  84. };
  85. /**
  86. * @param {Record<string | number, boolean>} map value map
  87. * @returns {boolean | ((value: string) => string)} true/false, when unconditionally true/false, or a template function to determine the value at runtime
  88. */
  89. const compileBooleanMatcher = (map) => {
  90. const positiveItems = Object.keys(map).filter((i) => map[i]);
  91. const negativeItems = Object.keys(map).filter((i) => !map[i]);
  92. if (positiveItems.length === 0) return false;
  93. if (negativeItems.length === 0) return true;
  94. return compileBooleanMatcherFromLists(positiveItems, negativeItems);
  95. };
  96. /**
  97. * @param {string[]} positiveItems positive items
  98. * @param {string[]} negativeItems negative items
  99. * @returns {(value: string) => string} a template function to determine the value at runtime
  100. */
  101. const compileBooleanMatcherFromLists = (positiveItems, negativeItems) => {
  102. if (positiveItems.length === 0) return () => "false";
  103. if (negativeItems.length === 0) return () => "true";
  104. if (positiveItems.length === 1) {
  105. return (value) => `${toSimpleString(positiveItems[0])} == ${value}`;
  106. }
  107. if (negativeItems.length === 1) {
  108. return (value) => `${toSimpleString(negativeItems[0])} != ${value}`;
  109. }
  110. const positiveRegexp = itemsToRegexp(positiveItems);
  111. const negativeRegexp = itemsToRegexp(negativeItems);
  112. if (positiveRegexp.length <= negativeRegexp.length) {
  113. return (value) => `/^${positiveRegexp}$/.test(${value})`;
  114. }
  115. return (value) => `!/^${negativeRegexp}$/.test(${value})`;
  116. };
  117. /** @typedef {string[][]} ListOfCommonItems */
  118. /**
  119. * @param {Set<string>} itemsSet items set
  120. * @param {(str: string) => string | false} getKey get key function
  121. * @param {(str: string[]) => boolean} condition condition
  122. * @returns {ListOfCommonItems} list of common items
  123. */
  124. const popCommonItems = (itemsSet, getKey, condition) => {
  125. /** @type {Map<string, string[]>} */
  126. const map = new Map();
  127. for (const item of itemsSet) {
  128. const key = getKey(item);
  129. if (key) {
  130. let list = map.get(key);
  131. if (list === undefined) {
  132. /** @type {string[]} */
  133. list = [];
  134. map.set(key, list);
  135. }
  136. list.push(item);
  137. }
  138. }
  139. /** @type {ListOfCommonItems} */
  140. const result = [];
  141. for (const list of map.values()) {
  142. if (condition(list)) {
  143. for (const item of list) {
  144. itemsSet.delete(item);
  145. }
  146. result.push(list);
  147. }
  148. }
  149. return result;
  150. };
  151. /**
  152. * @param {string[]} items items
  153. * @returns {string} common prefix
  154. */
  155. const getCommonPrefix = (items) => {
  156. let prefix = items[0];
  157. for (let i = 1; i < items.length; i++) {
  158. const item = items[i];
  159. for (let p = 0; p < prefix.length; p++) {
  160. if (item[p] !== prefix[p]) {
  161. prefix = prefix.slice(0, p);
  162. break;
  163. }
  164. }
  165. }
  166. return prefix;
  167. };
  168. /**
  169. * @param {string[]} items items
  170. * @returns {string} common suffix
  171. */
  172. const getCommonSuffix = (items) => {
  173. let suffix = items[0];
  174. for (let i = 1; i < items.length; i++) {
  175. const item = items[i];
  176. for (let p = item.length - 1, s = suffix.length - 1; s >= 0; p--, s--) {
  177. if (item[p] !== suffix[s]) {
  178. suffix = suffix.slice(s + 1);
  179. break;
  180. }
  181. }
  182. }
  183. return suffix;
  184. };
  185. /**
  186. * @param {string[]} itemsArr array of items
  187. * @returns {string} regexp
  188. */
  189. const itemsToRegexp = (itemsArr) => {
  190. if (itemsArr.length === 1) {
  191. return quoteMeta(itemsArr[0]);
  192. }
  193. /** @type {string[]} */
  194. const finishedItems = [];
  195. // merge single char items: (a|b|c|d|ef) => ([abcd]|ef)
  196. let countOfSingleCharItems = 0;
  197. for (const item of itemsArr) {
  198. if (item.length === 1) {
  199. countOfSingleCharItems++;
  200. }
  201. }
  202. // special case for only single char items
  203. if (countOfSingleCharItems === itemsArr.length) {
  204. return `[${charsToCharClassContent(itemsArr)}]`;
  205. }
  206. /** @type {Set<string>} */
  207. const items = new Set(itemsArr.sort());
  208. if (countOfSingleCharItems > 2) {
  209. /** @type {string[]} */
  210. const singleCharItems = [];
  211. for (const item of items) {
  212. if (item.length === 1) {
  213. singleCharItems.push(item);
  214. items.delete(item);
  215. }
  216. }
  217. finishedItems.push(`[${charsToCharClassContent(singleCharItems)}]`);
  218. }
  219. // special case for 2 items with common prefix/suffix
  220. if (finishedItems.length === 0 && items.size === 2) {
  221. const prefix = getCommonPrefix(itemsArr);
  222. const suffix = getCommonSuffix(
  223. itemsArr.map((item) => item.slice(prefix.length))
  224. );
  225. if (prefix.length > 0 || suffix.length > 0) {
  226. return `${quoteMeta(prefix)}${itemsToRegexp(
  227. itemsArr.map((i) => i.slice(prefix.length, -suffix.length || undefined))
  228. )}${quoteMeta(suffix)}`;
  229. }
  230. }
  231. // special case for 2 items with common suffix
  232. if (finishedItems.length === 0 && items.size === 2) {
  233. /** @type {Iterator<string>} */
  234. const it = items[Symbol.iterator]();
  235. const a = it.next().value;
  236. const b = it.next().value;
  237. if (a.length > 0 && b.length > 0 && a.slice(-1) === b.slice(-1)) {
  238. return `${itemsToRegexp([a.slice(0, -1), b.slice(0, -1)])}${quoteMeta(
  239. a.slice(-1)
  240. )}`;
  241. }
  242. }
  243. // find common prefix: (a1|a2|a3|a4|b5) => (a(1|2|3|4)|b5)
  244. const prefixed = popCommonItems(
  245. items,
  246. (item) => (item.length >= 1 ? item[0] : false),
  247. (list) => {
  248. if (list.length >= 3) return true;
  249. if (list.length <= 1) return false;
  250. return list[0][1] === list[1][1];
  251. }
  252. );
  253. for (const prefixedItems of prefixed) {
  254. const prefix = getCommonPrefix(prefixedItems);
  255. finishedItems.push(
  256. `${quoteMeta(prefix)}${itemsToRegexp(
  257. prefixedItems.map((i) => i.slice(prefix.length))
  258. )}`
  259. );
  260. }
  261. // find common suffix: (a1|b1|c1|d1|e2) => ((a|b|c|d)1|e2)
  262. const suffixed = popCommonItems(
  263. items,
  264. (item) => (item.length >= 1 ? item.slice(-1) : false),
  265. (list) => {
  266. if (list.length >= 3) return true;
  267. if (list.length <= 1) return false;
  268. return list[0].slice(-2) === list[1].slice(-2);
  269. }
  270. );
  271. for (const suffixedItems of suffixed) {
  272. const suffix = getCommonSuffix(suffixedItems);
  273. finishedItems.push(
  274. `${itemsToRegexp(
  275. suffixedItems.map((i) => i.slice(0, -suffix.length))
  276. )}${quoteMeta(suffix)}`
  277. );
  278. }
  279. /** @type {string[]} */
  280. const conditional = [...finishedItems, ...Array.from(items, quoteMeta)];
  281. if (conditional.length === 1) return conditional[0];
  282. return `(${conditional.join("|")})`;
  283. };
  284. compileBooleanMatcher.fromLists = compileBooleanMatcherFromLists;
  285. compileBooleanMatcher.itemsToRegexp = itemsToRegexp;
  286. module.exports = compileBooleanMatcher;