compileBooleanMatcher.js 8.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322
  1. /*
  2. MIT License http://www.opensource.org/licenses/mit-license.php
  3. Author Tobias Koppers @sokra
  4. */
  5. "use strict";
  6. /**
  7. * Returns quoted meta.
  8. * @param {string} str string
  9. * @returns {string} quoted meta
  10. */
  11. const quoteMeta = (str) => str.replace(/[-[\]\\/{}()*+?.^$|]/g, "\\$&");
  12. /**
  13. * Quote meta in char class.
  14. * @param {string} char character to escape for use in character class
  15. * @returns {string} escaped character
  16. */
  17. const quoteMetaInCharClass = (char) => {
  18. // In character class, only these need escaping: ] \ ^ -
  19. if (char === "]" || char === "\\" || char === "^" || char === "-") {
  20. return `\\${char}`;
  21. }
  22. return char;
  23. };
  24. /**
  25. * Converts an array of single characters into an optimized character class string
  26. * using ranges where possible. E.g., ["1","2","3","4","a"] => "1-4a"
  27. * @param {string[]} chars array of single characters (should be sorted)
  28. * @returns {string} optimized character class content (without the brackets)
  29. */
  30. const charsToCharClassContent = (chars) => {
  31. if (chars.length === 0) return "";
  32. if (chars.length === 1) return quoteMetaInCharClass(chars[0]);
  33. // Sort by char code
  34. const sorted = [...chars].sort((a, b) => a.charCodeAt(0) - b.charCodeAt(0));
  35. /** @type {string[]} */
  36. const parts = [];
  37. let rangeStart = sorted[0];
  38. let rangeEnd = sorted[0];
  39. for (let i = 1; i < sorted.length; i++) {
  40. const char = sorted[i];
  41. const prevCode = rangeEnd.charCodeAt(0);
  42. const currCode = char.charCodeAt(0);
  43. if (currCode === prevCode + 1) {
  44. // Extend the range
  45. rangeEnd = char;
  46. } else {
  47. // Flush the current range
  48. parts.push(formatRange(rangeStart, rangeEnd));
  49. rangeStart = char;
  50. rangeEnd = char;
  51. }
  52. }
  53. // Flush the last range
  54. parts.push(formatRange(rangeStart, rangeEnd));
  55. return parts.join("");
  56. };
  57. /**
  58. * Formats a range of characters for use in a character class
  59. * @param {string} start start character
  60. * @param {string} end end character
  61. * @returns {string} formatted range
  62. */
  63. const formatRange = (start, end) => {
  64. const startCode = start.charCodeAt(0);
  65. const endCode = end.charCodeAt(0);
  66. const length = endCode - startCode + 1;
  67. if (length === 1) {
  68. return quoteMetaInCharClass(start);
  69. }
  70. if (length === 2) {
  71. // For 2 chars, just list them (e.g., "ab" instead of "a-b")
  72. return quoteMetaInCharClass(start) + quoteMetaInCharClass(end);
  73. }
  74. // For 3+ chars, use range notation
  75. return `${quoteMetaInCharClass(start)}-${quoteMetaInCharClass(end)}`;
  76. };
  77. /**
  78. * Returns string.
  79. * @param {string} str string
  80. * @returns {string} string
  81. */
  82. const toSimpleString = (str) => {
  83. if (`${Number(str)}` === str) {
  84. return str;
  85. }
  86. return JSON.stringify(str);
  87. };
  88. /**
  89. * Compile boolean matcher.
  90. * @param {Record<string | number, boolean>} map value map
  91. * @returns {boolean | ((value: string) => string)} true/false, when unconditionally true/false, or a template function to determine the value at runtime
  92. */
  93. const compileBooleanMatcher = (map) => {
  94. const positiveItems = Object.keys(map).filter((i) => map[i]);
  95. const negativeItems = Object.keys(map).filter((i) => !map[i]);
  96. if (positiveItems.length === 0) return false;
  97. if (negativeItems.length === 0) return true;
  98. return compileBooleanMatcherFromLists(positiveItems, negativeItems);
  99. };
  100. /**
  101. * Compile boolean matcher from lists.
  102. * @param {string[]} positiveItems positive items
  103. * @param {string[]} negativeItems negative items
  104. * @returns {(value: string) => string} a template function to determine the value at runtime
  105. */
  106. const compileBooleanMatcherFromLists = (positiveItems, negativeItems) => {
  107. if (positiveItems.length === 0) return () => "false";
  108. if (negativeItems.length === 0) return () => "true";
  109. if (positiveItems.length === 1) {
  110. return (value) => `${toSimpleString(positiveItems[0])} == ${value}`;
  111. }
  112. if (negativeItems.length === 1) {
  113. return (value) => `${toSimpleString(negativeItems[0])} != ${value}`;
  114. }
  115. const positiveRegexp = itemsToRegexp(positiveItems);
  116. const negativeRegexp = itemsToRegexp(negativeItems);
  117. if (positiveRegexp.length <= negativeRegexp.length) {
  118. return (value) => `/^${positiveRegexp}$/.test(${value})`;
  119. }
  120. return (value) => `!/^${negativeRegexp}$/.test(${value})`;
  121. };
  122. /** @typedef {string[][]} ListOfCommonItems */
  123. /**
  124. * Returns list of common items.
  125. * @param {Set<string>} itemsSet items set
  126. * @param {(str: string) => string | false} getKey get key function
  127. * @param {(str: string[]) => boolean} condition condition
  128. * @returns {ListOfCommonItems} list of common items
  129. */
  130. const popCommonItems = (itemsSet, getKey, condition) => {
  131. /** @type {Map<string, string[]>} */
  132. const map = new Map();
  133. for (const item of itemsSet) {
  134. const key = getKey(item);
  135. if (key) {
  136. let list = map.get(key);
  137. if (list === undefined) {
  138. /** @type {string[]} */
  139. list = [];
  140. map.set(key, list);
  141. }
  142. list.push(item);
  143. }
  144. }
  145. /** @type {ListOfCommonItems} */
  146. const result = [];
  147. for (const list of map.values()) {
  148. if (condition(list)) {
  149. for (const item of list) {
  150. itemsSet.delete(item);
  151. }
  152. result.push(list);
  153. }
  154. }
  155. return result;
  156. };
  157. /**
  158. * Gets common prefix.
  159. * @param {string[]} items items
  160. * @returns {string} common prefix
  161. */
  162. const getCommonPrefix = (items) => {
  163. let prefix = items[0];
  164. for (let i = 1; i < items.length; i++) {
  165. const item = items[i];
  166. for (let p = 0; p < prefix.length; p++) {
  167. if (item[p] !== prefix[p]) {
  168. prefix = prefix.slice(0, p);
  169. break;
  170. }
  171. }
  172. }
  173. return prefix;
  174. };
  175. /**
  176. * Gets common suffix.
  177. * @param {string[]} items items
  178. * @returns {string} common suffix
  179. */
  180. const getCommonSuffix = (items) => {
  181. let suffix = items[0];
  182. for (let i = 1; i < items.length; i++) {
  183. const item = items[i];
  184. for (let p = item.length - 1, s = suffix.length - 1; s >= 0; p--, s--) {
  185. if (item[p] !== suffix[s]) {
  186. suffix = suffix.slice(s + 1);
  187. break;
  188. }
  189. }
  190. }
  191. return suffix;
  192. };
  193. /**
  194. * Returns regexp.
  195. * @param {string[]} itemsArr array of items
  196. * @returns {string} regexp
  197. */
  198. const itemsToRegexp = (itemsArr) => {
  199. if (itemsArr.length === 1) {
  200. return quoteMeta(itemsArr[0]);
  201. }
  202. /** @type {string[]} */
  203. const finishedItems = [];
  204. // merge single char items: (a|b|c|d|ef) => ([abcd]|ef)
  205. let countOfSingleCharItems = 0;
  206. for (const item of itemsArr) {
  207. if (item.length === 1) {
  208. countOfSingleCharItems++;
  209. }
  210. }
  211. // special case for only single char items
  212. if (countOfSingleCharItems === itemsArr.length) {
  213. return `[${charsToCharClassContent(itemsArr)}]`;
  214. }
  215. /** @type {Set<string>} */
  216. const items = new Set(itemsArr.sort());
  217. if (countOfSingleCharItems > 2) {
  218. /** @type {string[]} */
  219. const singleCharItems = [];
  220. for (const item of items) {
  221. if (item.length === 1) {
  222. singleCharItems.push(item);
  223. items.delete(item);
  224. }
  225. }
  226. finishedItems.push(`[${charsToCharClassContent(singleCharItems)}]`);
  227. }
  228. // special case for 2 items with common prefix/suffix
  229. if (finishedItems.length === 0 && items.size === 2) {
  230. const prefix = getCommonPrefix(itemsArr);
  231. const suffix = getCommonSuffix(
  232. itemsArr.map((item) => item.slice(prefix.length))
  233. );
  234. if (prefix.length > 0 || suffix.length > 0) {
  235. return `${quoteMeta(prefix)}${itemsToRegexp(
  236. itemsArr.map((i) => i.slice(prefix.length, -suffix.length || undefined))
  237. )}${quoteMeta(suffix)}`;
  238. }
  239. }
  240. // special case for 2 items with common suffix
  241. if (finishedItems.length === 0 && items.size === 2) {
  242. /** @type {SetIterator<string>} */
  243. const it = items[Symbol.iterator]();
  244. const a = /** @type {string} */ (it.next().value);
  245. const b = /** @type {string} */ (it.next().value);
  246. if (a.length > 0 && b.length > 0 && a.slice(-1) === b.slice(-1)) {
  247. return `${itemsToRegexp([a.slice(0, -1), b.slice(0, -1)])}${quoteMeta(
  248. a.slice(-1)
  249. )}`;
  250. }
  251. }
  252. // find common prefix: (a1|a2|a3|a4|b5) => (a(1|2|3|4)|b5)
  253. const prefixed = popCommonItems(
  254. items,
  255. (item) => (item.length >= 1 ? item[0] : false),
  256. (list) => {
  257. if (list.length >= 3) return true;
  258. if (list.length <= 1) return false;
  259. return list[0][1] === list[1][1];
  260. }
  261. );
  262. for (const prefixedItems of prefixed) {
  263. const prefix = getCommonPrefix(prefixedItems);
  264. finishedItems.push(
  265. `${quoteMeta(prefix)}${itemsToRegexp(
  266. prefixedItems.map((i) => i.slice(prefix.length))
  267. )}`
  268. );
  269. }
  270. // find common suffix: (a1|b1|c1|d1|e2) => ((a|b|c|d)1|e2)
  271. const suffixed = popCommonItems(
  272. items,
  273. (item) => (item.length >= 1 ? item.slice(-1) : false),
  274. (list) => {
  275. if (list.length >= 3) return true;
  276. if (list.length <= 1) return false;
  277. return list[0].slice(-2) === list[1].slice(-2);
  278. }
  279. );
  280. for (const suffixedItems of suffixed) {
  281. const suffix = getCommonSuffix(suffixedItems);
  282. finishedItems.push(
  283. `${itemsToRegexp(
  284. suffixedItems.map((i) => i.slice(0, -suffix.length))
  285. )}${quoteMeta(suffix)}`
  286. );
  287. }
  288. /** @type {string[]} */
  289. const conditional = [...finishedItems, ...Array.from(items, quoteMeta)];
  290. if (conditional.length === 1) return conditional[0];
  291. return `(${conditional.join("|")})`;
  292. };
  293. compileBooleanMatcher.fromLists = compileBooleanMatcherFromLists;
  294. compileBooleanMatcher.itemsToRegexp = itemsToRegexp;
  295. module.exports = compileBooleanMatcher;