deterministicGrouping.js 16 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581
  1. /*
  2. MIT License http://www.opensource.org/licenses/mit-license.php
  3. Author Tobias Koppers @sokra
  4. */
  5. "use strict";
  6. // Simulations show these probabilities for a single change
  7. // 93.1% that one group is invalidated
  8. // 4.8% that two groups are invalidated
  9. // 1.1% that 3 groups are invalidated
  10. // 0.1% that 4 or more groups are invalidated
  11. //
  12. // And these for removing/adding 10 lexically adjacent files
  13. // 64.5% that one group is invalidated
  14. // 24.8% that two groups are invalidated
  15. // 7.8% that 3 groups are invalidated
  16. // 2.7% that 4 or more groups are invalidated
  17. //
  18. // And these for removing/adding 3 random files
  19. // 0% that one group is invalidated
  20. // 3.7% that two groups are invalidated
  21. // 80.8% that 3 groups are invalidated
  22. // 12.3% that 4 groups are invalidated
  23. // 3.2% that 5 or more groups are invalidated
  /**
   * Scores how alike two keys are by comparing them position by position.
   * Only the positions both keys share (the length of the shorter key) are
   * looked at. A position adds 10 points when both char codes are equal, one
   * point less per unit of char-code distance, and nothing once the distance
   * reaches 10.
   * @param {string} a key
   * @param {string} b key
   * @returns {number} the similarity as number
   */
  const similarity = (a, b) => {
    const sharedLength = Math.min(a.length, b.length);
    let score = 0;
    let index = 0;
    while (index < sharedLength) {
      const gap = Math.abs(a.charCodeAt(index) - b.charCodeAt(index));
      // char codes that are 10 or more apart contribute nothing
      if (gap < 10) score += 10 - gap;
      index++;
    }
    return score;
  };
  /**
   * Derives a short name for a group from its first and last key: the common
   * prefix of both keys plus the first char of `a` that differs. When that
   * name (compared case-insensitively) is already taken, the prefix of `a` is
   * extended one char at a time until an unused one is found.
   * @param {string} a key
   * @param {string} b key
   * @param {Set<string>} usedNames set of already used names (lower-cased), extended in place
   * @returns {string} the common part and a single char for the difference
   */
  const getName = (a, b, usedNames) => {
    const limit = Math.min(a.length, b.length);
    // measure the shared prefix ...
    let end = 0;
    while (end < limit && a.charCodeAt(end) === b.charCodeAt(end)) end++;
    // ... and include the first differing char when there is one
    if (end < limit) end++;
    for (; end < limit; end++) {
      const candidate = a.slice(0, end);
      const normalized = candidate.toLowerCase();
      if (usedNames.has(normalized)) continue;
      usedNames.add(normalized);
      return candidate;
    }
    // names always contain a hash, so this is always unique
    // we don't need to check usedNames nor add it
    return a;
  };
  /** @typedef {Record<string, number>} Sizes */
  /**
   * Accumulates every entry of `size` into `total`, mutating `total`.
   * Entries missing (or falsy) in `total` are treated as 0.
   * @param {Sizes} total total size
   * @param {Sizes} size single size
   * @returns {void}
   */
  const addSizeTo = (total, size) => {
    Object.keys(size).forEach((type) => {
      const current = total[type] || 0;
      total[type] = current + size[type];
    });
  };
  /**
   * Removes every entry of `size` from `total`, mutating `total`.
   * No default is applied to entries missing in `total`.
   * @param {Sizes} total total size
   * @param {Sizes} size single size
   * @returns {void}
   */
  const subtractSizeFrom = (total, size) => {
    for (const [type, amount] of Object.entries(size)) {
      total[type] = total[type] - amount;
    }
  };
  /**
   * Adds up the sizes of all given nodes into a fresh, prototype-less object.
   * @template T
   * @param {Iterable<Node<T>>} nodes some nodes
   * @returns {Sizes} total size
   */
  const sumSize = (nodes) => {
    /** @type {Sizes} */
    const total = Object.create(null);
    for (const { size } of nodes) addSizeTo(total, size);
    return total;
  };
  /**
   * Tells whether any non-zero size type exceeds its configured maximum.
   * Types without a numeric entry in `maxSize` are unlimited.
   * @param {Sizes} size size
   * @param {Sizes} maxSize maximum size
   * @returns {boolean} true, when size is too big
   */
  const isTooBig = (size, maxSize) =>
    Object.keys(size).some((type) => {
      const value = size[type];
      if (value === 0) return false;
      const limit = maxSize[type];
      return typeof limit === "number" && value > limit;
    });
  /**
   * Tells whether any non-zero size type falls below its configured minimum.
   * Types that are 0, or that have no numeric entry in `minSize`, never count
   * as too small.
   * @param {Sizes} size size
   * @param {Sizes} minSize minimum size
   * @returns {boolean} true, when size is too small
   */
  const isTooSmall = (size, minSize) => {
    for (const [type, value] of Object.entries(size)) {
      if (value === 0) continue;
      const floor = minSize[type];
      if (typeof floor === "number" && value < floor) return true;
    }
    return false;
  };
  /** @typedef {Set<string>} Types */
  /**
   * Collects the size types that are non-zero but below their configured
   * minimum, in the key order of `size`.
   * @param {Sizes} size size
   * @param {Sizes} minSize minimum size
   * @returns {Types} set of types that are too small
   */
  const getTooSmallTypes = (size, minSize) => {
    const tooSmall = Object.keys(size).filter((type) => {
      const value = size[type];
      if (value === 0) return false;
      const floor = minSize[type];
      return typeof floor === "number" && value < floor;
    });
    return new Set(tooSmall);
  };
  /**
   * Counts the own enumerable keys of `size` whose value is not 0 and that
   * are contained in `types`.
   * @template {object} T
   * @param {T} size size
   * @param {Types} types types
   * @returns {number} number of matching size types
   */
  const getNumberOfMatchingSizeTypes = (size, types) =>
    Object.keys(size).reduce(
      (count, type) =>
        size[/** @type {keyof T} */ (type)] !== 0 && types.has(type)
          ? count + 1
          : count,
      0
    );
  /**
   * Adds up the sizes of only those types that are listed in `types`.
   * @param {Sizes} size size
   * @param {Types} types types
   * @returns {number} selective size sum
   */
  const selectiveSizeSum = (size, types) => {
    let total = 0;
    for (const [type, value] of Object.entries(size)) {
      if (value === 0 || !types.has(type)) continue;
      total += value;
    }
    return total;
  };
  /**
   * A single item to be grouped, together with the key it is ordered by and
   * its size per type.
   * @template T
   */
  class Node {
    /**
     * Wraps an item with its grouping metadata.
     * @param {T} item the wrapped item
     * @param {string} key key of the item
     * @param {Sizes} size size of the item per type
     */
    constructor(item, key, size) {
      /** @type {T} */
      this.item = item;
      /** @type {string} */
      this.key = key;
      /** @type {Sizes} */
      this.size = size;
    }
  }
  /** @typedef {number[]} Similarities */
  /**
   * An ordered run of nodes, the similarities between neighbouring nodes and
   * the summed size of all contained nodes.
   * @template T
   */
  class Group {
    /**
     * Builds a group; the size is summed up from the nodes when not provided.
     * @param {Node<T>[]} nodes nodes
     * @param {Similarities | null} similarities similarities between the nodes (length = nodes.length - 1)
     * @param {Sizes=} size size of the group
     */
    constructor(nodes, similarities, size) {
      this.nodes = nodes;
      this.similarities = similarities;
      if (size) {
        this.size = size;
      } else {
        this.size = sumSize(nodes);
      }
      /** @type {string | undefined} */
      this.key = undefined;
    }

    /**
     * Removes all nodes matching the filter from this group and returns them.
     * Nodes, similarities and size of the group are rebuilt from the nodes
     * that stay. When every node matches, the group is left untouched and
     * nothing is returned.
     * @param {(node: Node<T>) => boolean} filter filter function
     * @returns {Node<T>[] | undefined} removed nodes
     */
    popNodes(filter) {
      /** @type {Node<T>[]} */
      const kept = [];
      /** @type {Similarities} */
      const keptSimilarities = [];
      /** @type {Node<T>[]} */
      const removed = [];
      /** @type {undefined | Node<T>} */
      let previousKept;
      for (const [index, node] of this.nodes.entries()) {
        if (filter(node)) {
          removed.push(node);
          continue;
        }
        if (kept.length > 0) {
          // the stored similarity is only valid when no node was removed
          // between the previously kept node and this one
          const stillAdjacent = previousKept === this.nodes[index - 1];
          keptSimilarities.push(
            stillAdjacent
              ? /** @type {Similarities} */ (this.similarities)[index - 1]
              : similarity(/** @type {Node<T>} */ (previousKept).key, node.key)
          );
        }
        kept.push(node);
        previousKept = node;
      }
      if (removed.length === this.nodes.length) return;
      this.nodes = kept;
      this.similarities = keptSimilarities;
      this.size = sumSize(kept);
      return removed;
    }
  }
  /**
   * Computes the similarity of every pair of neighbouring nodes, in iteration
   * order. The result has one entry less than there are nodes.
   * @template T
   * @param {Iterable<Node<T>>} nodes nodes
   * @returns {Similarities} similarities
   */
  const getSimilarities = (nodes) => {
    /** @type {Similarities} */
    const result = [];
    /** @type {undefined | Node<T>} */
    let previous;
    for (const current of nodes) {
      // the very first node has no predecessor to compare with
      if (previous !== undefined) {
        result.push(similarity(previous.key, current.key));
      }
      previous = current;
    }
    return result;
  };
  275. /**
  276. * Defines the shared type used by this module.
  277. * @template T
  278. * @typedef {object} GroupedItems<T>
  279. * @property {string} key
  280. * @property {T[]} items
  281. * @property {Sizes} size
  282. */
  283. /**
  284. * Defines the options type used by this module.
  285. * @template T
  286. * @typedef {object} Options
  287. * @property {Sizes} maxSize maximum size of a group
  288. * @property {Sizes} minSize minimum size of a group (preferred over maximum size)
  289. * @property {Iterable<T>} items a list of items
  290. * @property {(item: T) => Sizes} getSize function to get size of an item
  291. * @property {(item: T) => string} getKey function to get the key of an item
  292. */
  293. /**
  294. * Returns grouped items.
  295. * @template T
  296. * @param {Options<T>} options options object
  297. * @returns {GroupedItems<T>[]} grouped items
  298. */
  299. module.exports = ({ maxSize, minSize, items, getSize, getKey }) => {
  300. /** @type {Group<T>[]} */
  301. const result = [];
  302. const nodes = Array.from(
  303. items,
  304. (item) => new Node(item, getKey(item), getSize(item))
  305. );
  306. /** @type {Node<T>[]} */
  307. const initialNodes = [];
  308. // lexically ordering of keys
  309. nodes.sort((a, b) => {
  310. if (a.key < b.key) return -1;
  311. if (a.key > b.key) return 1;
  312. return 0;
  313. });
  314. // return nodes bigger than maxSize directly as group
  315. // But make sure that minSize is not violated
  316. for (const node of nodes) {
  317. if (isTooBig(node.size, maxSize) && !isTooSmall(node.size, minSize)) {
  318. result.push(new Group([node], []));
  319. } else {
  320. initialNodes.push(node);
  321. }
  322. }
  323. if (initialNodes.length > 0) {
  324. const initialGroup = new Group(initialNodes, getSimilarities(initialNodes));
  325. /**
  326. * Removes problematic nodes.
  327. * @param {Group<T>} group group
  328. * @param {Sizes} consideredSize size of the group to consider
  329. * @returns {boolean} true, if the group was modified
  330. */
  331. const removeProblematicNodes = (group, consideredSize = group.size) => {
  332. const problemTypes = getTooSmallTypes(consideredSize, minSize);
  333. if (problemTypes.size > 0) {
  334. // We hit an edge case where the working set is already smaller than minSize
  335. // We merge problematic nodes with the smallest result node to keep minSize intact
  336. const problemNodes = group.popNodes(
  337. (n) => getNumberOfMatchingSizeTypes(n.size, problemTypes) > 0
  338. );
  339. if (problemNodes === undefined) return false;
  340. // Only merge it with result nodes that have the problematic size type
  341. const possibleResultGroups = result.filter(
  342. (n) => getNumberOfMatchingSizeTypes(n.size, problemTypes) > 0
  343. );
  344. if (possibleResultGroups.length > 0) {
  345. const bestGroup = possibleResultGroups.reduce((min, group) => {
  346. const minMatches = getNumberOfMatchingSizeTypes(min, problemTypes);
  347. const groupMatches = getNumberOfMatchingSizeTypes(
  348. group,
  349. problemTypes
  350. );
  351. if (minMatches !== groupMatches) {
  352. return minMatches < groupMatches ? group : min;
  353. }
  354. if (
  355. selectiveSizeSum(min.size, problemTypes) >
  356. selectiveSizeSum(group.size, problemTypes)
  357. ) {
  358. return group;
  359. }
  360. return min;
  361. });
  362. for (const node of problemNodes) bestGroup.nodes.push(node);
  363. bestGroup.nodes.sort((a, b) => {
  364. if (a.key < b.key) return -1;
  365. if (a.key > b.key) return 1;
  366. return 0;
  367. });
  368. } else {
  369. // There are no other nodes with the same size types
  370. // We create a new group and have to accept that it's smaller than minSize
  371. result.push(new Group(problemNodes, null));
  372. }
  373. return true;
  374. }
  375. return false;
  376. };
  377. if (initialGroup.nodes.length > 0) {
  378. const queue = [initialGroup];
  379. while (queue.length) {
  380. const group = /** @type {Group<T>} */ (queue.pop());
  381. // only groups bigger than maxSize need to be splitted
  382. if (!isTooBig(group.size, maxSize)) {
  383. result.push(group);
  384. continue;
  385. }
  386. // If the group is already too small
  387. // we try to work only with the unproblematic nodes
  388. if (removeProblematicNodes(group)) {
  389. // This changed something, so we try this group again
  390. queue.push(group);
  391. continue;
  392. }
  393. // find unsplittable area from left and right
  394. // going minSize from left and right
  395. // at least one node need to be included otherwise we get stuck
  396. let left = 1;
  397. /** @type {Sizes} */
  398. const leftSize = Object.create(null);
  399. addSizeTo(leftSize, group.nodes[0].size);
  400. while (left < group.nodes.length && isTooSmall(leftSize, minSize)) {
  401. addSizeTo(leftSize, group.nodes[left].size);
  402. left++;
  403. }
  404. let right = group.nodes.length - 2;
  405. /** @type {Sizes} */
  406. const rightSize = Object.create(null);
  407. addSizeTo(rightSize, group.nodes[group.nodes.length - 1].size);
  408. while (right >= 0 && isTooSmall(rightSize, minSize)) {
  409. addSizeTo(rightSize, group.nodes[right].size);
  410. right--;
  411. }
  412. // left v v right
  413. // [ O O O ] O O O [ O O O ]
  414. // ^^^^^^^^^ leftSize
  415. // rightSize ^^^^^^^^^
  416. // leftSize > minSize
  417. // rightSize > minSize
  418. // Perfect split: [ O O O ] [ O O O ]
  419. // right === left - 1
  420. if (left - 1 > right) {
  421. // We try to remove some problematic nodes to "fix" that
  422. /** @type {Sizes} */
  423. let prevSize;
  424. if (right < group.nodes.length - left) {
  425. subtractSizeFrom(rightSize, group.nodes[right + 1].size);
  426. prevSize = rightSize;
  427. } else {
  428. subtractSizeFrom(leftSize, group.nodes[left - 1].size);
  429. prevSize = leftSize;
  430. }
  431. if (removeProblematicNodes(group, prevSize)) {
  432. // This changed something, so we try this group again
  433. queue.push(group);
  434. continue;
  435. }
  436. // can't split group while holding minSize
  437. // because minSize is preferred of maxSize we return
  438. // the problematic nodes as result here even while it's too big
  439. // To avoid this make sure maxSize > minSize * 3
  440. result.push(group);
  441. continue;
  442. }
  443. if (left <= right) {
  444. // when there is a area between left and right
  445. // we look for best split point
  446. // we split at the minimum similarity
  447. // here key space is separated the most
  448. // But we also need to make sure to not create too small groups
  449. let best = -1;
  450. let bestSimilarity = Infinity;
  451. let pos = left;
  452. const rightSize = sumSize(group.nodes.slice(pos));
  453. // pos v v right
  454. // [ O O O ] O O O [ O O O ]
  455. // ^^^^^^^^^ leftSize
  456. // rightSize ^^^^^^^^^^^^^^^
  457. while (pos <= right + 1) {
  458. const similarity =
  459. /** @type {Similarities} */
  460. (group.similarities)[pos - 1];
  461. if (
  462. similarity < bestSimilarity &&
  463. !isTooSmall(leftSize, minSize) &&
  464. !isTooSmall(rightSize, minSize)
  465. ) {
  466. best = pos;
  467. bestSimilarity = similarity;
  468. }
  469. addSizeTo(leftSize, group.nodes[pos].size);
  470. subtractSizeFrom(rightSize, group.nodes[pos].size);
  471. pos++;
  472. }
  473. if (best < 0) {
  474. // This can't happen
  475. // but if that assumption is wrong
  476. // fallback to a big group
  477. result.push(group);
  478. continue;
  479. }
  480. left = best;
  481. right = best - 1;
  482. }
  483. // create two new groups for left and right area
  484. // and queue them up
  485. /** @type {Node<T>[]} */
  486. const rightNodes = [group.nodes[right + 1]];
  487. /** @type {Similarities} */
  488. const rightSimilarities = [];
  489. for (let i = right + 2; i < group.nodes.length; i++) {
  490. rightSimilarities.push(
  491. /** @type {Similarities} */ (group.similarities)[i - 1]
  492. );
  493. rightNodes.push(group.nodes[i]);
  494. }
  495. queue.push(new Group(rightNodes, rightSimilarities));
  496. /** @type {Node<T>[]} */
  497. const leftNodes = [group.nodes[0]];
  498. /** @type {Similarities} */
  499. const leftSimilarities = [];
  500. for (let i = 1; i < left; i++) {
  501. leftSimilarities.push(
  502. /** @type {Similarities} */ (group.similarities)[i - 1]
  503. );
  504. leftNodes.push(group.nodes[i]);
  505. }
  506. queue.push(new Group(leftNodes, leftSimilarities));
  507. }
  508. }
  509. }
  510. // lexically ordering
  511. result.sort((a, b) => {
  512. if (a.nodes[0].key < b.nodes[0].key) return -1;
  513. if (a.nodes[0].key > b.nodes[0].key) return 1;
  514. return 0;
  515. });
  516. // give every group a name
  517. /** @type {Set<string>} */
  518. const usedNames = new Set();
  519. for (let i = 0; i < result.length; i++) {
  520. const group = result[i];
  521. if (group.nodes.length === 1) {
  522. group.key = group.nodes[0].key;
  523. } else {
  524. const first = group.nodes[0];
  525. const last = group.nodes[group.nodes.length - 1];
  526. const name = getName(first.key, last.key, usedNames);
  527. group.key = name;
  528. }
  529. }
  530. // return the results
  531. return result.map(
  532. (group) =>
  533. /** @type {GroupedItems<T>} */
  534. ({
  535. key: group.key,
  536. items: group.nodes.map((node) => node.item),
  537. size: group.size
  538. })
  539. );
  540. };