stringify-info.js 7.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253
  1. import { normalizeStringifyOptions, replaceValue, resolveStringifyMode } from './utils.js';
  2. const hasOwn = typeof Object.hasOwn === 'function'
  3. ? Object.hasOwn
  4. : (object, key) => Object.hasOwnProperty.call(object, key);
  5. // https://tc39.es/ecma262/#table-json-single-character-escapes
  6. const escapableCharCodeSubstitution = { // JSON Single Character Escape Sequences
  7. 0x08: '\\b',
  8. 0x09: '\\t',
  9. 0x0a: '\\n',
  10. 0x0c: '\\f',
  11. 0x0d: '\\r',
  12. 0x22: '\\\"',
  13. 0x5c: '\\\\'
  14. };
  15. const charLength2048 = Uint8Array.from({ length: 2048 }, (_, code) => {
  16. if (hasOwn(escapableCharCodeSubstitution, code)) {
  17. return 2; // \X
  18. }
  19. if (code < 0x20) {
  20. return 6; // \uXXXX
  21. }
  22. return code < 128 ? 1 : 2; // UTF8 bytes
  23. });
  24. function isLeadingSurrogate(code) {
  25. return code >= 0xD800 && code <= 0xDBFF;
  26. }
  27. function isTrailingSurrogate(code) {
  28. return code >= 0xDC00 && code <= 0xDFFF;
  29. }
  30. function stringLength(str) {
  31. // Fast path to compute length when a string contains only characters encoded as single bytes
  32. if (!/[^\x20\x21\x23-\x5B\x5D-\x7F]/.test(str)) {
  33. return str.length + 2;
  34. }
  35. let len = 0;
  36. let prevLeadingSurrogate = false;
  37. for (let i = 0; i < str.length; i++) {
  38. const code = str.charCodeAt(i);
  39. if (code < 2048) {
  40. len += charLength2048[code];
  41. } else if (isLeadingSurrogate(code)) {
  42. len += 6; // \uXXXX since no pair with trailing surrogate yet
  43. prevLeadingSurrogate = true;
  44. continue;
  45. } else if (isTrailingSurrogate(code)) {
  46. len = prevLeadingSurrogate
  47. ? len - 2 // surrogate pair (4 bytes), since we calculate prev leading surrogate as 6 bytes, substruct 2 bytes
  48. : len + 6; // \uXXXX
  49. } else {
  50. len += 3; // code >= 2048 is 3 bytes length for UTF8
  51. }
  52. prevLeadingSurrogate = false;
  53. }
  54. return len + 2; // +2 for quotes
  55. }
  56. // avoid producing a string from a number
  57. function intLength(num) {
  58. let len = 0;
  59. if (num < 0) {
  60. len = 1;
  61. num = -num;
  62. }
  63. if (num >= 1e9) {
  64. len += 9;
  65. num = (num - num % 1e9) / 1e9;
  66. }
  67. if (num >= 1e4) {
  68. if (num >= 1e6) {
  69. return len + (num >= 1e8
  70. ? 9
  71. : num >= 1e7 ? 8 : 7
  72. );
  73. }
  74. return len + (num >= 1e5 ? 6 : 5);
  75. }
  76. return len + (num >= 1e2
  77. ? num >= 1e3 ? 4 : 3
  78. : num >= 10 ? 2 : 1
  79. );
  80. };
  81. function primitiveLength(value) {
  82. switch (typeof value) {
  83. case 'string':
  84. return stringLength(value);
  85. case 'number':
  86. return Number.isFinite(value)
  87. ? Number.isInteger(value)
  88. ? intLength(value)
  89. : String(value).length
  90. : 4 /* null */;
  91. case 'boolean':
  92. return value ? 4 /* true */ : 5 /* false */;
  93. case 'undefined':
  94. case 'object':
  95. return 4; /* null */
  96. default:
  97. return 0;
  98. }
  99. }
  100. export function stringifyInfo(value, ...args) {
  101. const { replacer, getKeys, ...options } = normalizeStringifyOptions(...args);
  102. const continueOnCircular = Boolean(options.continueOnCircular);
  103. const space = options.space?.length || 0;
  104. const roots = resolveStringifyMode(options.mode) === 'jsonl' && Array.isArray(value) ? value : [value];
  105. const keysLength = new Map();
  106. const visited = new Map();
  107. const circular = new Set();
  108. const stack = [];
  109. let stop = false;
  110. let bytes = 0;
  111. let spaceBytes = 0;
  112. let objects = 0;
  113. for (let i = 0; i < roots.length; i++) {
  114. if (i > 0) {
  115. bytes += 1; // newline separator
  116. }
  117. walk({ '': roots[i] }, '', roots[i]);
  118. }
  119. // when value is undefined or replaced for undefined
  120. if (bytes === 0 && roots.length === 1) {
  121. bytes += 9; // FIXME: that's the length of undefined, should we normalize behaviour to convert it to null?
  122. }
  123. return {
  124. bytes: isNaN(bytes) ? Infinity : bytes + spaceBytes,
  125. spaceBytes: space > 0 && isNaN(bytes) ? Infinity : spaceBytes,
  126. circular: [...circular]
  127. };
  128. function walk(holder, key, value) {
  129. if (stop) {
  130. return;
  131. }
  132. value = replaceValue(holder, key, value, replacer);
  133. if (value === null || typeof value !== 'object') {
  134. // primitive
  135. if (value !== undefined || Array.isArray(holder)) {
  136. bytes += primitiveLength(value);
  137. }
  138. } else {
  139. // check for circular references
  140. if (stack.includes(value)) {
  141. circular.add(value);
  142. bytes += 4; // treat as null
  143. if (!continueOnCircular) {
  144. stop = true;
  145. }
  146. return;
  147. }
  148. // Using 'visited' allows avoiding hang-ups in cases of highly interconnected object graphs;
  149. // for example, a list of git commits with references to parents can lead to N^2 complexity for traversal,
  150. // and N when 'visited' is used
  151. if (visited.has(value)) {
  152. bytes += visited.get(value);
  153. return;
  154. }
  155. objects++;
  156. const prevObjects = objects;
  157. const valueBytes = bytes;
  158. let valueLength = 0;
  159. stack.push(value);
  160. if (Array.isArray(value)) {
  161. // array
  162. valueLength = value.length;
  163. for (let i = 0; i < valueLength; i++) {
  164. walk(value, i, value[i]);
  165. }
  166. } else {
  167. // object
  168. let prevLength = bytes;
  169. for (const key of getKeys(value)) {
  170. walk(value, key, value[key]);
  171. if (prevLength !== bytes) {
  172. let keyLen = keysLength.get(key);
  173. if (keyLen === undefined) {
  174. keysLength.set(key, keyLen = stringLength(key) + 1); // "key":
  175. }
  176. // value is printed
  177. bytes += keyLen;
  178. valueLength++;
  179. prevLength = bytes;
  180. }
  181. }
  182. }
  183. bytes += valueLength === 0
  184. ? 2 // {} or []
  185. : 1 + valueLength; // {} or [] + commas
  186. if (space > 0 && valueLength > 0) {
  187. spaceBytes +=
  188. // a space between ":" and a value for each object entry
  189. (Array.isArray(value) ? 0 : valueLength) +
  190. // the formula results from folding the following components:
  191. // - for each key-value or element: ident + newline
  192. // (1 + stack.length * space) * valueLength
  193. // - ident (one space less) before "}" or "]" + newline
  194. // (stack.length - 1) * space + 1
  195. (1 + stack.length * space) * (valueLength + 1) - space;
  196. }
  197. stack.pop();
  198. // add to 'visited' only objects that contain nested objects
  199. if (prevObjects !== objects) {
  200. visited.set(value, bytes - valueBytes);
  201. }
  202. }
  203. }
  204. };