| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100 |
/**
 * Estimate the decoded byte length of a `data:` URL *without* allocating large buffers.
 *
 * - For base64 bodies: derive the decoded size from the number of base64
 *   characters. Every `%XX` escape is counted as a single character and
 *   trailing padding (`=` or `%3D` / `%3d`) is subtracted, all by inspecting
 *   char codes in place (no decoded string is built).
 *   Both padded and unpadded payloads are handled: `n` data characters decode
 *   to `floor(n * 3 / 4)` bytes.
 * - For non-base64 bodies: return the UTF-8 byte length of the still-encoded
 *   body, which is a safe upper bound for the decoded size.
 *
 * Returns 0 when `url` is not a non-empty string, does not start with
 * `data:`, or contains no comma separating metadata from the body.
 *
 * @param {string} url - The candidate `data:` URL.
 * @returns {number} Estimated decoded size in bytes (never negative).
 */
export default function estimateDataURLDecodedBytes(url) {
  if (!url || typeof url !== 'string') return 0;
  if (!url.startsWith('data:')) return 0;

  const comma = url.indexOf(',');
  if (comma < 0) return 0;

  const meta = url.slice(5, comma);
  const body = url.slice(comma + 1);
  const isBase64 = /;base64/i.test(meta);

  if (isBase64) {
    const len = body.length;

    // ASCII ranges: '0'-'9' (48-57), 'A'-'F' (65-70), 'a'-'f' (97-102).
    const isHexDigit = (code) =>
      (code >= 48 && code <= 57) || (code >= 65 && code <= 70) || (code >= 97 && code <= 102);

    // Count characters as they would appear after percent-decoding:
    // each valid "%XX" triplet collapses to a single character.
    let effectiveLen = len;
    for (let i = 0; i < len; i++) {
      if (
        body.charCodeAt(i) === 37 /* '%' */ &&
        i + 2 < len &&
        isHexDigit(body.charCodeAt(i + 1)) &&
        isHexDigit(body.charCodeAt(i + 2))
      ) {
        effectiveLen -= 2;
        i += 2;
      }
    }

    // True when the three characters ending at index j spell "%3D" / "%3d",
    // i.e. a percent-encoded '=' padding character.
    const tailIsPct3D = (j) =>
      j >= 2 &&
      body.charCodeAt(j - 2) === 37 && // '%'
      body.charCodeAt(j - 1) === 51 && // '3'
      (body.charCodeAt(j) === 68 || body.charCodeAt(j) === 100); // 'D' or 'd'

    // Detect up to two trailing padding characters, literal or percent-encoded.
    let pad = 0;
    let idx = len - 1;
    if (idx >= 0) {
      if (body.charCodeAt(idx) === 61 /* '=' */) {
        pad++;
        idx--;
      } else if (tailIsPct3D(idx)) {
        pad++;
        idx -= 3;
      }
    }
    if (pad === 1 && idx >= 0) {
      if (body.charCodeAt(idx) === 61 /* '=' */ || tailIsPct3D(idx)) {
        pad++;
      }
    }

    // Each base64 data character carries 6 bits, so n characters yield
    // floor(n * 3 / 4) bytes. Counting data characters (instead of whole
    // 4-character groups) keeps the result identical for well-formed padded
    // input while no longer dropping the trailing 1-2 bytes of unpadded input.
    const dataChars = effectiveLen - pad;
    return dataChars > 0 ? Math.floor((dataChars * 3) / 4) : 0;
  }

  if (typeof Buffer !== 'undefined' && typeof Buffer.byteLength === 'function') {
    return Buffer.byteLength(body, 'utf8');
  }

  // Compute UTF-8 byte length directly from UTF-16 code units without allocating
  // a byte buffer (TextEncoder.encode would defeat the DoS guard on large bodies).
  // Using body.length here would undercount non-ASCII (e.g. '€' is 1 code unit
  // but 3 UTF-8 bytes).
  let bytes = 0;
  for (let i = 0, len = body.length; i < len; i++) {
    const c = body.charCodeAt(i);
    if (c < 0x80) {
      bytes += 1;
    } else if (c < 0x800) {
      bytes += 2;
    } else if (c >= 0xd800 && c <= 0xdbff && i + 1 < len) {
      // High surrogate: a valid pair encodes one 4-byte code point; a lone
      // surrogate is counted as 3 bytes.
      const next = body.charCodeAt(i + 1);
      if (next >= 0xdc00 && next <= 0xdfff) {
        bytes += 4;
        i++;
      } else {
        bytes += 3;
      }
    } else {
      bytes += 3;
    }
  }
  return bytes;
}
|