1234567891011121314151617181920212223242526272829303132333435363738394041424344454647 |
// Masks selecting the leading N bits of a byte.
const FIRST_BIT = 0x80;
const FIRST_TWO_BITS = 0xc0;
const FIRST_THREE_BITS = 0xe0;
const FIRST_FOUR_BITS = 0xf0;
const FIRST_FIVE_BITS = 0xf8;

// Bit patterns (after masking) that identify each kind of UTF-8 byte.
// Despite the "BIT" in their names, these mark the lead byte of a
// two-, three- and four-BYTE sequence respectively.
const TWO_BIT_CHAR = 0xc0; // 110xxxxx
const THREE_BIT_CHAR = 0xe0; // 1110xxxx
const FOUR_BIT_CHAR = 0xf0; // 11110xxx
const CONTINUING_CHAR = 0x80; // 10xxxxxx

/**
 * Determines whether `bytes[start]` .. `bytes[end - 1]` form well-formed UTF-8.
 *
 * Besides checking that every lead byte is followed by the right number of
 * continuation bytes, this rejects the sequences that have the right bit
 * shape but are still ill-formed: overlong encodings, UTF-16 surrogate code
 * points (U+D800..U+DFFF) and code points above U+10FFFF.
 *
 * @param bytes - An indexable collection of 8-bit byte values. Only indexed
 * reads are performed; no `length` property is consulted.
 * @param start - The index of the first byte to validate (inclusive)
 * @param end - The index at which to stop validating (exclusive)
 * @returns `true` if the range is well-formed UTF-8 (an empty range is
 * valid); `false` otherwise, including when the range ends in the middle of
 * a multi-byte sequence.
 */
export function validateUtf8(
  bytes: { [index: number]: number },
  start: number,
  end: number
): boolean {
  // Number of continuation bytes still expected for the current sequence.
  let continuation = 0;
  // Inclusive range permitted for the NEXT continuation byte. It is narrowed
  // only for the first continuation byte after certain lead bytes, which is
  // where overlong forms, surrogates and out-of-range code points show up.
  let lowest = 0x80;
  let highest = 0xbf;

  for (let i = start; i < end; i += 1) {
    const byte = bytes[i];

    if (continuation) {
      if (
        (byte & FIRST_TWO_BITS) !== CONTINUING_CHAR ||
        byte < lowest ||
        byte > highest
      ) {
        return false;
      }
      continuation -= 1;
      // Any further continuation bytes may use the full 0x80..0xBF range.
      lowest = 0x80;
      highest = 0xbf;
    } else if (byte & FIRST_BIT) {
      if ((byte & FIRST_THREE_BITS) === TWO_BIT_CHAR) {
        // 0xC0 and 0xC1 could only encode U+0000..U+007F (overlong).
        if (byte < 0xc2) {
          return false;
        }
        continuation = 1;
      } else if ((byte & FIRST_FOUR_BITS) === THREE_BIT_CHAR) {
        continuation = 2;
        if (byte === 0xe0) {
          // Below 0xA0 would be an overlong encoding of U+0000..U+07FF.
          lowest = 0xa0;
        } else if (byte === 0xed) {
          // Above 0x9F would encode a surrogate (U+D800..U+DFFF).
          highest = 0x9f;
        }
      } else if ((byte & FIRST_FIVE_BITS) === FOUR_BIT_CHAR) {
        // 0xF5..0xF7 can only start code points above U+10FFFF.
        if (byte > 0xf4) {
          return false;
        }
        continuation = 3;
        if (byte === 0xf0) {
          // Below 0x90 would be an overlong encoding of U+0000..U+FFFF.
          lowest = 0x90;
        } else if (byte === 0xf4) {
          // Above 0x8F would encode a code point beyond U+10FFFF.
          highest = 0x8f;
        }
      } else {
        // A stray continuation byte, or a lead byte of the form 11111xxx.
        return false;
      }
    }
  }

  // A sequence still awaiting continuation bytes is truncated.
  return !continuation;
}
|