// validate_utf8.ts (1.2 KB)
  // Bit masks used to classify a byte by its high-order bits.
  const FIRST_BIT = 0x80;
  const FIRST_TWO_BITS = 0xc0;
  const FIRST_THREE_BITS = 0xe0;
  const FIRST_FOUR_BITS = 0xf0;
  const FIRST_FIVE_BITS = 0xf8;

  // Expected high-order bit patterns for lead bytes and continuation bytes.
  const TWO_BIT_CHAR = 0xc0;
  const THREE_BIT_CHAR = 0xe0;
  const FOUR_BIT_CHAR = 0xf0;
  const CONTINUING_CHAR = 0x80;

  // Keeps only the low 8 bits so signed byte containers compare correctly.
  const BYTE_MASK = 0xff;
  // Highest continuation byte value (0b10111111).
  const MAX_CONTINUING_CHAR = 0xbf;
  // Smallest two-byte lead that is not an overlong encoding (0xc0 and 0xc1 are).
  const MIN_TWO_BYTE_LEAD = 0xc2;
  // Largest four-byte lead that can stay at or below U+10FFFF.
  const MAX_FOUR_BYTE_LEAD = 0xf4;

  /**
   * Determines if the passed in bytes are well-formed UTF-8 as defined by RFC 3629.
   *
   * In addition to checking that every lead byte is followed by the right number
   * of continuation bytes, this rejects:
   * - overlong encodings (lead bytes 0xC0/0xC1, 0xE0 followed by 0x80-0x9F,
   *   0xF0 followed by 0x80-0x8F),
   * - UTF-16 surrogate code points (0xED followed by 0xA0-0xBF),
   * - code points above U+10FFFF (0xF4 followed by 0x90-0xBF, lead bytes 0xF5+).
   *
   * Only the half-open range `[start, end)` is inspected. An empty range is valid.
   *
   * @param bytes - An array of 8-bit bytes. Must be indexable and have length property
   * @param start - The index to start validating (inclusive)
   * @param end - The index to end validating (exclusive)
   * @returns `true` when the range is well-formed UTF-8, `false` otherwise
   *   (including when the range ends in the middle of a multi-byte sequence)
   */
  export function validateUtf8(
    bytes: { [index: number]: number },
    start: number,
    end: number
  ): boolean {
    // Number of continuation bytes still expected for the current sequence.
    let continuation = 0;
    // Inclusive bounds for the NEXT continuation byte. They are narrowed right
    // after certain lead bytes and restored once that byte has been checked.
    let lowerBound = CONTINUING_CHAR;
    let upperBound = MAX_CONTINUING_CHAR;

    for (let i = start; i < end; i += 1) {
      // Masking keeps the classification identical for signed containers
      // (e.g. Int8Array) and maps a missing element to 0, as the bit tests did.
      const byte = bytes[i] & BYTE_MASK;

      if (continuation) {
        if (!(byte >= lowerBound && byte <= upperBound)) {
          return false;
        }
        // Only the first continuation byte has a restricted range.
        lowerBound = CONTINUING_CHAR;
        upperBound = MAX_CONTINUING_CHAR;
        continuation -= 1;
      } else if (byte & FIRST_BIT) {
        if ((byte & FIRST_THREE_BITS) === TWO_BIT_CHAR) {
          // 0xC0 and 0xC1 could only encode values below U+0080 (overlong).
          if (byte < MIN_TWO_BYTE_LEAD) {
            return false;
          }
          continuation = 1;
        } else if ((byte & FIRST_FOUR_BITS) === THREE_BIT_CHAR) {
          continuation = 2;
          if (byte === 0xe0) {
            // Below 0xA0 would encode a value under U+0800 (overlong).
            lowerBound = 0xa0;
          } else if (byte === 0xed) {
            // Above 0x9F would encode a surrogate (U+D800-U+DFFF).
            upperBound = 0x9f;
          }
        } else if ((byte & FIRST_FIVE_BITS) === FOUR_BIT_CHAR) {
          // 0xF5-0xF7 always encode values above U+10FFFF.
          if (byte > MAX_FOUR_BYTE_LEAD) {
            return false;
          }
          continuation = 3;
          if (byte === 0xf0) {
            // Below 0x90 would encode a value under U+10000 (overlong).
            lowerBound = 0x90;
          } else if (byte === MAX_FOUR_BYTE_LEAD) {
            // Above 0x8F would encode a value over U+10FFFF.
            upperBound = 0x8f;
          }
        } else {
          // Stray continuation byte or an invalid lead byte (0xF8-0xFF).
          return false;
        }
      }
    }

    // A sequence that is still waiting for continuation bytes is truncated.
    return !continuation;
  }