// tokenize.js (6.5 KB)
  1. 'use strict'
  2. const SINGLE_QUOTE = "'".charCodeAt(0)
  3. const DOUBLE_QUOTE = '"'.charCodeAt(0)
  4. const BACKSLASH = '\\'.charCodeAt(0)
  5. const SLASH = '/'.charCodeAt(0)
  6. const NEWLINE = '\n'.charCodeAt(0)
  7. const SPACE = ' '.charCodeAt(0)
  8. const FEED = '\f'.charCodeAt(0)
  9. const TAB = '\t'.charCodeAt(0)
  10. const CR = '\r'.charCodeAt(0)
  11. const OPEN_SQUARE = '['.charCodeAt(0)
  12. const CLOSE_SQUARE = ']'.charCodeAt(0)
  13. const OPEN_PARENTHESES = '('.charCodeAt(0)
  14. const CLOSE_PARENTHESES = ')'.charCodeAt(0)
  15. const OPEN_CURLY = '{'.charCodeAt(0)
  16. const CLOSE_CURLY = '}'.charCodeAt(0)
  17. const SEMICOLON = ';'.charCodeAt(0)
  18. const ASTERISK = '*'.charCodeAt(0)
  19. const COLON = ':'.charCodeAt(0)
  20. const AT = '@'.charCodeAt(0)
  21. const RE_AT_END = /[\t\n\f\r "#'()/;[\\\]{}]/g
  22. const RE_WORD_END = /[\t\n\f\r !"#'():;@[\\\]{}]|\/(?=\*)/g
  23. const RE_BAD_BRACKET = /.[\r\n"'(/\\]/
  24. const RE_HEX_ESCAPE = /[\da-f]/i
  25. module.exports = function tokenizer(input, options = {}) {
  26. let css = input.css.valueOf()
  27. let ignore = options.ignoreErrors
  28. let code, content, escape, next, quote
  29. let currentToken, escaped, escapePos, n, prev
  30. let length = css.length
  31. let pos = 0
  32. let buffer = []
  33. let returned = []
  34. let lastBadParen = -1
  35. function position() {
  36. return pos
  37. }
  38. function unclosed(what) {
  39. throw input.error('Unclosed ' + what, pos)
  40. }
  41. function endOfFile() {
  42. return returned.length === 0 && pos >= length
  43. }
  44. function nextToken(opts) {
  45. if (returned.length) return returned.pop()
  46. if (pos >= length) return
  47. let ignoreUnclosed = opts ? opts.ignoreUnclosed : false
  48. code = css.charCodeAt(pos)
  49. switch (code) {
  50. case NEWLINE:
  51. case SPACE:
  52. case TAB:
  53. case CR:
  54. case FEED: {
  55. next = pos
  56. do {
  57. next += 1
  58. code = css.charCodeAt(next)
  59. } while (
  60. code === SPACE ||
  61. code === NEWLINE ||
  62. code === TAB ||
  63. code === CR ||
  64. code === FEED
  65. )
  66. currentToken = ['space', css.slice(pos, next)]
  67. pos = next - 1
  68. break
  69. }
  70. case OPEN_SQUARE:
  71. case CLOSE_SQUARE:
  72. case OPEN_CURLY:
  73. case CLOSE_CURLY:
  74. case COLON:
  75. case SEMICOLON:
  76. case CLOSE_PARENTHESES: {
  77. let controlChar = String.fromCharCode(code)
  78. currentToken = [controlChar, controlChar, pos]
  79. break
  80. }
  81. case OPEN_PARENTHESES: {
  82. prev = buffer.length ? buffer.pop()[1] : ''
  83. n = css.charCodeAt(pos + 1)
  84. if (
  85. prev === 'url' &&
  86. n !== SINGLE_QUOTE &&
  87. n !== DOUBLE_QUOTE &&
  88. n !== SPACE &&
  89. n !== NEWLINE &&
  90. n !== TAB &&
  91. n !== FEED &&
  92. n !== CR
  93. ) {
  94. next = pos
  95. do {
  96. escaped = false
  97. next = css.indexOf(')', next + 1)
  98. if (next === -1) {
  99. if (ignore || ignoreUnclosed) {
  100. next = pos
  101. break
  102. } else {
  103. unclosed('bracket')
  104. }
  105. }
  106. escapePos = next
  107. while (css.charCodeAt(escapePos - 1) === BACKSLASH) {
  108. escapePos -= 1
  109. escaped = !escaped
  110. }
  111. } while (escaped)
  112. currentToken = ['brackets', css.slice(pos, next + 1), pos, next]
  113. pos = next
  114. } else if (pos <= lastBadParen) {
  115. currentToken = ['(', '(', pos]
  116. } else {
  117. next = css.indexOf(')', pos + 1)
  118. content = css.slice(pos, next + 1)
  119. if (next === -1 || RE_BAD_BRACKET.test(content)) {
  120. lastBadParen = next === -1 ? length : next
  121. currentToken = ['(', '(', pos]
  122. } else {
  123. currentToken = ['brackets', content, pos, next]
  124. pos = next
  125. }
  126. }
  127. break
  128. }
  129. case SINGLE_QUOTE:
  130. case DOUBLE_QUOTE: {
  131. quote = code === SINGLE_QUOTE ? "'" : '"'
  132. next = pos
  133. do {
  134. escaped = false
  135. next = css.indexOf(quote, next + 1)
  136. if (next === -1) {
  137. if (ignore || ignoreUnclosed) {
  138. next = pos + 1
  139. break
  140. } else {
  141. unclosed('string')
  142. }
  143. }
  144. escapePos = next
  145. while (css.charCodeAt(escapePos - 1) === BACKSLASH) {
  146. escapePos -= 1
  147. escaped = !escaped
  148. }
  149. } while (escaped)
  150. currentToken = ['string', css.slice(pos, next + 1), pos, next]
  151. pos = next
  152. break
  153. }
  154. case AT: {
  155. RE_AT_END.lastIndex = pos + 1
  156. RE_AT_END.test(css)
  157. if (RE_AT_END.lastIndex === 0) {
  158. next = css.length - 1
  159. } else {
  160. next = RE_AT_END.lastIndex - 2
  161. }
  162. currentToken = ['at-word', css.slice(pos, next + 1), pos, next]
  163. pos = next
  164. break
  165. }
  166. case BACKSLASH: {
  167. next = pos
  168. escape = true
  169. while (css.charCodeAt(next + 1) === BACKSLASH) {
  170. next += 1
  171. escape = !escape
  172. }
  173. code = css.charCodeAt(next + 1)
  174. if (
  175. escape &&
  176. code !== SLASH &&
  177. code !== SPACE &&
  178. code !== NEWLINE &&
  179. code !== TAB &&
  180. code !== CR &&
  181. code !== FEED
  182. ) {
  183. next += 1
  184. if (RE_HEX_ESCAPE.test(css.charAt(next))) {
  185. while (RE_HEX_ESCAPE.test(css.charAt(next + 1))) {
  186. next += 1
  187. }
  188. if (css.charCodeAt(next + 1) === SPACE) {
  189. next += 1
  190. }
  191. }
  192. }
  193. currentToken = ['word', css.slice(pos, next + 1), pos, next]
  194. pos = next
  195. break
  196. }
  197. default: {
  198. if (code === SLASH && css.charCodeAt(pos + 1) === ASTERISK) {
  199. next = css.indexOf('*/', pos + 2) + 1
  200. if (next === 0) {
  201. if (ignore || ignoreUnclosed) {
  202. next = css.length
  203. } else {
  204. unclosed('comment')
  205. }
  206. }
  207. currentToken = ['comment', css.slice(pos, next + 1), pos, next]
  208. pos = next
  209. } else {
  210. RE_WORD_END.lastIndex = pos + 1
  211. RE_WORD_END.test(css)
  212. if (RE_WORD_END.lastIndex === 0) {
  213. next = css.length - 1
  214. } else {
  215. next = RE_WORD_END.lastIndex - 2
  216. }
  217. currentToken = ['word', css.slice(pos, next + 1), pos, next]
  218. buffer.push(currentToken)
  219. pos = next
  220. }
  221. break
  222. }
  223. }
  224. pos++
  225. return currentToken
  226. }
  227. function back(token) {
  228. returned.push(token)
  229. }
  230. return {
  231. back,
  232. endOfFile,
  233. nextToken,
  234. position
  235. }
  236. }