// tokenizer.js
  1. "use strict";
  2. var _mamacro = require("mamacro");
  3. var _helperFsm = require("@webassemblyjs/helper-fsm");
  4. var _helperCodeFrame = require("@webassemblyjs/helper-code-frame");
  5. // eslint-disable-next-line
  6. function getCodeFrame(source, line, column) {
  7. var loc = {
  8. start: {
  9. line: line,
  10. column: column
  11. }
  12. };
  13. return "\n" + (0, _helperCodeFrame.codeFrameFromSource)(source, loc) + "\n";
  14. }
  15. var WHITESPACE = /\s/;
  16. var PARENS = /\(|\)/;
  17. var LETTERS = /[a-z0-9_/]/i;
  18. var idchar = /[a-z0-9!#$%&*+./:<=>?@\\[\]^_`|~-]/i;
  19. var valtypes = ["i32", "i64", "f32", "f64"];
  20. var NUMBERS = /[0-9|.|_]/;
  21. var NUMBER_KEYWORDS = /nan|inf/;
  22. function isNewLine(char) {
  23. return char.charCodeAt(0) === 10 || char.charCodeAt(0) === 13;
  24. }
  25. function Token(type, value, start, end) {
  26. var opts = arguments.length > 4 && arguments[4] !== undefined ? arguments[4] : {};
  27. var token = {
  28. type: type,
  29. value: value,
  30. loc: {
  31. start: start,
  32. end: end
  33. }
  34. };
  35. if (Object.keys(opts).length > 0) {
  36. // $FlowIgnore
  37. token["opts"] = opts;
  38. }
  39. return token;
  40. }
  41. var tokenTypes = {
  42. openParen: "openParen",
  43. closeParen: "closeParen",
  44. number: "number",
  45. string: "string",
  46. name: "name",
  47. identifier: "identifier",
  48. valtype: "valtype",
  49. dot: "dot",
  50. comment: "comment",
  51. equal: "equal",
  52. keyword: "keyword"
  53. };
  54. var keywords = {
  55. module: "module",
  56. func: "func",
  57. param: "param",
  58. result: "result",
  59. export: "export",
  60. loop: "loop",
  61. block: "block",
  62. if: "if",
  63. then: "then",
  64. else: "else",
  65. call: "call",
  66. call_indirect: "call_indirect",
  67. import: "import",
  68. memory: "memory",
  69. table: "table",
  70. global: "global",
  71. anyfunc: "anyfunc",
  72. mut: "mut",
  73. data: "data",
  74. type: "type",
  75. elem: "elem",
  76. start: "start",
  77. offset: "offset"
  78. };
  79. var NUMERIC_SEPARATOR = "_";
  80. /**
  81. * Build the FSM for number literals
  82. */
  83. var numberLiteralFSM = new _helperFsm.FSM({
  84. START: [(0, _helperFsm.makeTransition)(/-|\+/, "AFTER_SIGN"), (0, _helperFsm.makeTransition)(/nan:0x/, "NAN_HEX", {
  85. n: 6
  86. }), (0, _helperFsm.makeTransition)(/nan|inf/, "STOP", {
  87. n: 3
  88. }), (0, _helperFsm.makeTransition)(/0x/, "HEX", {
  89. n: 2
  90. }), (0, _helperFsm.makeTransition)(/[0-9]/, "DEC"), (0, _helperFsm.makeTransition)(/\./, "DEC_FRAC")],
  91. AFTER_SIGN: [(0, _helperFsm.makeTransition)(/nan:0x/, "NAN_HEX", {
  92. n: 6
  93. }), (0, _helperFsm.makeTransition)(/nan|inf/, "STOP", {
  94. n: 3
  95. }), (0, _helperFsm.makeTransition)(/0x/, "HEX", {
  96. n: 2
  97. }), (0, _helperFsm.makeTransition)(/[0-9]/, "DEC"), (0, _helperFsm.makeTransition)(/\./, "DEC_FRAC")],
  98. DEC_FRAC: [(0, _helperFsm.makeTransition)(/[0-9]/, "DEC_FRAC", {
  99. allowedSeparator: NUMERIC_SEPARATOR
  100. }), (0, _helperFsm.makeTransition)(/e|E/, "DEC_SIGNED_EXP")],
  101. DEC: [(0, _helperFsm.makeTransition)(/[0-9]/, "DEC", {
  102. allowedSeparator: NUMERIC_SEPARATOR
  103. }), (0, _helperFsm.makeTransition)(/\./, "DEC_FRAC"), (0, _helperFsm.makeTransition)(/e|E/, "DEC_SIGNED_EXP")],
  104. DEC_SIGNED_EXP: [(0, _helperFsm.makeTransition)(/\+|-/, "DEC_EXP"), (0, _helperFsm.makeTransition)(/[0-9]/, "DEC_EXP")],
  105. DEC_EXP: [(0, _helperFsm.makeTransition)(/[0-9]/, "DEC_EXP", {
  106. allowedSeparator: NUMERIC_SEPARATOR
  107. })],
  108. HEX: [(0, _helperFsm.makeTransition)(/[0-9|A-F|a-f]/, "HEX", {
  109. allowedSeparator: NUMERIC_SEPARATOR
  110. }), (0, _helperFsm.makeTransition)(/\./, "HEX_FRAC"), (0, _helperFsm.makeTransition)(/p|P/, "HEX_SIGNED_EXP")],
  111. HEX_FRAC: [(0, _helperFsm.makeTransition)(/[0-9|A-F|a-f]/, "HEX_FRAC", {
  112. allowedSeparator: NUMERIC_SEPARATOR
  113. }), (0, _helperFsm.makeTransition)(/p|P|/, "HEX_SIGNED_EXP")],
  114. HEX_SIGNED_EXP: [(0, _helperFsm.makeTransition)(/[0-9|+|-]/, "HEX_EXP")],
  115. HEX_EXP: [(0, _helperFsm.makeTransition)(/[0-9]/, "HEX_EXP", {
  116. allowedSeparator: NUMERIC_SEPARATOR
  117. })],
  118. NAN_HEX: [(0, _helperFsm.makeTransition)(/[0-9|A-F|a-f]/, "NAN_HEX", {
  119. allowedSeparator: NUMERIC_SEPARATOR
  120. })],
  121. STOP: []
  122. }, "START", "STOP");
  123. function tokenize(input) {
  124. var current = 0;
  125. var char = input[current]; // Used by SourceLocation
  126. var column = 1;
  127. var line = 1;
  128. var tokens = [];
  129. /**
  130. * Creates a pushToken function for a given type
  131. */
  132. function pushToken(type) {
  133. return function (v) {
  134. var opts = arguments.length > 1 && arguments[1] !== undefined ? arguments[1] : {};
  135. var startColumn = opts.startColumn || column - String(v).length;
  136. delete opts.startColumn;
  137. var endColumn = opts.endColumn || startColumn + String(v).length - 1;
  138. delete opts.endColumn;
  139. var start = {
  140. line: line,
  141. column: startColumn
  142. };
  143. var end = {
  144. line: line,
  145. column: endColumn
  146. };
  147. tokens.push(Token(type, v, start, end, opts));
  148. };
  149. }
  150. /**
  151. * Functions to save newly encountered tokens
  152. */
  153. var pushCloseParenToken = pushToken(tokenTypes.closeParen);
  154. var pushOpenParenToken = pushToken(tokenTypes.openParen);
  155. var pushNumberToken = pushToken(tokenTypes.number);
  156. var pushValtypeToken = pushToken(tokenTypes.valtype);
  157. var pushNameToken = pushToken(tokenTypes.name);
  158. var pushIdentifierToken = pushToken(tokenTypes.identifier);
  159. var pushKeywordToken = pushToken(tokenTypes.keyword);
  160. var pushDotToken = pushToken(tokenTypes.dot);
  161. var pushStringToken = pushToken(tokenTypes.string);
  162. var pushCommentToken = pushToken(tokenTypes.comment);
  163. var pushEqualToken = pushToken(tokenTypes.equal);
  164. /**
  165. * Can be used to look at the next character(s).
  166. *
  167. * The default behavior `lookahead()` simply returns the next character without consuming it.
  168. * Letters are always returned in lowercase.
  169. *
  170. * @param {number} length How many characters to query. Default = 1
  171. * @param {number} offset How many characters to skip forward from current one. Default = 1
  172. *
  173. */
  174. function lookahead() {
  175. var length = arguments.length > 0 && arguments[0] !== undefined ? arguments[0] : 1;
  176. var offset = arguments.length > 1 && arguments[1] !== undefined ? arguments[1] : 1;
  177. return input.substring(current + offset, current + offset + length).toLowerCase();
  178. }
  179. /**
  180. * Advances the cursor in the input by a certain amount
  181. *
  182. * @param {number} amount How many characters to consume. Default = 1
  183. */
  184. function eatCharacter() {
  185. var amount = arguments.length > 0 && arguments[0] !== undefined ? arguments[0] : 1;
  186. column += amount;
  187. current += amount;
  188. char = input[current];
  189. }
  190. while (current < input.length) {
  191. // ;;
  192. if (char === ";" && lookahead() === ";") {
  193. var startColumn = column;
  194. eatCharacter(2);
  195. var text = "";
  196. while (!isNewLine(char)) {
  197. text += char;
  198. eatCharacter();
  199. if (char === undefined) {
  200. break;
  201. }
  202. }
  203. var endColumn = column;
  204. pushCommentToken(text, {
  205. type: "leading",
  206. startColumn: startColumn,
  207. endColumn: endColumn
  208. });
  209. continue;
  210. } // (;
  211. if (char === "(" && lookahead() === ";") {
  212. var _startColumn = column;
  213. eatCharacter(2);
  214. var _text = ""; // ;)
  215. while (true) {
  216. char = input[current];
  217. if (char === ";" && lookahead() === ")") {
  218. eatCharacter(2);
  219. break;
  220. }
  221. _text += char;
  222. eatCharacter();
  223. if (isNewLine(char)) {
  224. line++;
  225. column = 0;
  226. }
  227. }
  228. var _endColumn = column;
  229. pushCommentToken(_text, {
  230. type: "block",
  231. startColumn: _startColumn,
  232. endColumn: _endColumn
  233. });
  234. continue;
  235. }
  236. if (char === "(") {
  237. pushOpenParenToken(char);
  238. eatCharacter();
  239. continue;
  240. }
  241. if (char === "=") {
  242. pushEqualToken(char);
  243. eatCharacter();
  244. continue;
  245. }
  246. if (char === ")") {
  247. pushCloseParenToken(char);
  248. eatCharacter();
  249. continue;
  250. }
  251. if (isNewLine(char)) {
  252. line++;
  253. eatCharacter();
  254. column = 0;
  255. continue;
  256. }
  257. if (WHITESPACE.test(char)) {
  258. eatCharacter();
  259. continue;
  260. }
  261. if (char === "$") {
  262. var _startColumn2 = column;
  263. eatCharacter();
  264. var value = "";
  265. while (idchar.test(char)) {
  266. value += char;
  267. eatCharacter();
  268. }
  269. var _endColumn2 = column;
  270. pushIdentifierToken(value, {
  271. startColumn: _startColumn2,
  272. endColumn: _endColumn2
  273. });
  274. continue;
  275. }
  276. if (NUMBERS.test(char) || NUMBER_KEYWORDS.test(lookahead(3, 0)) || char === "-" || char === "+") {
  277. var _startColumn3 = column;
  278. var _value = numberLiteralFSM.run(input.slice(current));
  279. if (_value === "") {
  280. throw new Error(getCodeFrame(input, line, column) + "Unexpected character " + JSON.stringify(char));
  281. }
  282. pushNumberToken(_value, {
  283. startColumn: _startColumn3
  284. });
  285. eatCharacter(_value.length);
  286. if (char && !PARENS.test(char) && !WHITESPACE.test(char)) {
  287. throw new Error(getCodeFrame(input, line, column) + "Unexpected character " + JSON.stringify(char));
  288. }
  289. continue;
  290. }
  291. if (char === '"') {
  292. var _startColumn4 = column;
  293. var _value2 = "";
  294. eatCharacter(); // "
  295. while (char !== '"') {
  296. if (isNewLine(char)) {
  297. throw new Error(getCodeFrame(input, line, column) + "Unexpected character " + JSON.stringify(char));
  298. }
  299. _value2 += char;
  300. eatCharacter(); // char
  301. }
  302. eatCharacter(); // "
  303. var _endColumn3 = column;
  304. pushStringToken(_value2, {
  305. startColumn: _startColumn4,
  306. endColumn: _endColumn3
  307. });
  308. continue;
  309. }
  310. if (LETTERS.test(char)) {
  311. var _value3 = "";
  312. var _startColumn5 = column;
  313. while (char && LETTERS.test(char)) {
  314. _value3 += char;
  315. eatCharacter();
  316. }
  317. /*
  318. * Handle MemberAccess
  319. */
  320. if (char === ".") {
  321. var dotStartColumn = column;
  322. if (valtypes.indexOf(_value3) !== -1) {
  323. pushValtypeToken(_value3, {
  324. startColumn: _startColumn5
  325. });
  326. } else {
  327. pushNameToken(_value3);
  328. }
  329. eatCharacter();
  330. _value3 = "";
  331. var nameStartColumn = column;
  332. while (LETTERS.test(char)) {
  333. _value3 += char;
  334. eatCharacter();
  335. }
  336. pushDotToken(".", {
  337. startColumn: dotStartColumn
  338. });
  339. pushNameToken(_value3, {
  340. startColumn: nameStartColumn
  341. });
  342. continue;
  343. }
  344. /*
  345. * Handle keywords
  346. */
  347. // $FlowIgnore
  348. if (typeof keywords[_value3] === "string") {
  349. pushKeywordToken(_value3, {
  350. startColumn: _startColumn5
  351. });
  352. continue;
  353. }
  354. /*
  355. * Handle types
  356. */
  357. if (valtypes.indexOf(_value3) !== -1) {
  358. pushValtypeToken(_value3, {
  359. startColumn: _startColumn5
  360. });
  361. continue;
  362. }
  363. /*
  364. * Handle literals
  365. */
  366. pushNameToken(_value3, {
  367. startColumn: _startColumn5
  368. });
  369. continue;
  370. }
  371. throw new Error(getCodeFrame(input, line, column) + "Unexpected character " + JSON.stringify(char));
  372. }
  373. return tokens;
  374. }
  375. module.exports = {
  376. tokenize: tokenize,
  377. tokens: tokenTypes,
  378. keywords: keywords
  379. };