You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
434 lines
10 KiB
434 lines
10 KiB
import { FSM, makeTransition } from "@webassemblyjs/helper-fsm";
|
|
import { codeFrameFromSource } from "@webassemblyjs/helper-code-frame";
|
|
|
|
/**
 * Renders a code frame pointing at a single position in `source`.
 *
 * The position is handed to `codeFrameFromSource` as `{ start: { line, column } }`
 * and the rendered frame is wrapped in a leading and a trailing newline so it
 * can be prepended directly to an error message.
 *
 * @param {string} source Text the position refers to
 * @param {number} line   Line of the position
 * @param {number} column Column of the position
 * @returns {string} "\n" + rendered frame + "\n"
 */
// eslint-disable-next-line
function getCodeFrame(source, line, column) {
  var position = { line: line, column: column };
  var frame = codeFrameFromSource(source, { start: position });
  return "\n" + frame + "\n";
}
|
|
|
|
// Single-character classifiers used by the tokenizer's main loop.

// Any whitespace character; such characters are skipped between tokens.
var WHITESPACE = /\s/;

// An opening or closing parenthesis; accepted directly after a number literal.
var PARENS = /\(|\)/;

// Characters that may appear in a bare word (keyword, valtype or name).
// Note that "/" is part of the class.
var LETTERS = /[a-z0-9_/]/i;

// Characters that may follow the "$" of an identifier.
var idchar = /[a-z0-9!#$%&*+./:<=>?@\\[\]^_`|~-]/i;

// Words that are emitted as `valtype` tokens.
var valtypes = ["i32", "i64", "f32", "f64"];

// Characters that can start a number literal (besides a sign or nan/inf).
// Inside a character class "|" is a literal pipe, not an alternation, so the
// previous /[0-9|.|_]/ also classified "|" as numeric; the class now lists
// only digits, "." and "_".
var NUMBERS = /[0-9._]/;

// Tested against the next three (lower-cased) characters to detect nan / inf.
var NUMBER_KEYWORDS = /nan|inf/;
|
|
|
|
/**
 * Tells whether `char` starts with a line-feed (code 10) or a
 * carriage-return (code 13).
 *
 * `char` must be a string: the function calls `charCodeAt` on it, so
 * `undefined` (e.g. reading past the end of the input) throws a TypeError.
 *
 * @param {string} char Character to classify
 * @returns {boolean}
 */
function isNewLine(char) {
  var code = char.charCodeAt(0);

  switch (code) {
    case 10:
    case 13:
      return true;

    default:
      return false;
  }
}
|
|
|
|
/**
 * Creates a token record.
 *
 * @param {string} type  Token kind
 * @param {*}      value Token payload
 * @param {Object} start Start position, stored as `loc.start`
 * @param {Object} end   End position, stored as `loc.end`
 * @param {Object} [opts={}] Extra attributes; attached (by reference) as
 *   `token.opts` only when the object has at least one own enumerable key
 * @returns {{type: string, value: *, loc: {start: Object, end: Object}, opts?: Object}}
 */
function Token(type, value, start, end, opts = {}) {
  const location = { start, end };
  const hasExtras = Object.keys(opts).length > 0;

  if (!hasExtras) {
    return { type, value, loc: location };
  }

  // $FlowIgnore
  return { type, value, loc: location, opts };
}
|
|
|
|
/**
 * Table of the token kinds the tokenizer emits. Every key maps to a string
 * identical to the key, in the order listed below.
 */
var tokenTypes = [
  "openParen",
  "closeParen",
  "number",
  "string",
  "name",
  "identifier",
  "valtype",
  "dot",
  "comment",
  "equal",
  "keyword"
].reduce(function (table, kind) {
  table[kind] = kind;
  return table;
}, {});
|
|
/**
 * Words the tokenizer emits as `keyword` tokens. Every key maps to a string
 * identical to the key, in the order listed below.
 */
export var keywords = [
  "module",
  "func",
  "param",
  "result",
  "export",
  "loop",
  "block",
  "if",
  "then",
  "else",
  "call",
  "call_indirect",
  "import",
  "memory",
  "table",
  "global",
  "anyfunc",
  "mut",
  "data",
  "type",
  "elem",
  "start",
  "offset"
].reduce(function (table, word) {
  table[word] = word;
  return table;
}, {});
|
|
// Character passed as `allowedSeparator` to the digit-consuming transitions.
var NUMERIC_SEPARATOR = "_";

/**
 * Build the FSM for number literals
 *
 * States (initial state "START", terminating state "STOP"):
 *   START / AFTER_SIGN  optional sign, then nan:0x…, nan, inf, 0x…, a digit or "."
 *   DEC, DEC_FRAC       decimal integer and fractional digits
 *   DEC_SIGNED_EXP      just after "e" / "E": a sign or the first exponent digit
 *   DEC_EXP             decimal exponent digits
 *   HEX, HEX_FRAC       hexadecimal integer and fractional digits
 *   HEX_SIGNED_EXP      just after "p" / "P": a sign or the first exponent digit
 *   HEX_EXP             exponent digits of a hexadecimal literal
 *   NAN_HEX             hexadecimal payload after "nan:0x"
 *
 * NOTE(review): `n` looks like the number of characters a transition tests
 * and consumes — confirm against @webassemblyjs/helper-fsm.
 *
 * Fixes relative to the previous table:
 *   - HEX_FRAC used /p|P|/. The trailing empty alternative matches every
 *     string, so any character after a hex fraction satisfied the pattern.
 *     It is now /p|P/.
 *   - The hex-digit and hex-exponent classes were written [0-9|A-F|a-f] and
 *     [0-9|+|-]. Inside a character class "|" is a literal pipe, so "|"
 *     was accepted as a digit. The pipes are removed.
 */
var numberLiteralFSM = new FSM(
  {
    START: [
      makeTransition(/-|\+/, "AFTER_SIGN"),
      makeTransition(/nan:0x/, "NAN_HEX", { n: 6 }),
      makeTransition(/nan|inf/, "STOP", { n: 3 }),
      makeTransition(/0x/, "HEX", { n: 2 }),
      makeTransition(/[0-9]/, "DEC"),
      makeTransition(/\./, "DEC_FRAC")
    ],
    AFTER_SIGN: [
      makeTransition(/nan:0x/, "NAN_HEX", { n: 6 }),
      makeTransition(/nan|inf/, "STOP", { n: 3 }),
      makeTransition(/0x/, "HEX", { n: 2 }),
      makeTransition(/[0-9]/, "DEC"),
      makeTransition(/\./, "DEC_FRAC")
    ],
    DEC_FRAC: [
      makeTransition(/[0-9]/, "DEC_FRAC", { allowedSeparator: NUMERIC_SEPARATOR }),
      makeTransition(/e|E/, "DEC_SIGNED_EXP")
    ],
    DEC: [
      makeTransition(/[0-9]/, "DEC", { allowedSeparator: NUMERIC_SEPARATOR }),
      makeTransition(/\./, "DEC_FRAC"),
      makeTransition(/e|E/, "DEC_SIGNED_EXP")
    ],
    DEC_SIGNED_EXP: [
      makeTransition(/\+|-/, "DEC_EXP"),
      makeTransition(/[0-9]/, "DEC_EXP")
    ],
    DEC_EXP: [
      makeTransition(/[0-9]/, "DEC_EXP", { allowedSeparator: NUMERIC_SEPARATOR })
    ],
    HEX: [
      makeTransition(/[0-9A-Fa-f]/, "HEX", { allowedSeparator: NUMERIC_SEPARATOR }),
      makeTransition(/\./, "HEX_FRAC"),
      makeTransition(/p|P/, "HEX_SIGNED_EXP")
    ],
    HEX_FRAC: [
      makeTransition(/[0-9A-Fa-f]/, "HEX_FRAC", { allowedSeparator: NUMERIC_SEPARATOR }),
      makeTransition(/p|P/, "HEX_SIGNED_EXP")
    ],
    HEX_SIGNED_EXP: [makeTransition(/[0-9+-]/, "HEX_EXP")],
    HEX_EXP: [
      makeTransition(/[0-9]/, "HEX_EXP", { allowedSeparator: NUMERIC_SEPARATOR })
    ],
    NAN_HEX: [
      makeTransition(/[0-9A-Fa-f]/, "NAN_HEX", { allowedSeparator: NUMERIC_SEPARATOR })
    ],
    STOP: []
  },
  "START",
  "STOP"
);
|
|
/**
 * Splits WebAssembly text-format source into a flat array of tokens.
 *
 * The input is scanned left to right. At each position the first matching
 * rule wins, in this order: line comment (`;;`), block comment (`(; … ;)`),
 * `(`, `=`, `)`, newline, other whitespace, `$identifier`, number literal,
 * string literal, bare word (keyword / valtype / name, optionally followed by
 * `.member`). Anything else is an error.
 *
 * End-of-input handling: every inner loop stops when the input is exhausted.
 * Previously a `;;` comment at the very end of the input, an unterminated
 * block comment and an unterminated string crashed with a TypeError, and an
 * identifier or a `name.` at the very end of the input never terminated
 * (RegExp#test coerces `undefined` to the text "undefined", which matches).
 *
 * @param {string} input Source text
 * @returns {Array<Object>} Tokens as produced by `Token`, in source order
 * @throws {Error} On an unexpected character, an unterminated string or an
 *   unterminated block comment; the message starts with a code frame
 */
export function tokenize(input) {
  let current = 0;
  let char = input[current];

  // Used by SourceLocation
  let column = 1;
  let line = 1;
  const tokens = [];

  /**
   * Builds an error whose message is the code frame at the current position
   * followed by `message`.
   */
  function errorAtCursor(message) {
    return new Error(getCodeFrame(input, line, column) + message);
  }

  /**
   * Builds the error reported for a character that cannot appear here.
   */
  function unexpectedCharacter() {
    return errorAtCursor("Unexpected character " + JSON.stringify(char));
  }

  /**
   * Creates a pushToken function for a given type
   */
  function pushToken(type) {
    return function (v, opts = {}) {
      // `||` is kept on purpose: a column of 0 falls back to the computed
      // value, exactly as before, so existing token locations do not change.
      const startColumn = opts.startColumn || column - String(v).length;
      delete opts.startColumn;
      const endColumn = opts.endColumn || startColumn + String(v).length - 1;
      delete opts.endColumn;

      const start = { line: line, column: startColumn };
      const end = { line: line, column: endColumn };
      tokens.push(Token(type, v, start, end, opts));
    };
  }

  /**
   * Functions to save newly encountered tokens
   */
  const pushCloseParenToken = pushToken(tokenTypes.closeParen);
  const pushOpenParenToken = pushToken(tokenTypes.openParen);
  const pushNumberToken = pushToken(tokenTypes.number);
  const pushValtypeToken = pushToken(tokenTypes.valtype);
  const pushNameToken = pushToken(tokenTypes.name);
  const pushIdentifierToken = pushToken(tokenTypes.identifier);
  const pushKeywordToken = pushToken(tokenTypes.keyword);
  const pushDotToken = pushToken(tokenTypes.dot);
  const pushStringToken = pushToken(tokenTypes.string);
  const pushCommentToken = pushToken(tokenTypes.comment);
  const pushEqualToken = pushToken(tokenTypes.equal);

  /**
   * Can be used to look at the next character(s).
   *
   * The default behavior `lookahead()` simply returns the next character without consuming it.
   * Letters are always returned in lowercase.
   *
   * @param {number} length How many characters to query. Default = 1
   * @param {number} offset How many characters to skip forward from current one. Default = 1
   */
  function lookahead(length = 1, offset = 1) {
    return input.substring(current + offset, current + offset + length).toLowerCase();
  }

  /**
   * Advances the cursor in the input by a certain amount
   *
   * @param {number} amount How many characters to consume. Default = 1
   */
  function eatCharacter(amount = 1) {
    column += amount;
    current += amount;
    char = input[current];
  }

  while (current < input.length) {
    // ;; line comment: runs up to (not including) the next newline or EOF
    if (char === ";" && lookahead() === ";") {
      const startColumn = column;
      eatCharacter(2);
      let text = "";

      // `char` is undefined once the input is exhausted; isNewLine would
      // throw on it, so check for EOF first.
      while (char !== undefined && !isNewLine(char)) {
        text += char;
        eatCharacter();
      }

      const endColumn = column;
      pushCommentToken(text, {
        type: "leading",
        startColumn: startColumn,
        endColumn: endColumn
      });
      continue;
    }

    // (; block comment ;)
    if (char === "(" && lookahead() === ";") {
      const startColumn = column;
      eatCharacter(2);
      let text = "";

      while (true) {
        char = input[current];

        if (char === undefined) {
          // Ran off the end of the input without seeing ";)"
          throw errorAtCursor("Unterminated block comment");
        }

        if (char === ";" && lookahead() === ")") {
          eatCharacter(2);
          break;
        }

        text += char;
        eatCharacter();

        // The line counter advances when the *upcoming* character is a newline
        if (char !== undefined && isNewLine(char)) {
          line++;
          column = 0;
        }
      }

      const endColumn = column;
      pushCommentToken(text, {
        type: "block",
        startColumn: startColumn,
        endColumn: endColumn
      });
      continue;
    }

    if (char === "(") {
      pushOpenParenToken(char);
      eatCharacter();
      continue;
    }

    if (char === "=") {
      pushEqualToken(char);
      eatCharacter();
      continue;
    }

    if (char === ")") {
      pushCloseParenToken(char);
      eatCharacter();
      continue;
    }

    if (isNewLine(char)) {
      line++;
      eatCharacter();
      column = 0;
      continue;
    }

    if (WHITESPACE.test(char)) {
      eatCharacter();
      continue;
    }

    // $identifier
    if (char === "$") {
      const startColumn = column;
      eatCharacter();
      let value = "";

      // Guard against EOF: idchar.test(undefined) would match "undefined"
      while (char !== undefined && idchar.test(char)) {
        value += char;
        eatCharacter();
      }

      const endColumn = column;
      pushIdentifierToken(value, {
        startColumn: startColumn,
        endColumn: endColumn
      });
      continue;
    }

    // Number literal; the FSM decides how many characters belong to it
    if (NUMBERS.test(char) || NUMBER_KEYWORDS.test(lookahead(3, 0)) || char === "-" || char === "+") {
      const startColumn = column;
      const value = numberLiteralFSM.run(input.slice(current));

      if (value === "") {
        throw unexpectedCharacter();
      }

      pushNumberToken(value, {
        startColumn: startColumn
      });
      eatCharacter(value.length);

      // A number must be followed by a paren, whitespace or end of input
      if (char && !PARENS.test(char) && !WHITESPACE.test(char)) {
        throw unexpectedCharacter();
      }

      continue;
    }

    // "string"
    if (char === '"') {
      const startColumn = column;
      let value = "";
      eatCharacter(); // "

      while (char !== '"') {
        if (char === undefined) {
          // Ran off the end of the input without a closing quote
          throw errorAtCursor("Unterminated string constant");
        }

        if (isNewLine(char)) {
          throw unexpectedCharacter();
        }

        value += char;
        eatCharacter(); // char
      }

      eatCharacter(); // "

      const endColumn = column;
      pushStringToken(value, {
        startColumn: startColumn,
        endColumn: endColumn
      });
      continue;
    }

    // Bare word: keyword, valtype, name, or `object.member`
    if (LETTERS.test(char)) {
      let value = "";
      const startColumn = column;

      while (char !== undefined && LETTERS.test(char)) {
        value += char;
        eatCharacter();
      }

      /*
       * Handle MemberAccess
       */
      if (char === ".") {
        const dotStartColumn = column;

        if (valtypes.indexOf(value) !== -1) {
          pushValtypeToken(value, {
            startColumn: startColumn
          });
        } else {
          pushNameToken(value);
        }

        eatCharacter();
        value = "";
        const nameStartColumn = column;

        // Guard against EOF: LETTERS.test(undefined) would match "undefined"
        while (char !== undefined && LETTERS.test(char)) {
          value += char;
          eatCharacter();
        }

        pushDotToken(".", {
          startColumn: dotStartColumn
        });
        pushNameToken(value, {
          startColumn: nameStartColumn
        });
        continue;
      }

      /*
       * Handle keywords
       */
      // $FlowIgnore
      if (typeof keywords[value] === "string") {
        pushKeywordToken(value, {
          startColumn: startColumn
        });
        continue;
      }

      /*
       * Handle types
       */
      if (valtypes.indexOf(value) !== -1) {
        pushValtypeToken(value, {
          startColumn: startColumn
        });
        continue;
      }

      /*
       * Handle literals
       */
      pushNameToken(value, {
        startColumn: startColumn
      });
      continue;
    }

    throw unexpectedCharacter();
  }

  return tokens;
}
|
|
// Public alias: exposes the token-type table under the name `tokens`.
export var tokens = tokenTypes;
|