Files
zed-p8/grammars/pico-8-lua/grammar.js
T
2026-05-15 00:16:13 -07:00

620 lines
18 KiB
JavaScript

/**
* @file PICO-8 Lua grammar for tree-sitter
*
* Forked from tree-sitter-lua 0.5.0 by Munif Tanjim ( MIT — see
* UPSTREAM-LICENSE.md ). This fork adds the PICO-8 dialect extensions
* documented in the PICO-8 manual:
*
* - != as alias for ~=
* - Integer divide: \
* - Bitwise XOR (binary): ^^
* - Logical shift right: >>>
* - Rotate left: <<>
* - Rotate right: >><
* - Compound-assignment statements: += -= *= /= %= \= ^= ..= &= |= ^^=
* <<= >>= >>>= <<>= >><=
* - Memory peek prefix unary operators: @addr %addr $addr
* ( these coexist with binary % for modulo )
* - Single-line if (cond) stmt [else stmt] — no `then`/`end`
* - Single-line while (cond) stmt — no `do`/`end`
* - Statement-level print shorthand: `?` followed by an expression list
* - `#include path` directive
*/
/// <reference types="tree-sitter-cli/dsl" />
// @ts-check
/**
 * Operator precedence levels, from loosest binding (1) to tightest (12).
 * Referenced by `binary_expression` and `unary_expression`.
 */
const PREC = {
  // or
  OR: 1,
  // and
  AND: 2,
  // < > <= >= ~= == !=
  COMPARE: 3,
  // |
  BIT_OR: 4,
  // ~ ^^
  BIT_NOT: 5,
  // &
  BIT_AND: 6,
  // << >> >>> <<> >><
  BIT_SHIFT: 7,
  // ..
  CONCAT: 8,
  // + -
  PLUS: 9,
  // * / // % \
  MULTI: 10,
  // not # - ~ @ $ %
  UNARY: 11,
  // ^
  POWER: 12,
};
/**
 * Builds a rule matching one or more `rule` occurrences separated by
 * `separator`.
 *
 * @param rule - Rule for a single list element.
 * @param separator - Rule placed between consecutive elements.
 * @param trailing_separator - When truthy, one extra `separator` is allowed
 *   after the last element.
 * @returns A `seq(...)` of the first element, the repeated
 *   separator/element pairs and, if requested, the optional trailing
 *   separator.
 */
const list_seq = (rule, separator, trailing_separator = false) => {
  const members = [rule, repeat(seq(separator, rule))];
  if (trailing_separator) {
    members.push(optional(separator));
  }
  return seq(...members);
};
/**
 * Returns the hidden `_block` rule made optional and aliased to `block`.
 *
 * @param $ - Grammar rule accessor supplied by the tree-sitter DSL.
 */
const optional_block = ($) => {
  const maybe_body = optional($._block);
  return alias(maybe_body, $.block);
};
/**
 * namelist ::= Name {',' Name}
 *
 * Comma-separated identifiers, each tagged with the `name` field.
 *
 * @param $ - Grammar rule accessor supplied by the tree-sitter DSL.
 */
const name_list = ($) => {
  const name = field('name', $.identifier);
  return list_seq(name, ',');
};
/**
 * Operator tokens accepted by `compound_assignment_statement`, in the order
 * they are handed to `choice(...)`.
 */
const COMPOUND_ASSIGN_OPERATORS = [
  // Arithmetic, integer divide, power and concatenation.
  '+=',
  '-=',
  '*=',
  '/=',
  '%=',
  '\\=',
  '^=',
  '..=',
  // Bitwise and / or / xor.
  '&=',
  '|=',
  '^^=',
  // Shifts and rotates.
  '<<=',
  '>>=',
  '>>>=',
  '<<>=',
  '>><=',
];
export default grammar({
// Grammar name.
name: 'pico8_lua',
// Comments and whitespace may appear between any two tokens.
extras: ($) => [$.comment, /\s/],
// Tokens supplied by the external scanner rather than by the rules below.
externals: ($) => [
// Block comment delimiters and content (used by the `comment` rule).
$._block_comment_start,
$._block_comment_content,
$._block_comment_end,
// Block string delimiters and content (used by the `_block_string` rule).
$._block_string_start,
$._block_string_content,
$._block_string_end,
// PICO-8 line-significance: terminates the body of `if (cond) ...` /
// `while (cond) ...` shorthand. The scanner emits this only when the
// parser is at a state expecting it; everywhere else a newline falls
// through to /\s/ in extras and is skipped. See src/scanner.c.
$._line_end,
],
// Rules exposed as supertypes; each is a plain choice of its concrete forms.
supertypes: ($) => [$.statement, $.expression, $.declaration, $.variable],
// Word token used for keyword extraction.
word: ($) => $.identifier,
// `if (cond) ...` is ambiguous between a standard if where the condition
// is a parenthesized_expression and a shorthand if. Same for while. The
// ambiguity resolves by what follows the closing `)` ( `then`/`do` for
// the standard form, anything else for the shorthand ).
conflicts: ($) => [
[$.parenthesized_expression, $.shorthand_if_statement],
[$.parenthesized_expression, $.shorthand_while_statement],
],
rules: {
// chunk ::= block
// First entry in `rules`: an optional `#!` line, any number of statements,
// then an optional trailing return statement.
chunk: ($) =>
seq(
optional($.hash_bang_line),
repeat($.statement),
optional($.return_statement)
),
// `#!` followed by everything up to (not including) the next newline.
hash_bang_line: (_) => /#![^\n]*/,
// block ::= {stat} [retstat]
// Split into two alternatives so that every match contains at least one
// statement or a return statement; callers that accept an empty body wrap
// this rule in `optional(...)` (see `optional_block`).
_block: ($) =>
choice(
seq(repeat1($.statement), optional($.return_statement)),
seq(repeat($.statement), $.return_statement)
),
// Supertype listing every statement form. The PICO-8 additions are
// compound_assignment_statement, shorthand_while_statement,
// shorthand_if_statement, print_shorthand_statement and include_statement.
// `return_statement` is deliberately not a member; it is attached separately
// by `chunk` and `_block`.
statement: ($) =>
choice(
$.empty_statement,
$.assignment_statement,
$.compound_assignment_statement,
$.function_call,
$.label_statement,
$.break_statement,
$.goto_statement,
$.do_statement,
$.while_statement,
$.shorthand_while_statement,
$.repeat_statement,
$.if_statement,
$.shorthand_if_statement,
$.for_statement,
$.declaration,
$.print_shorthand_statement,
$.include_statement,
),
// retstat ::= return [explist] [';']
// The expression list is surfaced in the tree as `expression_list`.
return_statement: ($) =>
seq(
'return',
optional(alias($._expression_list, $.expression_list)),
optional(';')
),
// A lone `;`.
empty_statement: (_) => ';',
// varlist '=' explist
// Both sides are hidden list rules aliased to the visible `variable_list`
// and `expression_list` nodes.
assignment_statement: ($) =>
seq(
alias($._variable_assignment_varlist, $.variable_list),
field('operator', '='),
alias($._variable_assignment_explist, $.expression_list)
),
// Comma-separated assignment targets, each tagged `name`.
_variable_assignment_varlist: ($) =>
list_seq(field('name', $.variable), ','),
// Comma-separated assigned expressions, each tagged `value`.
_variable_assignment_explist: ($) =>
list_seq(field('value', $.expression), ','),
// PICO-8 compound assignment: var OP= expr (single statement, single line).
// Exactly one target and one value; OP= is any entry of
// COMPOUND_ASSIGN_OPERATORS.
compound_assignment_statement: ($) =>
seq(
field('name', $.variable),
field('operator', choice(...COMPOUND_ASSIGN_OPERATORS)),
field('value', $.expression)
),
// ::name::
label_statement: ($) => seq('::', $.identifier, '::'),
// break
break_statement: (_) => 'break',
// goto name
goto_statement: ($) => seq('goto', $.identifier),
// do [block] end — the body may be empty.
do_statement: ($) => seq('do', field('body', optional_block($)), 'end'),
// Standard form: while exp do [block] end
// The PICO-8 single-line form is handled by `shorthand_while_statement`.
while_statement: ($) =>
seq(
'while',
field('condition', $.expression),
'do',
field('body', optional_block($)),
'end'
),
// PICO-8 single-line: while (cond) stmt {stmt}
// Body extends to end-of-line (or EOF). The $._line_end terminator
// is emitted by the external scanner when it sees \n/\r/EOF at a
// position where the parser expects line-end; until then, additional
// statements on the same line accumulate into the body.
shorthand_while_statement: ($) =>
seq(
'while',
'(',
field('condition', $.expression),
')',
field('body', $.statement),
repeat(field('body', $.statement)),
$._line_end
),
// repeat [block] until exp — the body may be empty.
repeat_statement: ($) =>
seq(
'repeat',
field('body', optional_block($)),
'until',
field('condition', $.expression)
),
// Standard form: if exp then [block] {elseif ...} [else ...] end
// Every elseif/else clause is attached under the `alternative` field.
// The PICO-8 single-line form is handled by `shorthand_if_statement`.
if_statement: ($) =>
seq(
'if',
field('condition', $.expression),
'then',
field('consequence', optional_block($)),
repeat(field('alternative', $.elseif_statement)),
optional(field('alternative', $.else_statement)),
'end'
),
// elseif exp then [block]
elseif_statement: ($) =>
seq(
'elseif',
field('condition', $.expression),
'then',
field('consequence', optional_block($))
),
// else [block]
else_statement: ($) => seq('else', field('body', optional_block($))),
// PICO-8 single-line: if (cond) stmt {stmt} [else stmt {stmt}]
// Both the consequence and the alternative extend to end-of-line.
// The $._line_end terminator (emitted by the external scanner on
// \n/\r/EOF) prevents a later-line `else` from binding to a
// shorthand `if` on a previous line, matching PICO-8 semantics.
shorthand_if_statement: ($) =>
seq(
'if',
'(',
field('condition', $.expression),
')',
field('consequence', $.statement),
repeat(field('consequence', $.statement)),
optional(
seq(
'else',
field('alternative', $.statement),
repeat(field('alternative', $.statement))
)
),
$._line_end
),
// for <clause> do [block] end, where the clause is generic or numeric.
for_statement: ($) =>
seq(
'for',
field('clause', choice($.for_generic_clause, $.for_numeric_clause)),
'do',
field('body', optional_block($)),
'end'
),
// namelist in explist
for_generic_clause: ($) =>
seq(
alias($._name_list, $.variable_list),
'in',
alias($._expression_list, $.expression_list)
),
// Name '=' exp ',' exp [',' exp] — start, end and optional step.
for_numeric_clause: ($) =>
seq(
field('name', $.identifier),
field('operator', '='),
field('start', $.expression),
',',
field('end', $.expression),
optional(seq(',', field('step', $.expression)))
),
// Rule wrapper around the `name_list` helper so it can be aliased.
_name_list: ($) => name_list($),
// Supertype for declarations: a global function declaration, a local
// function declaration, or a local variable declaration. The two local
// forms are tagged with the `local_declaration` field.
declaration: ($) =>
choice(
$.function_declaration,
field(
'local_declaration',
alias($._local_function_declaration, $.function_declaration)
),
field('local_declaration', $.variable_declaration),
),
// function <name> <body>
function_declaration: ($) =>
seq('function', field('name', $._function_name), $._function_body),
// local function Name <body> — appears in the tree as `function_declaration`.
_local_function_declaration: ($) =>
seq('local', 'function', field('name', $.identifier), $._function_body),
// Name of a declared function: a dotted path, optionally ending in `:method`.
_function_name: ($) =>
choice(
$._function_name_prefix_expression,
alias(
$._function_name_method_index_expression,
$.method_index_expression
)
),
// A plain identifier or a dotted path (recursive through the dot rule below).
_function_name_prefix_expression: ($) =>
choice(
$.identifier,
alias($._function_name_dot_index_expression, $.dot_index_expression)
),
// <prefix> '.' Name
_function_name_dot_index_expression: ($) =>
seq(
field('table', $._function_name_prefix_expression),
'.',
field('field', $.identifier)
),
// <prefix> ':' Name
_function_name_method_index_expression: ($) =>
seq(
field('table', $._function_name_prefix_expression),
':',
field('method', $.identifier)
),
// `local` followed by either a bare attributed name list (shown as
// `variable_list`) or a name list with initialisers (shown as
// `assignment_statement`).
variable_declaration: ($) =>
seq(
'local',
choice(
alias($._att_name_list, $.variable_list),
alias($._variable_assignment, $.assignment_statement)
)
),
// attnamelist '=' explist
_variable_assignment: ($) =>
seq(
alias($._att_name_list, $.variable_list),
field('operator', '='),
alias($._variable_assignment_explist, $.expression_list)
),
// An optional leading attribute, then comma-separated names, each of which
// may be followed by its own attribute.
_att_name_list: ($) =>
seq(
optional(field('attribute', alias($._attrib, $.attribute))),
list_seq(
seq(
field('name', $.identifier),
optional(field('attribute', alias($._attrib, $.attribute)))
),
','
),
),
// '<' Name '>'
_attrib: ($) => seq('<', $.identifier, '>'),
// explist ::= exp {',' exp}
_expression_list: ($) => list_seq($.expression, ','),
// PICO-8 print shorthand: ? expr {, expr}
// The `?` token is tagged `directive`; each expression is tagged `argument`.
print_shorthand_statement: ($) =>
seq(
field('directive', '?'),
list_seq(field('argument', $.expression), ',')
),
// PICO-8 include directive: #include path
// Tokenized greedily as `#include` + whitespace so that the standalone
// `#` (unary length operator) and identifier-starting `#x` continue to
// parse as length-of-expression.
// The directive token requires at least one space or tab after `#include`
// and is given lexical precedence 2; it is aliased to the literal
// `#include`. The path is the remainder of the line (possibly empty),
// exposed as `include_path`.
include_statement: ($) =>
seq(
field('directive', alias(token(prec(2, /#include[ \t]+/)), '#include')),
field('path', alias(/[^\n\r]*/, $.include_path))
),
// Supertype listing every expression form.
expression: ($) =>
choice(
$.nil,
$.false,
$.true,
$.number,
$.string,
$.vararg_expression,
$.function_definition,
$.variable,
$.function_call,
$.parenthesized_expression,
$.table_constructor,
$.binary_expression,
$.unary_expression
),
// Keyword literals.
nil: (_) => 'nil',
false: (_) => 'false',
true: (_) => 'true',
// Numeric literal, lexed as a single token: decimal, hexadecimal (0x/0X
// prefix) or binary (0b/0B prefix).
number: (_) => {
// Builds "mantissa [exponent]" for a given digit class.
//   mantissa: [digits] ['.'] digits  |  digits ['.'] [digits]
//   exponent: marker (lower- or upper-case), optional sign, exponent_digits
function number_literal(digits, exponent_marker, exponent_digits) {
return seq(
choice(
seq(optional(digits), optional('.'), digits),
seq(digits, optional('.'), optional(digits))
),
optional(
seq(
choice(
exponent_marker.toLowerCase(),
exponent_marker.toUpperCase()
),
seq(optional(choice('-', '+')), exponent_digits)
)
)
);
}
const decimal_digits = /[0-9]+/;
// Decimal: exponent marker e/E with decimal exponent digits.
const decimal_literal = number_literal(decimal_digits, 'e', decimal_digits);
const hex_digits = /[a-fA-F0-9]+/;
// Hexadecimal: hex mantissa, exponent marker p/P with decimal exponent digits.
const hex_literal = seq(
choice('0x', '0X'),
number_literal(hex_digits, 'p', decimal_digits)
);
const bin_digits = /[01]+/;
// Binary: integer digits only; this rule accepts no fraction or exponent.
const bin_literal = seq(choice('0b', '0B'), bin_digits);
return token(choice(decimal_literal, hex_literal, bin_literal));
},
// A string is either quoted ("..." / '...') or a block string.
string: ($) => choice($._quote_string, $._block_string),
// Double- or single-quoted string. The opening and closing quotes are
// tagged `start` / `end`; the content is optional, so "" and '' match.
_quote_string: ($) =>
choice(
seq(
field('start', alias('"', '"')),
field(
'content',
optional(alias($._doublequote_string_content, $.string_content))
),
field('end', alias('"', '"'))
),
seq(
field('start', alias("'", "'")),
field(
'content',
optional(alias($._singlequote_string_content, $.string_content))
),
field('end', alias("'", "'"))
)
),
// One or more pieces, each either a run of characters other than `"` and
// backslash, or an escape sequence.
_doublequote_string_content: ($) =>
repeat1(choice(token.immediate(prec(1, /[^"\\]+/)), $.escape_sequence)),
// Same as above, with `'` as the excluded quote character.
_singlequote_string_content: ($) =>
repeat1(choice(token.immediate(prec(1, /[^'\\]+/)), $.escape_sequence)),
// Block string assembled from three external-scanner tokens; the start and
// end tokens are aliased to the literals `[[` and `]]`.
_block_string: ($) =>
seq(
field('start', alias($._block_string_start, '[[')),
field('content', alias($._block_string_content, $.string_content)),
field('end', alias($._block_string_end, ']]'))
),
escape_sequence: () =>
token.immediate(
seq(
'\\',
choice(
/[\nabfnrtv\\'"]/,
/z\s*/,
/[0-9]{1,3}/,
/x[0-9a-fA-F]{2}/,
/u\{[0-9a-fA-F]+\}/
)
)
),
// `...`
vararg_expression: (_) => '...',
// Anonymous function expression: function <body>
function_definition: ($) => seq('function', $._function_body),
// Parameter list, optional block, then `end`. Shared by function
// declarations and function expressions.
_function_body: ($) =>
seq(
field('parameters', $.parameters),
field('body', optional_block($)),
'end'
),
// '(' [parlist] ')'
parameters: ($) => seq('(', optional($._parameter_list), ')'),
// Either names optionally followed by `, ...`, or the vararg alone.
_parameter_list: ($) =>
choice(
seq(name_list($), optional(seq(',', $._vararg_parameter))),
$._vararg_parameter
),
// `...` optionally followed by an identifier tagged `name`.
_vararg_parameter: ($) =>
seq($.vararg_expression, optional(field('name', $.identifier))),
// Anything that can be indexed or called: a variable, a call result, or a
// parenthesized expression. Given precedence 1.
_prefix_expression: ($) =>
prec(1, choice($.variable, $.function_call, $.parenthesized_expression)),
// Supertype for assignable expressions: a name, `t[k]` or `t.k`.
variable: ($) =>
choice($.identifier, $.bracket_index_expression, $.dot_index_expression),
// prefix '[' exp ']'
bracket_index_expression: ($) =>
seq(
field('table', $._prefix_expression),
'[',
field('field', $.expression),
']'
),
// prefix '.' Name
dot_index_expression: ($) =>
seq(
field('table', $._prefix_expression),
'.',
field('field', $.identifier)
),
// Call: the callee (a prefix expression or `obj:method`) followed by arguments.
function_call: ($) =>
seq(
field('name', choice($._prefix_expression, $.method_index_expression)),
field('arguments', $.arguments)
),
// prefix ':' Name
method_index_expression: ($) =>
seq(
field('table', $._prefix_expression),
':',
field('method', $.identifier)
),
// Call arguments: a parenthesized (possibly empty) expression list, a single
// table constructor, or a single string.
arguments: ($) =>
choice(
seq('(', optional(list_seq($.expression, ',')), ')'),
$.table_constructor,
$.string
),
// '(' exp ')'
parenthesized_expression: ($) => seq('(', $.expression, ')'),
// '{' [fieldlist] '}'
table_constructor: ($) => seq('{', optional($._field_list), '}'),
// Fields separated by `,` or `;`, with an optional trailing separator.
_field_list: ($) => list_seq($.field, $._field_sep, true),
_field_sep: (_) => choice(',', ';'),
// One table field: `[exp] = exp`, `Name = exp`, or a bare positional value.
// Note: only the bracketed form tags its `=` with the `operator` field.
field: ($) =>
choice(
seq(
'[',
field('name', $.expression),
']',
field('operator', '='),
field('value', $.expression)
),
seq(field('name', $.identifier), '=', field('value', $.expression)),
field('value', $.expression)
),
binary_expression: ($) =>
choice(
...[
['or', PREC.OR],
['and', PREC.AND],
['<', PREC.COMPARE],
['<=', PREC.COMPARE],
['==', PREC.COMPARE],
['~=', PREC.COMPARE],
['!=', PREC.COMPARE], // PICO-8 alias for ~=
['>=', PREC.COMPARE],
['>', PREC.COMPARE],
['|', PREC.BIT_OR],
['~', PREC.BIT_NOT], // bitwise xor (Lua 5.3 binary form)
['^^', PREC.BIT_NOT], // PICO-8 bitwise xor
['&', PREC.BIT_AND],
['<<', PREC.BIT_SHIFT],
['>>', PREC.BIT_SHIFT],
['>>>', PREC.BIT_SHIFT], // PICO-8 logical shift right
['<<>', PREC.BIT_SHIFT], // PICO-8 rotate left
['>><', PREC.BIT_SHIFT], // PICO-8 rotate right
['+', PREC.PLUS],
['-', PREC.PLUS],
['*', PREC.MULTI],
['/', PREC.MULTI],
['//', PREC.MULTI],
['%', PREC.MULTI],
['\\', PREC.MULTI], // PICO-8 integer divide
].map(([operator, precedence]) =>
prec.left(
precedence,
seq(
field('left', $.expression),
field('operator', operator),
field('right', $.expression)
)
)
),
...[
['..', PREC.CONCAT],
['^', PREC.POWER],
].map(([operator, precedence]) =>
prec.right(
precedence,
seq(
field('left', $.expression),
field('operator', operator),
field('right', $.expression)
)
)
)
),
// Prefix operator applied to a single operand, at PREC.UNARY with left
// associativity.
unary_expression: ($) =>
prec.left(
PREC.UNARY,
seq(
// @ $ % are PICO-8 peek prefixes ( peek / peek4 / peek2 ).
// % collides lexically with binary modulo; the GLR parser
// resolves usage by surrounding context.
// `-` and `~` are likewise shared with the binary operators in
// `binary_expression`.
field('operator', choice('not', '#', '-', '~', '@', '$', '%')),
field('operand', $.expression),
)
),
// Identifier token, defined by exclusion: any character that is not a
// control character, whitespace, or one of the listed operator/punctuation
// characters.
identifier: (_) => {
// PICO-8 dialect carves out !, ?, @, $ as operator tokens, so they
// are not valid in identifiers ( upstream allowed them ).
// The first character additionally excludes digits (`\d`).
const identifier_start =
/[^\p{Control}\s!?@$+\-*/%^#&~|<>=(){}\[\];:,.\\'"\d]/;
// Subsequent characters use the same exclusions but may be digits.
const identifier_continue =
/[^\p{Control}\s!?@$+\-*/%^#&~|<>=(){}\[\];:,.\\'"]*/;
return token(seq(identifier_start, identifier_continue));
},
// Comment, in one of two forms:
//   - line comment: `--` followed by the rest of the line (possibly empty),
//     exposed as `comment_content`;
//   - block comment: three external-scanner tokens, with the start and end
//     tokens aliased to the literals `[[` and `]]`.
comment: ($) =>
choice(
seq(
field('start', '--'),
field('content', alias(/[^\r\n]*/, $.comment_content))
),
seq(
field('start', alias($._block_comment_start, '[[')),
field('content', alias($._block_comment_content, $.comment_content)),
field('end', alias($._block_comment_end, ']]'))
)
),
},
});