// zed-p8/grammars/p8-cart/grammar.js

/**
 * tree-sitter grammar for the PICO-8 .p8 cartridge text format.
 *
 * The .p8 format is a flat text container divided into named sections
 * delimited by lines of the form `__name__`. The first section is
 * always `__lua__` and contains the cart's Lua source; the remaining
 * sections (`__gfx__`, `__gff__`, `__label__`, `__map__`, `__sfx__`,
 * `__music__`) hold hex-encoded asset data. The file begins with a
 * fixed magic header line and a `version N` line.
 *
 * This grammar is intentionally minimal: it parses the section
 * structure and exposes each section's body as a single named node
 * so that injection queries (see languages/pico8-cart/injections.scm)
 * can hand the contents off to other languages — most importantly
 * Lua for the `__lua__` section.
 */

module.exports = grammar({
  name: 'p8_cart',

  // Whitespace is significant inside hex sections, so we don't skip it.
  extras: $ => [],

  rules: {
    cartridge: $ => seq(
      optional($.header),
      optional($.version),
      repeat($.section),
    ),

    header: $ => /pico-8 cartridge \/\/[^\n]*\n/,
    version: $ => /version[ \t]+\d+\n/,

    section: $ => choice(
      $.lua_section,
      $.gfx_section,
      $.gff_section,
      $.label_section,
      $.map_section,
      $.sfx_section,
      $.music_section,
      $.unknown_section,
    ),

    lua_section:     $ => seq($.lua_marker,    optional($.lua_content)),
    gfx_section:     $ => seq($.gfx_marker,    optional($.body)),
    gff_section:     $ => seq($.gff_marker,    optional($.body)),
    label_section:   $ => seq($.label_marker,  optional($.body)),
    map_section:     $ => seq($.map_marker,    optional($.body)),
    sfx_section:     $ => seq($.sfx_marker,    optional($.body)),
    music_section:   $ => seq($.music_marker,  optional($.body)),
    unknown_section: $ => seq($.section_marker, optional($.body)),

    lua_marker:     $ => token(prec(2, '__lua__\n')),
    gfx_marker:     $ => token(prec(2, '__gfx__\n')),
    gff_marker:     $ => token(prec(2, '__gff__\n')),
    label_marker:   $ => token(prec(2, '__label__\n')),
    map_marker:     $ => token(prec(2, '__map__\n')),
    sfx_marker:     $ => token(prec(2, '__sfx__\n')),
    music_marker:   $ => token(prec(2, '__music__\n')),
    section_marker: $ => token(prec(1, /__[a-z][a-z0-9_]*__\n/)),

    lua_content: $ => repeat1($.line),
    body: $ => repeat1($.line),

    // A single physical line. The lexer prefers section markers over
    // generic lines via the precedence above, so a line that happens
    // to be exactly `__name__\n` will tokenize as a marker, not a line.
    line: $ => choice(
      token(prec(0, /[^\n]*\n/)),
      token(prec(0, /[^\n]+/)),  // final line with no trailing newline
    ),
  },
});