Add more structured syntax

This commit is contained in:
E Dunbar 2025-05-13 16:17:42 -05:00
parent d330f85c67
commit 66026fe74b
11 changed files with 18956 additions and 4528 deletions

View file

@ -9,9 +9,8 @@
// Shared regular expressions used by the lexical rules below.
const regexp = {
// A single ASCII whitespace character: tab, LF, VT, FF, CR or space.
ascii_whitespace: /[\u0009\u000A\u000B\u000C\u000D\u0020]/,
// A symbol body. The first character may not be a bracket, ';', a quote or
// sugar character, ':', '.', a digit or whitespace; the remaining characters
// use the same exclusions except that ':' and digits are allowed.
symbol_seq: /[^()\[\]{};"'`~:.\d\u0009\u000A\u000B\u000C\u000D\u0020][^()\[\]{};"'`~.\u0009\u000A\u000B\u000C\u000D\u0020]*/
}
// The symbol body pattern wrapped in token.immediate.
const symbol_seq_immediate = token.immediate(regexp.symbol_seq)
// Either sign character.
const plus_minus = choice('+', '-')
// One digit followed by any number of groups, each made of optional
// underscores and one or more digits.
const digitpart = seq(/\d/, repeat(/_*\d+/))
const pointfloat = choice(
seq(optional(digitpart), '.', digitpart),
@ -19,7 +18,7 @@ const pointfloat = choice(
)
// Exponent-form float: a digit run or point float as the mantissa, followed
// by an e/E marker, an optional sign and a digit run.
const exponentfloat = seq(
choice(digitpart, pointfloat),
seq(/[eE]/, optional(/[+-]/), digitpart),
seq(choice('e', 'E'), optional(plus_minus), digitpart),
)
module.exports = grammar({
@ -30,50 +29,65 @@ module.exports = grammar({
$.comment,
],
// word: $ => $.symbol,
word: $ => $.symbol,
rules: {
// SYNTACTIC ELEMENTS
// Root rule: an optional shebang followed by any number of elements.
source_file: $ => seq(optional($.shebang), repeat($._element)),
// '#!' plus the rest of that line, lexed as one token.
shebang: _ => token(seq('#!', /.*/)),
// An element is a plain form, a discard, a comment, or one of the structured
// forms (import, require, function, lambda, class, macro, reader).
_element: $ => choice($._form, $.discard, $.comment),
_element: $ => choice(
$._form,
$.discard,
$.comment,
$.import,
$.require,
$.function,
$.lambda,
$.class,
$.macro,
$.reader,
),
// A form is an identifier, sequence or string, optionally preceded by a
// sugar prefix.
_form: $ => seq(optional($._sugar), choice($._identifier, $._sequence, $._string)),
_form: $ => seq(optional($.sugar), choice($._identifier, $._sequence, $._string)),
// '#_' followed by the form it applies to.
discard: $ => seq('#_', $._form),
// ';' plus the rest of that line, lexed as one token.
comment: _ => token(seq(';', /.*/)),
// Prefix markers that may precede a form: ', `, ~, ~@, #* and #**.
// NOTE(review): 'unqoute' / 'unqoute_splice' look like misspellings of
// 'unquote' — confirm before relying on these field names.
_sugar: _ => choice(
field('quote', '\''),
field('quasiquote', '`'),
field('unqoute', '~'),
field('unqoute_splice', '~@'),
field('unpack_iterable', '#*'),
field('unpack_mapping', '#**'),
sugar: _ => choice(
'\'',
'`',
'~',
'~@',
'#*',
'#**',
),
// Atom-like forms: numeric literals, keywords, symbols (or dots) and dotted
// identifiers.
_identifier: $ => choice(
$._numeric_literal,
$.keyword,
$.symbol,
$._symbol_or_dots,
$.dotted_identifier,
),
// Any bracketed collection form.
_sequence: $ => choice($.expression, $.list, $.tuple, $.set, $.dictionary),
// Either string flavour.
_string: $ => choice($.string, $.bracket_string),
// Any numeric literal.
_numeric_literal: $ => choice($.integer, $.float, $.complex),
// Keyword: ':' optionally followed by a symbol body.
keyword: _ => token(seq(':', optional(regexp.symbol_seq))),
dotted_identifier: _ => prec(1, choice(
// Keyword built from a ':' and an optional immediate symbol,
// right-associative.
keyword: $ => prec.right(seq(
':',
optional($.immediate_symbol),
)),
// Either a plain symbol or a run of dots.
_symbol_or_dots: $ => choice(
$.symbol,
$.dots,
),
// Dotted identifier: either leading dots followed by '.'-joined segments, or
// a symbol followed by one or more '.'-joined segments.
dotted_identifier: $ => choice(
seq(
/[.]+/,
symbol_seq_immediate,
repeat(seq(token.immediate('.'), symbol_seq_immediate)),
$.immediate_symbol,
repeat(seq(token.immediate('.'), $.immediate_symbol)),
),
seq(
regexp.symbol_seq,
repeat1((seq(token.immediate('.'), symbol_seq_immediate)))),
)),
symbol: _ => choice(
/[.]+/,
regexp.symbol_seq,
field("sym", $.symbol),
repeat1(seq(token.immediate('.'), $.immediate_symbol))),
),
// Parenthesised form containing at least one element.
expression: $ => seq('(', repeat1($._element), ')'),
@ -91,32 +105,219 @@ module.exports = grammar({
'}'
),
// Double-quoted string with optional r/b/f prefix letters.
// NOTE(review): the content pattern /[^"]*/ stops at the first '"', so a
// backslash-escaped quote inside a string would end it early — confirm
// whether escape sequences need handling here.
string: _ => token(seq(
/[rbf]{0,3}/,
'"',
/[^"]*/,
string: _ => seq(
/[rbf]*"/,
field("content", /[^"]*/),
'"'
)),
bracket_string: _ => token(seq('#[[', /[^\]]*/, ']]')),
),
// Bracket string: '#[[' ... ']]' whose content contains no ']' characters.
bracket_string: _ => seq(
'#[[',
field("content", /[^\]]*/),
']]'
),
// An integer in decimal, binary, octal or hexadecimal notation.
integer: $ => choice($._decinteger, $._bininteger, $._octinteger, $._hexinteger),
// Float token (precedence 1): optional sign, then a point float, an exponent
// float, 'Inf' or 'NaN'.
float: _ => token(prec(1, seq(
optional(/[+-]/),
optional(plus_minus),
choice(pointfloat, exponentfloat, 'Inf', 'NaN'),
))),
// Complex token (precedence 1): optionally signed real part, a sign, then an
// imaginary part ending in j or J.
complex: _ => token(prec(1, seq(
optional(/[+-]/),
optional(plus_minus),
choice(pointfloat, exponentfloat, digitpart, 'Inf', 'NaN'),
/[+-]/,
plus_minus,
seq(
choice(pointfloat, exponentfloat, digitpart, 'NaN', 'Inf'),
/[jJ]/,
choice('j', 'J'),
),
))),
// Decimal integer token: optional sign, one digit, then groups of optional
// ',' / '_' separators followed by digits.
_decinteger: _ => token(prec(1, seq(optional(/[+-]/), /\d/, repeat(/[,_]*\d+/)))),
// Symbol token. The first character may not be a bracket, ';', a quote or
// sugar character, ':', '.', a digit or whitespace; later characters use the
// same exclusions except that ':' and digits are allowed.
symbol: _ => token(seq(
/[^()\[\]{};"'`~:.\d\u0009\u000A\u000B\u000C\u000D\u0020]/,
repeat(/[^()\[\]{};"'`~.\u0009\u000A\u000B\u000C\u000D\u0020]/),
)),
// Same character pattern as `symbol`, but wrapped in token.immediate.
immediate_symbol: _ => token.immediate(seq(
/[^()\[\]{};"'`~:.\d\u0009\u000A\u000B\u000C\u000D\u0020]/,
repeat(/[^()\[\]{};"'`~.\u0009\u000A\u000B\u000C\u000D\u0020]/),
)),
// One or more consecutive '.' characters.
dots: _ => /[.]+/,
// Integer tokens, all at precedence 1. Each allows groups of optional
// ',' / '_' separators followed by digits of the relevant base.
// Decimal: optional sign, then at least one digit.
_decinteger: _ => token(prec(1, seq(optional(plus_minus), /\d/, repeat(/[,_]*\d+/)))),
// Binary: '0' then b/B, then digits 0-1.
_bininteger: _ => token(prec(1, seq('0', /[bB]/, repeat(/[,_]*[01]+/)))),
// Octal: '0' then o/O, then digits 0-7.
_octinteger: _ => token(prec(1, seq('0', /[oO]/, repeat(/[,_]*[0-7]+/)))),
// Hexadecimal: '0' then x/X, then hex digits.
_hexinteger: _ => token(prec(1, seq('0', /[xX]/, repeat(/[,_]*[\da-fA-F]+/)))),
// STRUCTURED SYNTAX
import: $ => seq(
'(',
'import',
repeat1(
choice(
$.module_import,
$.named_import,
),
),
')'
),
require: $ => seq(
'(',
'require',
repeat1(
choice(
$.module_import,
$.named_import,
$.namespace_require,
),
),
')'
),
function: $ => seq(
'(',
'defn',
optional(
seq(
':',
token.immediate('async'),
)
),
field('decorators', optional($.variable_list)),
optional($.type_parameters),
optional($.type_annotation),
field('name', $.symbol),
$.parameter_list,
repeat($._element),
')',
),
lambda: $ => seq(
'(',
'fn',
optional(
seq(
':',
token.immediate('async'),
)
),
$.parameter_list,
repeat($._element),
')',
),
class: $ => seq(
'(',
'defclass',
field('decorators', optional($.variable_list)),
optional($.type_parameters),
field('name', $.symbol),
field('superclasses', $.variable_list),
repeat($._element),
')',
),
macro: $ => seq(
'(',
'defmacro',
field('name', $.symbol),
$.parameter_list,
repeat($._element),
')',
),
reader: $ => seq(
'(',
'defreader',
field('name', $.symbol),
repeat($._element),
')',
),
module_import: $ => seq(
choice(
seq($._variable, optional('*')),
$.aliased_import,
),
),
named_import: $ => seq(
$._variable,
seq(
'[',
repeat1(
choice(
$.symbol,
$.aliased_import,
),
),
']',
),
),
namespace_require: $ => seq(
$._variable,
choice(
repeat1(
seq(
':',
choice(
token.immediate('macros'),
token.immediate('readers'),
),
'[',
repeat1(
choice(
$.symbol,
$.aliased_import,
),
),
']',
),
),
seq(
$.keyword,
'*',
),
),
),
variable_list: $ => seq(
'[',
repeat1($._variable),
']',
),
type_parameters: $ => seq(
':',
token.immediate('tp'),
'[',
repeat1($._variable),
']',
),
type_annotation: $ => seq(
'#^',
field('type', $._variable),
),
parameter_list: $ => seq(
'[',
repeat(
choice(
$.symbol,
seq(
'[',
$.symbol,
$._form,
']',
),
'/',
'*',
'#*',
'#**',
),
),
']',
),
_variable: $ => choice(
$.symbol,
$.dotted_identifier,
),
aliased_import: $ => seq(
$._variable,
':',
token.immediate('as'),
$.symbol,
),
}
});