mirror of
https://github.com/sheerun/vim-polyglot.git
synced 2025-11-14 06:23:50 -05:00
Update
This commit is contained in:
227
autoload/smt2/parser.vim
Normal file
227
autoload/smt2/parser.vim
Normal file
@@ -0,0 +1,227 @@
|
||||
if polyglot#init#is_disabled(expand('<sfile>:p'), 'smt2', 'autoload/smt2/parser.vim')
|
||||
finish
|
||||
endif
|
||||
|
||||
vim9script
|
||||
const debug = false
|
||||
set maxfuncdepth=100000000 # SMT files tend to be highly nested
|
||||
|
||||
# TODO: Retry iterative parsing now that we have a scanner and simpler grammar
|
||||
# TODO: Refer to token kind by name, e.g. token_comment instead of 8
|
||||
# TODO: Change Ast.kind type from string to enum/number?
|
||||
|
||||
# ------------------------------------------------------------------------------
|
||||
# AST nodes -- essentially named token wrappers
|
||||
#
|
||||
# Note: pos_from, pos_to and contains_comment were only introduced to allow for
|
||||
# a fast FitsOneLine(ast) function in the formatter.
|
||||
# Here, pos_from and pos_to refer to indices of characters -- not tokens
|
||||
# ------------------------------------------------------------------------------
|
||||
def Ast(kind: string, value: any, pos_from: number, pos_to: number, contains_comment: bool): dict<any>
|
||||
return {kind: kind, value: value, pos_from: pos_from, pos_to: pos_to, contains_comment: contains_comment}
|
||||
enddef
|
||||
|
||||
def ParagraphAst(exprs: list<dict<any>>, pos_from: number, pos_to: number): dict<any>
|
||||
var contains_comment = false
|
||||
for expr in exprs
|
||||
if expr.contains_comment
|
||||
contains_comment = true
|
||||
break
|
||||
endif
|
||||
endfor
|
||||
return Ast('Paragraph', exprs, pos_from, pos_to, contains_comment)
|
||||
enddef
|
||||
|
||||
def SExprAst(exprs: list<dict<any>>, pos_from: number, pos_to: number): dict<any>
|
||||
var contains_comment = false
|
||||
for expr in exprs
|
||||
if expr.contains_comment
|
||||
contains_comment = true
|
||||
break
|
||||
endif
|
||||
endfor
|
||||
return Ast('SExpr', exprs, pos_from, pos_to, contains_comment)
|
||||
enddef
|
||||
|
||||
def AtomAst(token: dict<any>): dict<any>
|
||||
return Ast('Atom', token, token.pos, token.pos + len(token.lexeme), token.kind == 8)
|
||||
enddef
|
||||
|
||||
def PrintAst(ast: dict<any>, indent = 0)
|
||||
echo repeat(' ', indent * 2) .. '[' .. ast.kind .. '] '
|
||||
|
||||
if ast.kind ==# 'Atom'
|
||||
echon ast.value.lexeme
|
||||
elseif ast.kind ==# 'SExpr'
|
||||
for v in ast.value
|
||||
call PrintAst(v, indent + 1)
|
||||
endfor
|
||||
elseif ast.kind ==# 'Paragraph'
|
||||
for v in ast.value
|
||||
call PrintAst(v, indent + 1)
|
||||
endfor
|
||||
endif
|
||||
enddef
|
||||
|
||||
# ------------------------------------------------------------------------------
|
||||
# Grammar
|
||||
# ------------------------------------------------------------------------------
|
||||
# Paragraph ::= Expr+
|
||||
# Expr ::= SExpr | Atom
|
||||
# SExpr ::= '(' Expr* ')'
|
||||
|
||||
# ------------------------------------------------------------------------------
|
||||
# LParen
|
||||
# ------------------------------------------------------------------------------
|
||||
def AtStartOfLParen(scanner: dict<any>): bool
|
||||
return scanner.cur_token.kind == 0 # token_lparen
|
||||
enddef
|
||||
|
||||
def ParseLParen(scanner: dict<any>) # consumes token; no return
|
||||
if debug
|
||||
scanner->smt2#scanner#Enforce(scanner->AtStartOfLParen(),
|
||||
"ParseLParen called but not at start of LParen",
|
||||
scanner.cur_token.pos)
|
||||
endif
|
||||
|
||||
scanner->smt2#scanner#NextToken()
|
||||
enddef
|
||||
|
||||
# ------------------------------------------------------------------------------
|
||||
# RParen
|
||||
# ------------------------------------------------------------------------------
|
||||
def AtStartOfRParen(scanner: dict<any>): bool
|
||||
return scanner.cur_token.kind == 1 # token_rparen
|
||||
enddef
|
||||
|
||||
def ParseRParen(scanner: dict<any>) # consumes token; no return
|
||||
if debug
|
||||
scanner->smt2#scanner#Enforce(scanner->AtStartOfRParen(),
|
||||
"ParseRParen called but not at start of RParen",
|
||||
scanner.cur_token.pos)
|
||||
endif
|
||||
|
||||
scanner->smt2#scanner#NextToken()
|
||||
enddef
|
||||
|
||||
# ------------------------------------------------------------------------------
|
||||
# Atom
|
||||
# ------------------------------------------------------------------------------
|
||||
def AtStartOfAtom(scanner: dict<any>): bool
|
||||
return 2 <= scanner.cur_token.kind && scanner.cur_token.kind <= 8
|
||||
enddef
|
||||
|
||||
def ParseAtom(scanner: dict<any>): dict<any>
|
||||
if debug
|
||||
scanner->smt2#scanner#Enforce(scanner->AtStartOfAtom(),
|
||||
"ParseAtom called but not at start of Atom",
|
||||
scanner.cur_token.pos)
|
||||
endif
|
||||
|
||||
const ast = AtomAst(scanner.cur_token)
|
||||
scanner->smt2#scanner#NextToken()
|
||||
return ast
|
||||
enddef
|
||||
|
||||
# ------------------------------------------------------------------------------
|
||||
# Expr
|
||||
# ------------------------------------------------------------------------------
|
||||
def AtStartOfExpr(scanner: dict<any>): bool
|
||||
return scanner->AtStartOfSExpr() || scanner->AtStartOfAtom()
|
||||
enddef
|
||||
def ParseExpr(scanner: dict<any>): dict<any>
|
||||
if debug
|
||||
scanner->smt2#scanner#Enforce(scanner->AtStartOfExpr(),
|
||||
"ParseExpr called but not at start of Expr",
|
||||
scanner.cur_token.pos)
|
||||
endif
|
||||
|
||||
if scanner->AtStartOfSExpr()
|
||||
return scanner->ParseSExpr()
|
||||
endif
|
||||
return scanner->ParseAtom()
|
||||
enddef
|
||||
|
||||
# ------------------------------------------------------------------------------
|
||||
# SExpr
|
||||
# ------------------------------------------------------------------------------
|
||||
const AtStartOfSExpr = funcref(AtStartOfLParen)
|
||||
def ParseSExpr(scanner: dict<any>): dict<any>
|
||||
const pos_from = scanner.cur_token.pos
|
||||
|
||||
if debug
|
||||
scanner->smt2#scanner#Enforce(scanner->AtStartOfSExpr(),
|
||||
"ParseSExpr called but not at start of SExpr",
|
||||
pos_from)
|
||||
endif
|
||||
scanner->ParseLParen()
|
||||
|
||||
# Expr*
|
||||
var exprs: list<any>
|
||||
while scanner->AtStartOfExpr()
|
||||
exprs->add(scanner->ParseExpr())
|
||||
endwhile
|
||||
|
||||
scanner->smt2#scanner#Enforce(scanner->AtStartOfRParen(),
|
||||
printf("Expected RParen but got %s", scanner.cur_token.kind->smt2#scanner#TokenKind2Str()),
|
||||
scanner.cur_token.pos)
|
||||
scanner->ParseRParen()
|
||||
|
||||
const pos_to = scanner.cur_token.pos
|
||||
return SExprAst(exprs, pos_from, pos_to)
|
||||
enddef
|
||||
|
||||
# ------------------------------------------------------------------------------
|
||||
# Paragraph
|
||||
# ------------------------------------------------------------------------------
|
||||
def ParseParagraph(scanner: dict<any>): dict<any>
|
||||
const pos_from = scanner.cur_token.pos
|
||||
|
||||
# Expr+
|
||||
scanner->smt2#scanner#Enforce(scanner->AtStartOfExpr(),
|
||||
printf("Expected Expr but got %s", scanner.cur_token.kind->smt2#scanner#TokenKind2Str()),
|
||||
pos_from)
|
||||
|
||||
var exprs = [scanner->ParseExpr()]
|
||||
while scanner->AtStartOfExpr() && !scanner.at_new_paragraph
|
||||
exprs->add(scanner->ParseExpr())
|
||||
endwhile
|
||||
|
||||
const pos_to = scanner.cur_token.pos
|
||||
return ParagraphAst(exprs, pos_from, pos_to)
|
||||
enddef
|
||||
|
||||
# ------------------------------------------------------------------------------
|
||||
# Public functions
|
||||
# ------------------------------------------------------------------------------
|
||||
def smt2#parser#ParseCurrentParagraph(): dict<any>
|
||||
# source = [start of current paragraph, EOF]
|
||||
# Note: This is needed since `silent! normal! {y}` may not yank full paragraphs
|
||||
# in the context of multiline expressions
|
||||
const cursor = getpos('.')
|
||||
silent! normal! {
|
||||
const line_offset = line('.')
|
||||
const source = join(getline('.', '$'), "\n")
|
||||
call setpos('.', cursor)
|
||||
|
||||
var scanner = smt2#scanner#Scanner(source, line_offset)
|
||||
const ast = scanner->ParseParagraph()
|
||||
|
||||
if debug | ast->PrintAst() | endif
|
||||
return ast
|
||||
enddef
|
||||
|
||||
def smt2#parser#ParseAllParagraphs(): list<dict<any>>
|
||||
# source = current buffer
|
||||
const source = join(getline(1, '$'), "\n")
|
||||
|
||||
var scanner = smt2#scanner#Scanner(source)
|
||||
var asts = []
|
||||
while scanner.cur_token.kind != 9 # token_eof
|
||||
const ast = scanner->ParseParagraph()
|
||||
asts->add(ast)
|
||||
|
||||
if debug | ast->PrintAst() | endif
|
||||
endwhile
|
||||
return asts
|
||||
enddef
|
||||
Reference in New Issue
Block a user