Run a build and commit the results

2025-11-13 22:13:50 -05:00 · 2023-10-10 23:21:12 +11:00
parent 158ffef943
commit a126d20c24
815 changed files with 3403 additions and 89318 deletions
--- a/autoload/smt2/formatter.vim
+++ b/autoload/smt2/formatter.vim
@@ -3,14 +3,17 @@ if polyglot#init#is_disabled(expand('<sfile>:p'), 'smt2', 'autoload/smt2/formatt
 endif

 " Formatting requires a rather recent Vim version
-if !((v:version > 802) || (v:version == 802 && has("patch2725")))
+if (v:version < 802) || (v:version == 802 && !has("patch2725"))
    const s:errmsg_oldvim = "Vim >= 8.2.2725 required for auto-formatting"

    "Dummies
    function! smt2#formatter#FormatCurrentParagraph()
        echoerr s:errmsg_oldvim
    endfunction
-    function! smt2#formatter#FormatAllParagraphs()
+    function! smt2#formatter#FormatOutermostSExpr()
+        echoerr s:errmsg_oldvim
+    endfunction
+    function! smt2#formatter#FormatFile()
        echoerr s:errmsg_oldvim
    endfunction

@@ -88,55 +91,75 @@ def Format(ast: dict<any>, indent = 0): string
            call formatted->add(child->Format())
        endfor
        return formatted->join("\n")
+    elseif ast.kind ==# 'File'
+        var formatted = []
+        for child in ast.value
+            call formatted->add(child->Format())
+        endfor
+        return formatted->join("\n\n")
    endif
    throw 'Cannot format AST node: ' .. string(ast)
    return '' # Unreachable
 enddef

+# ------------------------------------------------------------------------------
+# Auxiliary
+# ------------------------------------------------------------------------------
+
+def FormatInCurrentBuffer(ast: dict<any>)
+    # Replaces the buffer region covered by `ast` with its formatted text.
+    # Non-whitespace text sharing the first/last line with the region is moved
+    # to separate lines, whitespace-only lines between the region and the
+    # start/end of the buffer are removed, and the cursor position is restored.
+    # NOTE(review): the local `cursor` shares its name with the builtin
+    #               cursor() that is called below -- confirm this is intended.
+    const cursor = getpos('.')
+
+    # Format lines and potential surrounding text on them
+    const formatted_lines = split(Format(ast), '\n')
+    const ast_coords = ast.CalcCoords()
+    const ws_mask = " \n\r\t"
+    # Text preceding the region on its first line, with trailing whitespace
+    # removed
+    const first_line_part_to_keep = getline(ast_coords[0].line)
+        ->strcharpart(0, ast_coords[0].col - 2)
+        ->trim(ws_mask, 2)
+    # Text following the region on its last line, with leading whitespace
+    # removed
+    const last_line_part_to_keep = getline(ast_coords[1].line)
+        ->strcharpart(ast_coords[1].col - 1)
+        ->trim(ws_mask, 1)
+
+    # If section of AST has trailing whitespace until the file end, remove it
+    # ('W' prevents the search from wrapping around to the start of the file)
+    cursor(ast_coords[1].line, ast_coords[1].col)
+    if search('\m\C\S', 'W') == 0
+        deletebufline('%', ast_coords[1].line + 1, line('$'))
+    endif
+
+    # Replace section of AST by formatted lines (w/o killing surrounding text)
+    # Every append() below inserts right after the same line, so whatever is
+    # appended later ends up above what was appended earlier. Hence the
+    # reversed order: trailing text, formatted lines, leading text.
+    deletebufline('%', ast_coords[0].line, ast_coords[1].line)
+    if !empty(last_line_part_to_keep)
+        last_line_part_to_keep->append(ast_coords[0].line - 1)
+    endif
+    formatted_lines->append(ast_coords[0].line - 1)
+    if !empty(first_line_part_to_keep)
+        first_line_part_to_keep->append(ast_coords[0].line - 1)
+    endif
+
+    # If section of AST has leading whitespace until the file start, remove it
+    # ('b' searches backwards, 'W' prevents wrapping around to the file end)
+    cursor(ast_coords[0].line, ast_coords[0].col)
+    if search('\m\C\S', 'bW') == 0
+        deletebufline('%', 1, ast_coords[0].line - 1)
+    endif
+
+    # Restore cursor position
+    call setpos('.', cursor)
+enddef
+
 # ------------------------------------------------------------------------------
 # Public functions
 # ------------------------------------------------------------------------------
 def smt2#formatter#FormatCurrentParagraph()
-    const cursor = getpos('.')
    const ast = smt2#parser#ParseCurrentParagraph()
-
-    # Identify on which end of the buffer we are (to fix newlines later)
-    silent! normal! {
-    const is_first_paragraph = line('.') == 1
-    silent! normal! }
-    const is_last_paragraph = line('.') == line('$')
-
-    # Replace paragraph by formatted lines
-    const lines = split(Format(ast), '\n')
-    silent! normal! {d}
-    if is_last_paragraph && !is_first_paragraph
-        call append('.', [''] + lines)
-    else
-        call append('.', lines + [''])
-    endif
-
-    # Remove potentially introduced first empty line
-    if is_first_paragraph | silent! :1delete | endif
-
-    # Restore cursor position
-    call setpos('.', cursor)
+    FormatInCurrentBuffer(ast)
 enddef

-def smt2#formatter#FormatAllParagraphs()
-    const cursor = getpos('.')
-    const asts = smt2#parser#ParseAllParagraphs()
-
-    # Clear buffer & insert formatted paragraphs
-    silent! :1,$delete
-    for ast in asts
-        const lines = split(Format(ast), '\n') + ['']
-        call append('$', lines)
-    endfor
-
-    # Remove first & trailing empty lines
-    silent! :1delete
-    silent! :$delete
-
-    # Restore cursor position
-    call setpos('.', cursor)
+def smt2#formatter#FormatOutermostSExpr()
+    # Parse the outermost S-expression around the cursor and replace it in the
+    # current buffer by its formatted version
+    FormatInCurrentBuffer(smt2#parser#ParseOutermostSExpr())
+enddef
+
+def smt2#formatter#FormatFile()
+    # Parse the whole buffer and replace it by its formatted version
+    FormatInCurrentBuffer(smt2#parser#ParseFile())
 enddef
--- a/autoload/smt2/parser.vim
+++ b/autoload/smt2/parser.vim
@@ -11,17 +11,37 @@ set maxfuncdepth=100000000 # SMT files tend to be highly nested
 # TODO: Change Ast.kind type from string to enum/number?

 # ------------------------------------------------------------------------------
-# AST nodes -- essentially named token wrappers
+# AST nodes
 #
-# Note: pos_from, pos_to and contains_comment were only introduced to allow for
-#       a fast FitsOneLine(ast) function in the formatter.
-#       Here, pos_from and pos_to refer to indices of characters -- not tokens
+# Note: pos_from, pos_to and contains_comment allow for a fast FitsOneLine(ast)
+#       function in the formatter.
+#       Here, pos_from and pos_to refer to indices of characters -- not tokens.
 # ------------------------------------------------------------------------------
-def Ast(kind: string, value: any, pos_from: number, pos_to: number, contains_comment: bool): dict<any>
-    return {kind: kind, value: value, pos_from: pos_from, pos_to: pos_to, contains_comment: contains_comment}
+def Ast(kind: string, value: any, pos_from: number, pos_to: number, contains_comment: bool, scanner: dict<any>): dict<any>
+    # Builds an AST node: a dict holding the given kind, value, character
+    # positions and comment flag, plus a CalcCoords funcref.
+    # The scanner is not stored in the node; it is only captured by CalcCoords
+    # to translate positions into coordinates.
+
+    # User-facing functionality wants start/end line and column -- not positions
+    # Returns [from, to], each as produced by scanner.calcCoord()
+    def CalcCoords(): list<dict<number>>
+        const from = scanner.calcCoord(pos_from)
+        # If expression ends at end of line, pos_to will be in next line.
+        # That's undesired. Stay in the actual last line.
+        var to = scanner.calcCoord(pos_to - 1)
+        # Compensate for having looked up the position before pos_to
+        to.col += 1
+        return [from, to]
+    enddef
+    return {kind: kind, value: value, pos_from: pos_from, pos_to: pos_to, contains_comment: contains_comment, CalcCoords: CalcCoords}
 enddef

-def ParagraphAst(exprs: list<dict<any>>, pos_from: number, pos_to: number): dict<any>
+def FileAst(paragraphs: list<dict<any>>, pos_from: number, pos_to: number, scanner: dict<any>): dict<any>
+    # Creates a 'File' node whose value is the list of paragraph nodes.
+    # The node is flagged as containing a comment as soon as one of its
+    # paragraphs is flagged that way.
+    var any_comment = false
+    for par in paragraphs
+        if !par.contains_comment
+            continue
+        endif
+        any_comment = true
+        break
+    endfor
+    return Ast('File', paragraphs, pos_from, pos_to, any_comment, scanner)
+enddef
+
+def ParagraphAst(exprs: list<dict<any>>, pos_from: number, pos_to: number, scanner: dict<any>): dict<any>
    var contains_comment = false
    for expr in exprs
        if expr.contains_comment
@@ -29,10 +49,10 @@ def ParagraphAst(exprs: list<dict<any>>, pos_from: number, pos_to: number): dict
            break
        endif
    endfor
-    return Ast('Paragraph', exprs, pos_from, pos_to, contains_comment)
+    return Ast('Paragraph', exprs, pos_from, pos_to, contains_comment, scanner)
 enddef

-def SExprAst(exprs: list<dict<any>>, pos_from: number, pos_to: number): dict<any>
+def SExprAst(exprs: list<dict<any>>, pos_from: number, pos_to: number, scanner: dict<any>): dict<any>
    var contains_comment = false
    for expr in exprs
        if expr.contains_comment
@@ -40,15 +60,22 @@ def SExprAst(exprs: list<dict<any>>, pos_from: number, pos_to: number): dict<any
            break
        endif
    endfor
-    return Ast('SExpr', exprs, pos_from, pos_to, contains_comment)
+    return Ast('SExpr', exprs, pos_from, pos_to, contains_comment, scanner)
 enddef

-def AtomAst(token: dict<any>): dict<any>
-    return Ast('Atom', token, token.pos, token.pos + len(token.lexeme), token.kind == 8)
+def AtomAst(token: dict<any>, scanner: dict<any>): dict<any>
+    return Ast('Atom', token, token.pos, token.pos + len(token.lexeme), token.kind == 8, scanner)
 enddef

 def PrintAst(ast: dict<any>, indent = 0)
-    echo repeat(' ', indent * 2) .. '[' .. ast.kind .. '] '
+    const coords = ast.CalcCoords()
+
+    echo printf("[%5d-%-5d) [%4d:%-3d-%4d:%-3d) %s[%s] ",
+        ast.pos_from, ast.pos_to,
+        coords[0].line, coords[0].col,
+        coords[1].line, coords[1].col,
+        repeat(' ', indent * 2),
+        ast.kind)

    if ast.kind ==# 'Atom'
        echon ast.value.lexeme
@@ -60,13 +87,18 @@ def PrintAst(ast: dict<any>, indent = 0)
        for v in ast.value
            call PrintAst(v, indent + 1)
        endfor
+    elseif ast.kind ==# 'File'
+        for v in ast.value
+            call PrintAst(v, indent + 1)
+        endfor
    endif
 enddef

 # ------------------------------------------------------------------------------
 # Grammar
 # ------------------------------------------------------------------------------
-# Paragraph ::= Expr+
+# File      ::= Paragraph+
+# Paragraph ::= Expr+ EndOfParagraph
 # Expr      ::= SExpr | Atom
 # SExpr     ::= '(' Expr* ')'

@@ -77,14 +109,16 @@ def AtStartOfLParen(scanner: dict<any>): bool
    return scanner.cur_token.kind == 0 # token_lparen
 enddef

-def ParseLParen(scanner: dict<any>) # consumes token; no return
+def ParseLParen(scanner: dict<any>): dict<any>
+    # Consumes the current token and returns it.
+    # In debug mode it is first enforced that the token is an LParen.
     if debug
         scanner->smt2#scanner#Enforce(scanner->AtStartOfLParen(),
             "ParseLParen called but not at start of LParen",
             scanner.cur_token.pos)
     endif

+    const lparen = scanner.cur_token
     scanner->smt2#scanner#NextToken()
+    return lparen
 enddef

 # ------------------------------------------------------------------------------
@@ -94,14 +128,16 @@ def AtStartOfRParen(scanner: dict<any>): bool
    return scanner.cur_token.kind == 1 # token_rparen
 enddef

-def ParseRParen(scanner: dict<any>) # consumes token; no return
+def ParseRParen(scanner: dict<any>): dict<any>
+    # Consumes the current token and returns it.
+    # In debug mode it is first enforced that the token is an RParen.
     if debug
         scanner->smt2#scanner#Enforce(scanner->AtStartOfRParen(),
             "ParseRParen called but not at start of RParen",
             scanner.cur_token.pos)
     endif

+    const rparen = scanner.cur_token
     scanner->smt2#scanner#NextToken()
+    return rparen
 enddef

 # ------------------------------------------------------------------------------
@@ -118,7 +154,7 @@ def ParseAtom(scanner: dict<any>): dict<any>
            scanner.cur_token.pos)
    endif

-    const ast = AtomAst(scanner.cur_token)
+    const ast = AtomAst(scanner.cur_token, scanner)
    scanner->smt2#scanner#NextToken()
    return ast
 enddef
@@ -129,6 +165,7 @@ enddef
 def AtStartOfExpr(scanner: dict<any>): bool
    return scanner->AtStartOfSExpr() || scanner->AtStartOfAtom()
 enddef
+
 def ParseExpr(scanner: dict<any>): dict<any>
    if debug
        scanner->smt2#scanner#Enforce(scanner->AtStartOfExpr(),
@@ -146,6 +183,7 @@ enddef
 # SExpr
 # ------------------------------------------------------------------------------
 const AtStartOfSExpr = funcref(AtStartOfLParen)
+
 def ParseSExpr(scanner: dict<any>): dict<any>
    const pos_from = scanner.cur_token.pos

@@ -165,10 +203,10 @@ def ParseSExpr(scanner: dict<any>): dict<any>
    scanner->smt2#scanner#Enforce(scanner->AtStartOfRParen(),
        printf("Expected RParen but got %s", scanner.cur_token.kind->smt2#scanner#TokenKind2Str()),
        scanner.cur_token.pos)
-    scanner->ParseRParen()
+    const end_token = scanner->ParseRParen()

-    const pos_to = scanner.cur_token.pos
-    return SExprAst(exprs, pos_from, pos_to)
+    const pos_to = end_token.pos + 1
+    return SExprAst(exprs, pos_from, pos_to, scanner)
 enddef

 # ------------------------------------------------------------------------------
@@ -187,41 +225,132 @@ def ParseParagraph(scanner: dict<any>): dict<any>
        exprs->add(scanner->ParseExpr())
    endwhile

-    const pos_to = scanner.cur_token.pos
-    return ParagraphAst(exprs, pos_from, pos_to)
+    const pos_to = exprs[-1].pos_to
+    return ParagraphAst(exprs, pos_from, pos_to, scanner)
+enddef
+
+# ------------------------------------------------------------------------------
+# File
+# ------------------------------------------------------------------------------
+def ParseFile(scanner: dict<any>): dict<any>
+    # Parses paragraphs until the EOF token is reached and wraps them in a
+    # 'File' node that starts at the position of the first token.
+    const start_pos = scanner.cur_token.pos
+
+    var parsed = []
+    while scanner.cur_token.kind != 9 # token_eof
+        parsed->add(scanner->ParseParagraph())
+    endwhile
+
+    # Without any paragraph the node ends where it starts,
+    # otherwise it ends where the last paragraph ends
+    var end_pos = start_pos
+    if !empty(parsed)
+        end_pos = parsed[-1].pos_to
+    endif
+    return FileAst(parsed, start_pos, end_pos, scanner)
+enddef
+
+# ------------------------------------------------------------------------------
+# Auxiliary
+#
+# TODO: MoveTo* functions rely on local search instead of proper (but slow)
+#       scanning of the whole file and may be incorrect in corner cases.
+#       Consider tweaking.
+# ------------------------------------------------------------------------------
+
+# Returns true if successful, i.e. on move to '(' of outermost SExpr
+def MoveToOutermostSExpr(): bool
+    # Jump to the previous unmatched '(' over and over again until the cursor
+    # stops moving
+    var last_pos = getpos('.')
+    silent! normal! [(
+    while getpos('.') != last_pos
+        last_pos = getpos('.')
+        silent! normal! [(
+    endwhile
+
+    # Successful only if the cursor ended up on an opening parenthesis
+    return getline('.')[charcol('.') - 1] == '('
+enddef
+
+def CursorInSExpr(): bool
+    # The cursor counts as being in an SExpr if jumping to the previous
+    # unmatched '(' moves it. The cursor position is restored afterwards.
+    const origin = getpos('.')
+    silent! normal! [(
+    const moved = getpos('.') != origin
+    setpos('.', origin)
+    return moved
+enddef
+
+def MoveToStartOfCurrentParagraph()
+    # Moves the cursor to the start of the paragraph it is currently in, i.e.
+    # to the closest preceding separator line or to the start of the file.
+
+    # Empty (or whitespace) lines outside of S-expressions separate paragraphs.
+    # Nothing to do if cursor is already at such a line.
+    if getline('.')->trim()->empty() && !CursorInSExpr()
+        return
+    endif
+
+    # Move backwards until an empty line that is not in an SExpr is found,
+    # or -- if there is none -- to the first line of the file
+    while true
+        const empty_line = search('\m\C^\s*$', 'b', 1)
+        if empty_line == 0
+            # A failed search leaves the cursor where it was, so the jump to
+            # the file start must not depend on being inside an SExpr.
+            # Otherwise the paragraph would seem to start at the cursor.
+            cursor(1, 1)
+            break
+        elseif !CursorInSExpr()
+            break
+        endif
+    endwhile
 enddef

 # ------------------------------------------------------------------------------
 # Public functions
 # ------------------------------------------------------------------------------
 def smt2#parser#ParseCurrentParagraph(): dict<any>
-    # source = [start of current paragraph, EOF]
-    # Note: This is needed since `silent! normal! {y}` may not yank full paragraphs
-    #       in the context of multiline expressions
    const cursor = getpos('.')
-    silent! normal! {
-    const line_offset = line('.')
-    const source = join(getline('.', '$'), "\n")
+    MoveToStartOfCurrentParagraph()
+    const from = getpos('.')
    call setpos('.', cursor)

-    var scanner = smt2#scanner#Scanner(source, line_offset)
+    # source = [paragraph of outermost SExpr, EOF]
+    # Note: This is needed since `silent! normal! {` is not guaranteed to jump
+    #       to the start of the paragraph, e.g. if newlines occur in between.
+    const lines_to_format = getline(from[1], '$')
+    const source = join(lines_to_format, "\n")
+
+    var scanner = smt2#scanner#Scanner(source, from[1], from[2])
    const ast = scanner->ParseParagraph()

    if debug | ast->PrintAst() | endif
    return ast
 enddef

-def smt2#parser#ParseAllParagraphs(): list<dict<any>>
-    # source = current buffer
-    const source = join(getline(1, '$'), "\n")
+def smt2#parser#ParseOutermostSExpr(): dict<any>
+    const cursor = getpos('.')
+    if ! MoveToOutermostSExpr()
+        throw "Cursor is not in an S-expression!"
+    endif
+    const from = getpos('.')
+    call setpos('.', cursor)

-    var scanner = smt2#scanner#Scanner(source)
-    var asts = []
-    while scanner.cur_token.kind != 9 # token_eof
-        const ast = scanner->ParseParagraph()
-        asts->add(ast)
+    # source = [start of outermost SExpr, EOF]
+    # Note: This is needed since `silent! normal! %` is not guaranteed to jump
+    #       to the matching ')', e.g. if an unmatched '(' occurs in a comment.
+    const lines_to_format = getline(from[1], '$')
+    const source = join(lines_to_format, "\n")

-        if debug | ast->PrintAst() | endif
-    endwhile
-    return asts
+    var scanner = smt2#scanner#Scanner(source, from[1], from[2])
+    const ast = scanner->ParseSExpr()
+
+    if debug | ast->PrintAst() | endif
+    return ast
+enddef
+
+def smt2#parser#ParseFile(): dict<any>
+    # Parses the current buffer, starting at its first non-empty line, into a
+    # 'File' AST. The cursor position is left untouched.
+    const saved_pos = getpos('.')
+    cursor(1, 1)
+    # 'c' accepts a match at the cursor position itself. Without it a first
+    # line consisting of a single character (e.g. a lone '(') is skipped.
+    const first_non_empty_line = search('.', 'c')
+    call setpos('.', saved_pos)
+
+    # source = [first non-empty line, EOF]
+    const source = join(getline(first_non_empty_line, '$'), "\n")
+
+    var scanner = smt2#scanner#Scanner(source, first_non_empty_line)
+    const ast = scanner->ParseFile()
+
+    if debug | ast->PrintAst() | endif
+    return ast
 enddef
--- a/autoload/smt2/scanner.vim
+++ b/autoload/smt2/scanner.vim
@@ -56,7 +56,7 @@ enddef

 def PrettyPrint(scanner: dict<any>, token: dict<any>)
    const coord = scanner->Pos2Coord(token.pos)
-    echo printf("%4d:%-3d (%5d) %8s %s", coord.line, coord.col, token.pos, token.kind->smt2#scanner#TokenKind2Str(), token.lexeme)
+    echo printf("%5d %4d:%-3d  %8s %s", token.pos, coord.line, coord.col, token.kind->smt2#scanner#TokenKind2Str(), token.lexeme)
 enddef

 # ------------------------------------------------------------------------------
@@ -72,12 +72,14 @@ enddef
 # TODO: Enforce restriction to ASCII? We should if we use the lookup table below
 # TODO: Do not take a string but a character stream (or just buffer and pos)?

-def smt2#scanner#Scanner(source: string, line_offset = 1): dict<any>
+def smt2#scanner#Scanner(source: string, start_line = 1, start_col = 1): dict<any>
    var scanner = {
-        chars: source->trim(" \t\n\r", 2)->split('\zs'),
-        line_offset: line_offset, # start line of source string in buffer
-        pos: 0,                   # pos in source string -- not column in line
-        at_new_paragraph: false}
+        chars: source->trim(" \n\r\t", 2)->split('\zs'),
+        line_offset: start_line, # start line of source string in buffer
+        pos: start_col - 1,      # pos in source string -- not column in line
+        at_new_paragraph: false,
+    }
+    scanner.calcCoord = (pos: number): dict<number> => Pos2Coord(scanner, pos)

    if scanner.chars->empty()
        scanner.at_eof = true