mirror of
https://github.com/junegunn/fzf.git
synced 2025-11-16 07:13:48 -05:00
Speed up ANSI code processing (#2368)
This commit speeds up the parsing/processing of ANSI escape codes by roughly 7.5x. The speedup is mostly accomplished by replacing the regex with dedicated parsing logic (nextAnsiEscapeSequence()) and reducing the number of allocations in extractColor(). #### Benchmarks ``` name old time/op new time/op delta ExtractColor-16 4.89µs ± 5% 0.64µs ± 2% -86.87% (p=0.000 n=9+9) name old speed new speed delta ExtractColor-16 25.6MB/s ± 5% 194.6MB/s ± 2% +661.43% (p=0.000 n=9+9) name old alloc/op new alloc/op delta ExtractColor-16 1.37kB ± 0% 0.31kB ± 0% -77.31% (p=0.000 n=10+10) name old allocs/op new allocs/op delta ExtractColor-16 48.0 ± 0% 4.0 ± 0% -91.67% (p=0.000 n=10+10) ```
This commit is contained in:
263
src/ansi.go
263
src/ansi.go
@@ -1,8 +1,6 @@
|
||||
package fzf
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"regexp"
|
||||
"strconv"
|
||||
"strings"
|
||||
"unicode/utf8"
|
||||
@@ -82,73 +80,154 @@ func toAnsiString(color tui.Color, offset int) string {
|
||||
return ret + ";"
|
||||
}
|
||||
|
||||
var ansiRegex *regexp.Regexp
|
||||
|
||||
func init() {
|
||||
/*
|
||||
References:
|
||||
- https://github.com/gnachman/iTerm2
|
||||
- http://ascii-table.com/ansi-escape-sequences.php
|
||||
- http://ascii-table.com/ansi-escape-sequences-vt-100.php
|
||||
- http://tldp.org/HOWTO/Bash-Prompt-HOWTO/x405.html
|
||||
- https://invisible-island.net/xterm/ctlseqs/ctlseqs.html
|
||||
*/
|
||||
// The following regular expression will include not all but most of the
|
||||
// frequently used ANSI sequences
|
||||
ansiRegex = regexp.MustCompile("(?:\x1b[\\[()][0-9;]*[a-zA-Z@]|\x1b][0-9];[[:print:]]+(?:\x1b\\\\|\x07)|\x1b.|[\x0e\x0f]|.\x08)")
|
||||
func isPrint(c uint8) bool {
|
||||
return '\x20' <= c && c <= '\x7e'
|
||||
}
|
||||
|
||||
func findAnsiStart(str string) int {
|
||||
idx := 0
|
||||
for ; idx < len(str); idx++ {
|
||||
b := str[idx]
|
||||
if b == 0x1b || b == 0x0e || b == 0x0f {
|
||||
return idx
|
||||
func matchOperatingSystemCommand(s string) int {
|
||||
// `\x1b][0-9];[[:print:]]+(?:\x1b\\\\|\x07)`
|
||||
// ^ match starting here
|
||||
//
|
||||
i := 5 // prefix matched in nextAnsiEscapeSequence()
|
||||
for ; i < len(s) && isPrint(s[i]); i++ {
|
||||
}
|
||||
if i < len(s) {
|
||||
if s[i] == '\x07' {
|
||||
return i + 1
|
||||
}
|
||||
if b == 0x08 && idx > 0 {
|
||||
return idx - 1
|
||||
if s[i] == '\x1b' && i < len(s)-1 && s[i+1] == '\\' {
|
||||
return i + 2
|
||||
}
|
||||
}
|
||||
return idx
|
||||
return -1
|
||||
}
|
||||
|
||||
func matchControlSequence(s string) int {
|
||||
// `\x1b[\\[()][0-9;]*[a-zA-Z@]`
|
||||
// ^ match starting here
|
||||
//
|
||||
i := 2 // prefix matched in nextAnsiEscapeSequence()
|
||||
for ; i < len(s) && (isNumeric(s[i]) || s[i] == ';'); i++ {
|
||||
}
|
||||
if i < len(s) {
|
||||
c := s[i]
|
||||
if 'a' <= c && c <= 'z' || 'A' <= c && c <= 'Z' || c == '@' {
|
||||
return i + 1
|
||||
}
|
||||
}
|
||||
return -1
|
||||
}
|
||||
|
||||
func isCtrlSeqStart(c uint8) bool {
|
||||
return c == '\\' || c == '[' || c == '(' || c == ')'
|
||||
}
|
||||
|
||||
// nextAnsiEscapeSequence returns the ANSI escape sequence and is equivalent to
|
||||
// calling FindStringIndex() on the below regex (which was originally used):
|
||||
//
|
||||
// "(?:\x1b[\\[()][0-9;]*[a-zA-Z@]|\x1b][0-9];[[:print:]]+(?:\x1b\\\\|\x07)|\x1b.|[\x0e\x0f]|.\x08)"
|
||||
//
|
||||
func nextAnsiEscapeSequence(s string) (int, int) {
|
||||
// fast check for ANSI escape sequences
|
||||
i := 0
|
||||
for ; i < len(s); i++ {
|
||||
switch s[i] {
|
||||
case '\x0e', '\x0f', '\x1b', '\x08':
|
||||
// We ignore the fact that '\x08' cannot be the first char
|
||||
// in the string and be an escape sequence for the sake of
|
||||
// speed and simplicity.
|
||||
goto Loop
|
||||
}
|
||||
}
|
||||
return -1, -1
|
||||
|
||||
Loop:
|
||||
for ; i < len(s); i++ {
|
||||
switch s[i] {
|
||||
case '\x08':
|
||||
// backtrack to match: `.\x08`
|
||||
if i > 0 && s[i-1] != '\n' {
|
||||
if s[i-1] < utf8.RuneSelf {
|
||||
return i - 1, i + 1
|
||||
}
|
||||
_, n := utf8.DecodeLastRuneInString(s[:i])
|
||||
return i - n, i + 1
|
||||
}
|
||||
case '\x1b':
|
||||
// match: `\x1b[\\[()][0-9;]*[a-zA-Z@]`
|
||||
if i+2 < len(s) && isCtrlSeqStart(s[i+1]) {
|
||||
if j := matchControlSequence(s[i:]); j != -1 {
|
||||
return i, i + j
|
||||
}
|
||||
}
|
||||
|
||||
// match: `\x1b][0-9];[[:print:]]+(?:\x1b\\\\|\x07)`
|
||||
if i+5 < len(s) && s[i+1] == ']' && isNumeric(s[i+2]) &&
|
||||
s[i+3] == ';' && isPrint(s[i+4]) {
|
||||
|
||||
if j := matchOperatingSystemCommand(s[i:]); j != -1 {
|
||||
return i, i + j
|
||||
}
|
||||
}
|
||||
|
||||
// match: `\x1b.`
|
||||
if i+1 < len(s) && s[i+1] != '\n' {
|
||||
if s[i+1] < utf8.RuneSelf {
|
||||
return i, i + 2
|
||||
}
|
||||
_, n := utf8.DecodeRuneInString(s[i+1:])
|
||||
return i, i + n + 1
|
||||
}
|
||||
case '\x0e', '\x0f':
|
||||
// match: `[\x0e\x0f]`
|
||||
return i, i + 1
|
||||
}
|
||||
}
|
||||
return -1, -1
|
||||
}
|
||||
|
||||
func extractColor(str string, state *ansiState, proc func(string, *ansiState) bool) (string, *[]ansiOffset, *ansiState) {
|
||||
var offsets []ansiOffset
|
||||
var output bytes.Buffer
|
||||
// We append to a stack allocated variable that we'll
|
||||
// later copy and return, to save on allocations.
|
||||
offsets := make([]ansiOffset, 0, 32)
|
||||
|
||||
if state != nil {
|
||||
offsets = append(offsets, ansiOffset{[2]int32{0, 0}, *state})
|
||||
}
|
||||
|
||||
prevIdx := 0
|
||||
runeCount := 0
|
||||
var (
|
||||
pstate *ansiState // lazily allocated
|
||||
output strings.Builder
|
||||
prevIdx int
|
||||
runeCount int
|
||||
)
|
||||
for idx := 0; idx < len(str); {
|
||||
idx += findAnsiStart(str[idx:])
|
||||
if idx == len(str) {
|
||||
// Make sure that we found an ANSI code
|
||||
start, end := nextAnsiEscapeSequence(str[idx:])
|
||||
if start == -1 {
|
||||
break
|
||||
}
|
||||
|
||||
// Make sure that we found an ANSI code
|
||||
offset := ansiRegex.FindStringIndex(str[idx:])
|
||||
if len(offset) < 2 {
|
||||
idx++
|
||||
continue
|
||||
}
|
||||
offset[0] += idx
|
||||
offset[1] += idx
|
||||
idx = offset[1]
|
||||
start += idx
|
||||
idx += end
|
||||
|
||||
// Check if we should continue
|
||||
prev := str[prevIdx:offset[0]]
|
||||
prev := str[prevIdx:start]
|
||||
if proc != nil && !proc(prev, state) {
|
||||
return "", nil, nil
|
||||
}
|
||||
prevIdx = idx
|
||||
|
||||
prevIdx = offset[1]
|
||||
runeCount += utf8.RuneCountInString(prev)
|
||||
output.WriteString(prev)
|
||||
if len(prev) != 0 {
|
||||
runeCount += utf8.RuneCountInString(prev)
|
||||
// Grow the buffer size to the maximum possible length (string length
|
||||
// containing ansi codes) to avoid repetitive allocation
|
||||
if output.Cap() == 0 {
|
||||
output.Grow(len(str))
|
||||
}
|
||||
output.WriteString(prev)
|
||||
}
|
||||
|
||||
newState := interpretCode(str[offset[0]:offset[1]], state)
|
||||
newState := interpretCode(str[start:idx], state)
|
||||
if !newState.equals(state) {
|
||||
if state != nil {
|
||||
// Update last offset
|
||||
@@ -157,8 +236,15 @@ func extractColor(str string, state *ansiState, proc func(string, *ansiState) bo
|
||||
|
||||
if newState.colored() {
|
||||
// Append new offset
|
||||
state = newState
|
||||
offsets = append(offsets, ansiOffset{[2]int32{int32(runeCount), int32(runeCount)}, *state})
|
||||
if pstate == nil {
|
||||
pstate = &ansiState{}
|
||||
}
|
||||
*pstate = newState
|
||||
state = pstate
|
||||
offsets = append(offsets, ansiOffset{
|
||||
[2]int32{int32(runeCount), int32(runeCount)},
|
||||
newState,
|
||||
})
|
||||
} else {
|
||||
// Discard state
|
||||
state = nil
|
||||
@@ -168,7 +254,6 @@ func extractColor(str string, state *ansiState, proc func(string, *ansiState) bo
|
||||
|
||||
var rest string
|
||||
var trimmed string
|
||||
|
||||
if prevIdx == 0 {
|
||||
// No ANSI code found
|
||||
rest = str
|
||||
@@ -178,51 +263,75 @@ func extractColor(str string, state *ansiState, proc func(string, *ansiState) bo
|
||||
output.WriteString(rest)
|
||||
trimmed = output.String()
|
||||
}
|
||||
if len(rest) > 0 && state != nil {
|
||||
// Update last offset
|
||||
runeCount += utf8.RuneCountInString(rest)
|
||||
(&offsets[len(offsets)-1]).offset[1] = int32(runeCount)
|
||||
}
|
||||
if proc != nil {
|
||||
proc(rest, state)
|
||||
}
|
||||
if len(offsets) == 0 {
|
||||
return trimmed, nil, state
|
||||
if len(offsets) > 0 {
|
||||
if len(rest) > 0 && state != nil {
|
||||
// Update last offset
|
||||
runeCount += utf8.RuneCountInString(rest)
|
||||
(&offsets[len(offsets)-1]).offset[1] = int32(runeCount)
|
||||
}
|
||||
// Return a copy of the offsets slice
|
||||
a := make([]ansiOffset, len(offsets))
|
||||
copy(a, offsets)
|
||||
return trimmed, &a, state
|
||||
}
|
||||
return trimmed, &offsets, state
|
||||
return trimmed, nil, state
|
||||
}
|
||||
|
||||
func interpretCode(ansiCode string, prevState *ansiState) *ansiState {
|
||||
// State
|
||||
var state *ansiState
|
||||
func parseAnsiCode(s string) (int, string) {
|
||||
var remaining string
|
||||
if i := strings.IndexByte(s, ';'); i >= 0 {
|
||||
remaining = s[i+1:]
|
||||
s = s[:i]
|
||||
}
|
||||
|
||||
if len(s) > 0 {
|
||||
// Inlined version of strconv.Atoi() that only handles positive
|
||||
// integers and does not allocate on error.
|
||||
code := 0
|
||||
for _, ch := range []byte(s) {
|
||||
ch -= '0'
|
||||
if ch > 9 {
|
||||
return -1, remaining
|
||||
}
|
||||
code = code*10 + int(ch)
|
||||
}
|
||||
return code, remaining
|
||||
}
|
||||
|
||||
return -1, remaining
|
||||
}
|
||||
|
||||
func interpretCode(ansiCode string, prevState *ansiState) ansiState {
|
||||
var state ansiState
|
||||
if prevState == nil {
|
||||
state = &ansiState{-1, -1, 0, -1}
|
||||
state = ansiState{-1, -1, 0, -1}
|
||||
} else {
|
||||
state = &ansiState{prevState.fg, prevState.bg, prevState.attr, prevState.lbg}
|
||||
state = ansiState{prevState.fg, prevState.bg, prevState.attr, prevState.lbg}
|
||||
}
|
||||
if ansiCode[0] != '\x1b' || ansiCode[1] != '[' || ansiCode[len(ansiCode)-1] != 'm' {
|
||||
if strings.HasSuffix(ansiCode, "0K") && prevState != nil {
|
||||
if prevState != nil && strings.HasSuffix(ansiCode, "0K") {
|
||||
state.lbg = prevState.bg
|
||||
}
|
||||
return state
|
||||
}
|
||||
|
||||
ptr := &state.fg
|
||||
state256 := 0
|
||||
|
||||
init := func() {
|
||||
if len(ansiCode) <= 3 {
|
||||
state.fg = -1
|
||||
state.bg = -1
|
||||
state.attr = 0
|
||||
state256 = 0
|
||||
return state
|
||||
}
|
||||
|
||||
ansiCode = ansiCode[2 : len(ansiCode)-1]
|
||||
if len(ansiCode) == 0 {
|
||||
init()
|
||||
}
|
||||
for _, code := range strings.Split(ansiCode, ";") {
|
||||
if num, err := strconv.Atoi(code); err == nil {
|
||||
|
||||
state256 := 0
|
||||
ptr := &state.fg
|
||||
|
||||
for len(ansiCode) != 0 {
|
||||
var num int
|
||||
if num, ansiCode = parseAnsiCode(ansiCode); num != -1 {
|
||||
switch state256 {
|
||||
case 0:
|
||||
switch num {
|
||||
@@ -253,7 +362,10 @@ func interpretCode(ansiCode string, prevState *ansiState) *ansiState {
|
||||
case 24: // tput rmul
|
||||
state.attr = state.attr &^ tui.Underline
|
||||
case 0:
|
||||
init()
|
||||
state.fg = -1
|
||||
state.bg = -1
|
||||
state.attr = 0
|
||||
state256 = 0
|
||||
default:
|
||||
if num >= 30 && num <= 37 {
|
||||
state.fg = tui.Color(num - 30)
|
||||
@@ -289,6 +401,7 @@ func interpretCode(ansiCode string, prevState *ansiState) *ansiState {
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if state256 > 0 {
|
||||
*ptr = -1
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user