| | |
| | |
| | |
| |
|
| | package parse |
| |
|
| | import ( |
| | "fmt" |
| | "strings" |
| | "unicode" |
| | "unicode/utf8" |
| | ) |
| |
|
| | |
| | type item struct { |
| | typ itemType |
| | pos Pos |
| | val string |
| | line int |
| | } |
| |
|
| | func (i item) String() string { |
| | switch { |
| | case i.typ == itemEOF: |
| | return "EOF" |
| | case i.typ == itemError: |
| | return i.val |
| | case i.typ > itemKeyword: |
| | return fmt.Sprintf("<%s>", i.val) |
| | case len(i.val) > 10: |
| | return fmt.Sprintf("%.10q...", i.val) |
| | } |
| | return fmt.Sprintf("%q", i.val) |
| | } |
| |
|
| | |
// itemType tags an item with the kind of token it represents.
type itemType int
| |
|
| | const ( |
| | itemError itemType = iota |
| | itemBool |
| | itemChar |
| | itemCharConstant |
| | itemComment |
| | itemComplex |
| | itemAssign |
| | itemDeclare |
| | itemEOF |
| | itemField |
| | itemIdentifier |
| | itemLeftDelim |
| | itemLeftParen |
| | itemNumber |
| | itemPipe |
| | itemRawString |
| | itemRightDelim |
| | itemRightParen |
| | itemSpace |
| | itemString |
| | itemText |
| | itemVariable |
| | |
| | itemKeyword |
| | itemBlock |
| | itemBreak |
| | itemContinue |
| | itemDot |
| | itemDefine |
| | itemElse |
| | itemEnd |
| | itemIf |
| | itemNil |
| | itemRange |
| | itemTemplate |
| | itemWith |
| | ) |
| |
|
| | var key = map[string]itemType{ |
| | ".": itemDot, |
| | "block": itemBlock, |
| | "break": itemBreak, |
| | "continue": itemContinue, |
| | "define": itemDefine, |
| | "else": itemElse, |
| | "end": itemEnd, |
| | "if": itemIf, |
| | "range": itemRange, |
| | "nil": itemNil, |
| | "template": itemTemplate, |
| | "with": itemWith, |
| | } |
| |
|
// eof is the sentinel returned by lexer.next once the input is exhausted.
// It is negative, so it can never collide with a decoded rune.
const eof = -1
| |
|
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | const ( |
| | spaceChars = " \t\r\n" |
| | trimMarker = '-' |
| | trimMarkerLen = Pos(1 + 1) |
| | ) |
| |
|
| | |
| | type stateFn func(*lexer) stateFn |
| |
|
| | |
| | type lexer struct { |
| | name string |
| | input string |
| | leftDelim string |
| | rightDelim string |
| | pos Pos |
| | start Pos |
| | atEOF bool |
| | parenDepth int |
| | line int |
| | startLine int |
| | item item |
| | insideAction bool |
| | options lexOptions |
| | } |
| |
|
| | |
// lexOptions toggles optional lexer behaviour; the zero value disables all.
type lexOptions struct {
	emitComment bool // lexComment emits itemComment instead of dropping the comment
	breakOK     bool // "break" lexes as itemBreak rather than itemIdentifier
	continueOK  bool // "continue" lexes as itemContinue rather than itemIdentifier
}
| |
|
| | |
| | func (l *lexer) next() rune { |
| | if int(l.pos) >= len(l.input) { |
| | l.atEOF = true |
| | return eof |
| | } |
| | r, w := utf8.DecodeRuneInString(l.input[l.pos:]) |
| | l.pos += Pos(w) |
| | if r == '\n' { |
| | l.line++ |
| | } |
| | return r |
| | } |
| |
|
| | |
| | func (l *lexer) peek() rune { |
| | r := l.next() |
| | l.backup() |
| | return r |
| | } |
| |
|
| | |
| | func (l *lexer) backup() { |
| | if !l.atEOF && l.pos > 0 { |
| | r, w := utf8.DecodeLastRuneInString(l.input[:l.pos]) |
| | l.pos -= Pos(w) |
| | |
| | if r == '\n' { |
| | l.line-- |
| | } |
| | } |
| | } |
| |
|
| | |
| | |
| | func (l *lexer) thisItem(t itemType) item { |
| | i := item{t, l.start, l.input[l.start:l.pos], l.startLine} |
| | l.start = l.pos |
| | l.startLine = l.line |
| | return i |
| | } |
| |
|
| | |
| | func (l *lexer) emit(t itemType) stateFn { |
| | return l.emitItem(l.thisItem(t)) |
| | } |
| |
|
| | |
| | func (l *lexer) emitItem(i item) stateFn { |
| | l.item = i |
| | return nil |
| | } |
| |
|
| | |
| | |
| | |
| | func (l *lexer) ignore() { |
| | l.line += strings.Count(l.input[l.start:l.pos], "\n") |
| | l.start = l.pos |
| | l.startLine = l.line |
| | } |
| |
|
| | |
| | func (l *lexer) accept(valid string) bool { |
| | if strings.ContainsRune(valid, l.next()) { |
| | return true |
| | } |
| | l.backup() |
| | return false |
| | } |
| |
|
| | |
| | func (l *lexer) acceptRun(valid string) { |
| | for strings.ContainsRune(valid, l.next()) { |
| | } |
| | l.backup() |
| | } |
| |
|
| | |
| | |
| | func (l *lexer) errorf(format string, args ...any) stateFn { |
| | l.item = item{itemError, l.start, fmt.Sprintf(format, args...), l.startLine} |
| | l.start = 0 |
| | l.pos = 0 |
| | l.input = l.input[:0] |
| | return nil |
| | } |
| |
|
| | |
| | |
| | func (l *lexer) nextItem() item { |
| | l.item = item{itemEOF, l.pos, "EOF", l.startLine} |
| | state := lexText |
| | if l.insideAction { |
| | state = lexInsideAction |
| | } |
| | for { |
| | state = state(l) |
| | if state == nil { |
| | return l.item |
| | } |
| | } |
| | } |
| |
|
| | |
| | func lex(name, input, left, right string) *lexer { |
| | if left == "" { |
| | left = leftDelim |
| | } |
| | if right == "" { |
| | right = rightDelim |
| | } |
| | l := &lexer{ |
| | name: name, |
| | input: input, |
| | leftDelim: left, |
| | rightDelim: right, |
| | line: 1, |
| | startLine: 1, |
| | insideAction: false, |
| | } |
| | return l |
| | } |
| |
|
| | |
| |
|
// Default action delimiters (used by lex when none are supplied) and the
// fixed comment markers.
const (
	leftDelim    = "{{" // default opening action delimiter
	rightDelim   = "}}" // default closing action delimiter
	leftComment  = "/*" // opens a comment right after a left delimiter
	rightComment = "*/" // closes a comment; must be followed by a right delimiter
)
| |
|
| | |
| | func lexText(l *lexer) stateFn { |
| | if x := strings.Index(l.input[l.pos:], l.leftDelim); x >= 0 { |
| | if x > 0 { |
| | l.pos += Pos(x) |
| | |
| | trimLength := Pos(0) |
| | delimEnd := l.pos + Pos(len(l.leftDelim)) |
| | if hasLeftTrimMarker(l.input[delimEnd:]) { |
| | trimLength = rightTrimLength(l.input[l.start:l.pos]) |
| | } |
| | l.pos -= trimLength |
| | l.line += strings.Count(l.input[l.start:l.pos], "\n") |
| | i := l.thisItem(itemText) |
| | l.pos += trimLength |
| | l.ignore() |
| | if len(i.val) > 0 { |
| | return l.emitItem(i) |
| | } |
| | } |
| | return lexLeftDelim |
| | } |
| | l.pos = Pos(len(l.input)) |
| | |
| | if l.pos > l.start { |
| | l.line += strings.Count(l.input[l.start:l.pos], "\n") |
| | return l.emit(itemText) |
| | } |
| | return l.emit(itemEOF) |
| | } |
| |
|
| | |
| | func rightTrimLength(s string) Pos { |
| | return Pos(len(s) - len(strings.TrimRight(s, spaceChars))) |
| | } |
| |
|
| | |
| | func (l *lexer) atRightDelim() (delim, trimSpaces bool) { |
| | if hasRightTrimMarker(l.input[l.pos:]) && strings.HasPrefix(l.input[l.pos+trimMarkerLen:], l.rightDelim) { |
| | return true, true |
| | } |
| | if strings.HasPrefix(l.input[l.pos:], l.rightDelim) { |
| | return true, false |
| | } |
| | return false, false |
| | } |
| |
|
| | |
| | func leftTrimLength(s string) Pos { |
| | return Pos(len(s) - len(strings.TrimLeft(s, spaceChars))) |
| | } |
| |
|
| | |
| | |
| | func lexLeftDelim(l *lexer) stateFn { |
| | l.pos += Pos(len(l.leftDelim)) |
| | trimSpace := hasLeftTrimMarker(l.input[l.pos:]) |
| | afterMarker := Pos(0) |
| | if trimSpace { |
| | afterMarker = trimMarkerLen |
| | } |
| | if strings.HasPrefix(l.input[l.pos+afterMarker:], leftComment) { |
| | l.pos += afterMarker |
| | l.ignore() |
| | return lexComment |
| | } |
| | i := l.thisItem(itemLeftDelim) |
| | l.insideAction = true |
| | l.pos += afterMarker |
| | l.ignore() |
| | l.parenDepth = 0 |
| | return l.emitItem(i) |
| | } |
| |
|
| | |
| | func lexComment(l *lexer) stateFn { |
| | l.pos += Pos(len(leftComment)) |
| | x := strings.Index(l.input[l.pos:], rightComment) |
| | if x < 0 { |
| | return l.errorf("unclosed comment") |
| | } |
| | l.pos += Pos(x + len(rightComment)) |
| | delim, trimSpace := l.atRightDelim() |
| | if !delim { |
| | return l.errorf("comment ends before closing delimiter") |
| | } |
| | l.line += strings.Count(l.input[l.start:l.pos], "\n") |
| | i := l.thisItem(itemComment) |
| | if trimSpace { |
| | l.pos += trimMarkerLen |
| | } |
| | l.pos += Pos(len(l.rightDelim)) |
| | if trimSpace { |
| | l.pos += leftTrimLength(l.input[l.pos:]) |
| | } |
| | l.ignore() |
| | if l.options.emitComment { |
| | return l.emitItem(i) |
| | } |
| | return lexText |
| | } |
| |
|
| | |
| | func lexRightDelim(l *lexer) stateFn { |
| | _, trimSpace := l.atRightDelim() |
| | if trimSpace { |
| | l.pos += trimMarkerLen |
| | l.ignore() |
| | } |
| | l.pos += Pos(len(l.rightDelim)) |
| | i := l.thisItem(itemRightDelim) |
| | if trimSpace { |
| | l.pos += leftTrimLength(l.input[l.pos:]) |
| | l.ignore() |
| | } |
| | l.insideAction = false |
| | return l.emitItem(i) |
| | } |
| |
|
| | |
| | func lexInsideAction(l *lexer) stateFn { |
| | |
| | |
| | |
| | delim, _ := l.atRightDelim() |
| | if delim { |
| | if l.parenDepth == 0 { |
| | return lexRightDelim |
| | } |
| | return l.errorf("unclosed left paren") |
| | } |
| | switch r := l.next(); { |
| | case r == eof: |
| | return l.errorf("unclosed action") |
| | case isSpace(r): |
| | l.backup() |
| | return lexSpace |
| | case r == '=': |
| | return l.emit(itemAssign) |
| | case r == ':': |
| | if l.next() != '=' { |
| | return l.errorf("expected :=") |
| | } |
| | return l.emit(itemDeclare) |
| | case r == '|': |
| | return l.emit(itemPipe) |
| | case r == '"': |
| | return lexQuote |
| | case r == '`': |
| | return lexRawQuote |
| | case r == '$': |
| | return lexVariable |
| | case r == '\'': |
| | return lexChar |
| | case r == '.': |
| | |
| | if l.pos < Pos(len(l.input)) { |
| | r := l.input[l.pos] |
| | if r < '0' || '9' < r { |
| | return lexField |
| | } |
| | } |
| | fallthrough |
| | case r == '+' || r == '-' || ('0' <= r && r <= '9'): |
| | l.backup() |
| | return lexNumber |
| | case isAlphaNumeric(r): |
| | l.backup() |
| | return lexIdentifier |
| | case r == '(': |
| | l.parenDepth++ |
| | return l.emit(itemLeftParen) |
| | case r == ')': |
| | l.parenDepth-- |
| | if l.parenDepth < 0 { |
| | return l.errorf("unexpected right paren") |
| | } |
| | return l.emit(itemRightParen) |
| | case r <= unicode.MaxASCII && unicode.IsPrint(r): |
| | return l.emit(itemChar) |
| | default: |
| | return l.errorf("unrecognized character in action: %#U", r) |
| | } |
| | } |
| |
|
| | |
| | |
| | |
| | func lexSpace(l *lexer) stateFn { |
| | var r rune |
| | var numSpaces int |
| | for { |
| | r = l.peek() |
| | if !isSpace(r) { |
| | break |
| | } |
| | l.next() |
| | numSpaces++ |
| | } |
| | |
| | |
| | if hasRightTrimMarker(l.input[l.pos-1:]) && strings.HasPrefix(l.input[l.pos-1+trimMarkerLen:], l.rightDelim) { |
| | l.backup() |
| | if numSpaces == 1 { |
| | return lexRightDelim |
| | } |
| | } |
| | return l.emit(itemSpace) |
| | } |
| |
|
| | |
| | func lexIdentifier(l *lexer) stateFn { |
| | for { |
| | switch r := l.next(); { |
| | case isAlphaNumeric(r): |
| | |
| | default: |
| | l.backup() |
| | word := l.input[l.start:l.pos] |
| | if !l.atTerminator() { |
| | return l.errorf("bad character %#U", r) |
| | } |
| | switch { |
| | case key[word] > itemKeyword: |
| | item := key[word] |
| | if item == itemBreak && !l.options.breakOK || item == itemContinue && !l.options.continueOK { |
| | return l.emit(itemIdentifier) |
| | } |
| | return l.emit(item) |
| | case word[0] == '.': |
| | return l.emit(itemField) |
| | case word == "true", word == "false": |
| | return l.emit(itemBool) |
| | default: |
| | return l.emit(itemIdentifier) |
| | } |
| | } |
| | } |
| | } |
| |
|
| | |
| | |
| | func lexField(l *lexer) stateFn { |
| | return lexFieldOrVariable(l, itemField) |
| | } |
| |
|
| | |
| | |
| | func lexVariable(l *lexer) stateFn { |
| | if l.atTerminator() { |
| | return l.emit(itemVariable) |
| | } |
| | return lexFieldOrVariable(l, itemVariable) |
| | } |
| |
|
| | |
| | |
| | func lexFieldOrVariable(l *lexer, typ itemType) stateFn { |
| | if l.atTerminator() { |
| | if typ == itemVariable { |
| | return l.emit(itemVariable) |
| | } |
| | return l.emit(itemDot) |
| | } |
| | var r rune |
| | for { |
| | r = l.next() |
| | if !isAlphaNumeric(r) { |
| | l.backup() |
| | break |
| | } |
| | } |
| | if !l.atTerminator() { |
| | return l.errorf("bad character %#U", r) |
| | } |
| | return l.emit(typ) |
| | } |
| |
|
| | |
| | |
| | |
| | |
| | func (l *lexer) atTerminator() bool { |
| | r := l.peek() |
| | if isSpace(r) { |
| | return true |
| | } |
| | switch r { |
| | case eof, '.', ',', '|', ':', ')', '(': |
| | return true |
| | } |
| | return strings.HasPrefix(l.input[l.pos:], l.rightDelim) |
| | } |
| |
|
| | |
| | |
| | func lexChar(l *lexer) stateFn { |
| | Loop: |
| | for { |
| | switch l.next() { |
| | case '\\': |
| | if r := l.next(); r != eof && r != '\n' { |
| | break |
| | } |
| | fallthrough |
| | case eof, '\n': |
| | return l.errorf("unterminated character constant") |
| | case '\'': |
| | break Loop |
| | } |
| | } |
| | return l.emit(itemCharConstant) |
| | } |
| |
|
| | |
| | |
| | |
| | |
| | func lexNumber(l *lexer) stateFn { |
| | if !l.scanNumber() { |
| | return l.errorf("bad number syntax: %q", l.input[l.start:l.pos]) |
| | } |
| | if sign := l.peek(); sign == '+' || sign == '-' { |
| | |
| | if !l.scanNumber() || l.input[l.pos-1] != 'i' { |
| | return l.errorf("bad number syntax: %q", l.input[l.start:l.pos]) |
| | } |
| | return l.emit(itemComplex) |
| | } |
| | return l.emit(itemNumber) |
| | } |
| |
|
| | func (l *lexer) scanNumber() bool { |
| | |
| | l.accept("+-") |
| | |
| | digits := "0123456789_" |
| | if l.accept("0") { |
| | |
| | if l.accept("xX") { |
| | digits = "0123456789abcdefABCDEF_" |
| | } else if l.accept("oO") { |
| | digits = "01234567_" |
| | } else if l.accept("bB") { |
| | digits = "01_" |
| | } |
| | } |
| | l.acceptRun(digits) |
| | if l.accept(".") { |
| | l.acceptRun(digits) |
| | } |
| | if len(digits) == 10+1 && l.accept("eE") { |
| | l.accept("+-") |
| | l.acceptRun("0123456789_") |
| | } |
| | if len(digits) == 16+6+1 && l.accept("pP") { |
| | l.accept("+-") |
| | l.acceptRun("0123456789_") |
| | } |
| | |
| | l.accept("i") |
| | |
| | if isAlphaNumeric(l.peek()) { |
| | l.next() |
| | return false |
| | } |
| | return true |
| | } |
| |
|
| | |
| | func lexQuote(l *lexer) stateFn { |
| | Loop: |
| | for { |
| | switch l.next() { |
| | case '\\': |
| | if r := l.next(); r != eof && r != '\n' { |
| | break |
| | } |
| | fallthrough |
| | case eof, '\n': |
| | return l.errorf("unterminated quoted string") |
| | case '"': |
| | break Loop |
| | } |
| | } |
| | return l.emit(itemString) |
| | } |
| |
|
| | |
| | func lexRawQuote(l *lexer) stateFn { |
| | Loop: |
| | for { |
| | switch l.next() { |
| | case eof: |
| | return l.errorf("unterminated raw quoted string") |
| | case '`': |
| | break Loop |
| | } |
| | } |
| | return l.emit(itemRawString) |
| | } |
| |
|
| | |
// isSpace reports whether r is a space, tab, carriage return or newline.
func isSpace(r rune) bool {
	switch r {
	case ' ', '\t', '\r', '\n':
		return true
	}
	return false
}
| |
|
| | |
// isAlphaNumeric reports whether r is an underscore, a Unicode letter or a
// Unicode digit.
func isAlphaNumeric(r rune) bool {
	switch {
	case r == '_':
		return true
	case unicode.IsLetter(r):
		return true
	}
	return unicode.IsDigit(r)
}
| |
|
| | func hasLeftTrimMarker(s string) bool { |
| | return len(s) >= 2 && s[0] == trimMarker && isSpace(rune(s[1])) |
| | } |
| |
|
| | func hasRightTrimMarker(s string) bool { |
| | return len(s) >= 2 && isSpace(rune(s[0])) && s[1] == trimMarker |
| | } |
| |
|