| | |
| | |
| | |
| |
|
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| |
|
| | package syntax |
| |
|
| | import ( |
| | "fmt" |
| | "io" |
| | "unicode" |
| | "unicode/utf8" |
| | ) |
| |
|
| | |
| | |
| | |
// Mode flags for scanner.mode. They select which comments are handed to
// the error handler; with neither flag set, comments are skipped silently.
const (
	comments   uint = 1 << 0 // hand every comment to the error handler
	directives uint = 1 << 1 // hand only "//go:", "//line " and "/*line " comments to the error handler
)
| |
|
| | type scanner struct { |
| | source |
| | mode uint |
| | nlsemi bool |
| |
|
| | |
| | line, col uint |
| | blank bool |
| | tok token |
| | lit string |
| | bad bool |
| | kind LitKind |
| | op Operator |
| | prec int |
| | } |
| |
|
| | func (s *scanner) init(src io.Reader, errh func(line, col uint, msg string), mode uint) { |
| | s.source.init(src, errh) |
| | s.mode = mode |
| | s.nlsemi = false |
| | } |
| |
|
| | |
| | func (s *scanner) errorf(format string, args ...any) { |
| | s.error(fmt.Sprintf(format, args...)) |
| | } |
| |
|
| | |
| | func (s *scanner) errorAtf(offset int, format string, args ...any) { |
| | s.errh(s.line, s.col+uint(offset), fmt.Sprintf(format, args...)) |
| | } |
| |
|
| | |
| | func (s *scanner) setLit(kind LitKind, ok bool) { |
| | s.nlsemi = true |
| | s.tok = _Literal |
| | s.lit = string(s.segment()) |
| | s.bad = !ok |
| | s.kind = kind |
| | } |
| |
|
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | func (s *scanner) next() { |
| | nlsemi := s.nlsemi |
| | s.nlsemi = false |
| |
|
| | redo: |
| | |
| | s.stop() |
| | startLine, startCol := s.pos() |
| | for s.ch == ' ' || s.ch == '\t' || s.ch == '\n' && !nlsemi || s.ch == '\r' { |
| | s.nextch() |
| | } |
| |
|
| | |
| | s.line, s.col = s.pos() |
| | s.blank = s.line > startLine || startCol == colbase |
| | s.start() |
| | if isLetter(s.ch) || s.ch >= utf8.RuneSelf && s.atIdentChar(true) { |
| | s.nextch() |
| | s.ident() |
| | return |
| | } |
| |
|
| | switch s.ch { |
| | case -1: |
| | if nlsemi { |
| | s.lit = "EOF" |
| | s.tok = _Semi |
| | break |
| | } |
| | s.tok = _EOF |
| |
|
| | case '\n': |
| | s.nextch() |
| | s.lit = "newline" |
| | s.tok = _Semi |
| |
|
| | case '0', '1', '2', '3', '4', '5', '6', '7', '8', '9': |
| | s.number(false) |
| |
|
| | case '"': |
| | s.stdString() |
| |
|
| | case '`': |
| | s.rawString() |
| |
|
| | case '\'': |
| | s.rune() |
| |
|
| | case '(': |
| | s.nextch() |
| | s.tok = _Lparen |
| |
|
| | case '[': |
| | s.nextch() |
| | s.tok = _Lbrack |
| |
|
| | case '{': |
| | s.nextch() |
| | s.tok = _Lbrace |
| |
|
| | case ',': |
| | s.nextch() |
| | s.tok = _Comma |
| |
|
| | case ';': |
| | s.nextch() |
| | s.lit = "semicolon" |
| | s.tok = _Semi |
| |
|
| | case ')': |
| | s.nextch() |
| | s.nlsemi = true |
| | s.tok = _Rparen |
| |
|
| | case ']': |
| | s.nextch() |
| | s.nlsemi = true |
| | s.tok = _Rbrack |
| |
|
| | case '}': |
| | s.nextch() |
| | s.nlsemi = true |
| | s.tok = _Rbrace |
| |
|
| | case ':': |
| | s.nextch() |
| | if s.ch == '=' { |
| | s.nextch() |
| | s.tok = _Define |
| | break |
| | } |
| | s.tok = _Colon |
| |
|
| | case '.': |
| | s.nextch() |
| | if isDecimal(s.ch) { |
| | s.number(true) |
| | break |
| | } |
| | if s.ch == '.' { |
| | s.nextch() |
| | if s.ch == '.' { |
| | s.nextch() |
| | s.tok = _DotDotDot |
| | break |
| | } |
| | s.rewind() |
| | s.nextch() |
| | } |
| | s.tok = _Dot |
| |
|
| | case '+': |
| | s.nextch() |
| | s.op, s.prec = Add, precAdd |
| | if s.ch != '+' { |
| | goto assignop |
| | } |
| | s.nextch() |
| | s.nlsemi = true |
| | s.tok = _IncOp |
| |
|
| | case '-': |
| | s.nextch() |
| | s.op, s.prec = Sub, precAdd |
| | if s.ch != '-' { |
| | goto assignop |
| | } |
| | s.nextch() |
| | s.nlsemi = true |
| | s.tok = _IncOp |
| |
|
| | case '*': |
| | s.nextch() |
| | s.op, s.prec = Mul, precMul |
| | |
| | if s.ch == '=' { |
| | s.nextch() |
| | s.tok = _AssignOp |
| | break |
| | } |
| | s.tok = _Star |
| |
|
| | case '/': |
| | s.nextch() |
| | if s.ch == '/' { |
| | s.nextch() |
| | s.lineComment() |
| | goto redo |
| | } |
| | if s.ch == '*' { |
| | s.nextch() |
| | s.fullComment() |
| | if line, _ := s.pos(); line > s.line && nlsemi { |
| | |
| | |
| | s.lit = "newline" |
| | s.tok = _Semi |
| | break |
| | } |
| | goto redo |
| | } |
| | s.op, s.prec = Div, precMul |
| | goto assignop |
| |
|
| | case '%': |
| | s.nextch() |
| | s.op, s.prec = Rem, precMul |
| | goto assignop |
| |
|
| | case '&': |
| | s.nextch() |
| | if s.ch == '&' { |
| | s.nextch() |
| | s.op, s.prec = AndAnd, precAndAnd |
| | s.tok = _Operator |
| | break |
| | } |
| | s.op, s.prec = And, precMul |
| | if s.ch == '^' { |
| | s.nextch() |
| | s.op = AndNot |
| | } |
| | goto assignop |
| |
|
| | case '|': |
| | s.nextch() |
| | if s.ch == '|' { |
| | s.nextch() |
| | s.op, s.prec = OrOr, precOrOr |
| | s.tok = _Operator |
| | break |
| | } |
| | s.op, s.prec = Or, precAdd |
| | goto assignop |
| |
|
| | case '^': |
| | s.nextch() |
| | s.op, s.prec = Xor, precAdd |
| | goto assignop |
| |
|
| | case '<': |
| | s.nextch() |
| | if s.ch == '=' { |
| | s.nextch() |
| | s.op, s.prec = Leq, precCmp |
| | s.tok = _Operator |
| | break |
| | } |
| | if s.ch == '<' { |
| | s.nextch() |
| | s.op, s.prec = Shl, precMul |
| | goto assignop |
| | } |
| | if s.ch == '-' { |
| | s.nextch() |
| | s.tok = _Arrow |
| | break |
| | } |
| | s.op, s.prec = Lss, precCmp |
| | s.tok = _Operator |
| |
|
| | case '>': |
| | s.nextch() |
| | if s.ch == '=' { |
| | s.nextch() |
| | s.op, s.prec = Geq, precCmp |
| | s.tok = _Operator |
| | break |
| | } |
| | if s.ch == '>' { |
| | s.nextch() |
| | s.op, s.prec = Shr, precMul |
| | goto assignop |
| | } |
| | s.op, s.prec = Gtr, precCmp |
| | s.tok = _Operator |
| |
|
| | case '=': |
| | s.nextch() |
| | if s.ch == '=' { |
| | s.nextch() |
| | s.op, s.prec = Eql, precCmp |
| | s.tok = _Operator |
| | break |
| | } |
| | s.tok = _Assign |
| |
|
| | case '!': |
| | s.nextch() |
| | if s.ch == '=' { |
| | s.nextch() |
| | s.op, s.prec = Neq, precCmp |
| | s.tok = _Operator |
| | break |
| | } |
| | s.op, s.prec = Not, 0 |
| | s.tok = _Operator |
| |
|
| | case '~': |
| | s.nextch() |
| | s.op, s.prec = Tilde, 0 |
| | s.tok = _Operator |
| |
|
| | default: |
| | s.errorf("invalid character %#U", s.ch) |
| | s.nextch() |
| | goto redo |
| | } |
| |
|
| | return |
| |
|
| | assignop: |
| | if s.ch == '=' { |
| | s.nextch() |
| | s.tok = _AssignOp |
| | return |
| | } |
| | s.tok = _Operator |
| | } |
| |
|
| | func (s *scanner) ident() { |
| | |
| | for isLetter(s.ch) || isDecimal(s.ch) { |
| | s.nextch() |
| | } |
| |
|
| | |
| | if s.ch >= utf8.RuneSelf { |
| | for s.atIdentChar(false) { |
| | s.nextch() |
| | } |
| | } |
| |
|
| | |
| | lit := s.segment() |
| | if len(lit) >= 2 { |
| | if tok := keywordMap[hash(lit)]; tok != 0 && tokStrFast(tok) == string(lit) { |
| | s.nlsemi = contains(1<<_Break|1<<_Continue|1<<_Fallthrough|1<<_Return, tok) |
| | s.tok = tok |
| | return |
| | } |
| | } |
| |
|
| | s.nlsemi = true |
| | s.lit = string(lit) |
| | s.tok = _Name |
| | } |
| |
|
| | |
| | |
| | func tokStrFast(tok token) string { |
| | return _token_name[_token_index[tok-1]:_token_index[tok]] |
| | } |
| |
|
| | func (s *scanner) atIdentChar(first bool) bool { |
| | switch { |
| | case unicode.IsLetter(s.ch) || s.ch == '_': |
| | |
| | case unicode.IsDigit(s.ch): |
| | if first { |
| | s.errorf("identifier cannot begin with digit %#U", s.ch) |
| | } |
| | case s.ch >= utf8.RuneSelf: |
| | s.errorf("invalid character %#U in identifier", s.ch) |
| | default: |
| | return false |
| | } |
| | return true |
| | } |
| |
|
| | |
| | |
| | func hash(s []byte) uint { |
| | return (uint(s[0])<<4 ^ uint(s[1]) + uint(len(s))) & uint(len(keywordMap)-1) |
| | } |
| |
|
| | var keywordMap [1 << 6]token |
| |
|
| | func init() { |
| | |
| | for tok := _Break; tok <= _Var; tok++ { |
| | h := hash([]byte(tok.String())) |
| | if keywordMap[h] != 0 { |
| | panic("imperfect hash") |
| | } |
| | keywordMap[h] = tok |
| | } |
| | } |
| |
|
// lower sets the bit that separates 'A' from 'a'. For an ASCII letter
// the result is its lower-case form; other values are not meaningfully
// case-folded.
func lower(ch rune) rune {
	return ch | 0x20
}

// isLetter reports whether ch is an ASCII letter or '_'.
func isLetter(ch rune) bool {
	if ch == '_' {
		return true
	}
	lc := lower(ch)
	return 'a' <= lc && lc <= 'z'
}

// isDecimal reports whether ch is an ASCII decimal digit.
func isDecimal(ch rune) bool {
	return ch >= '0' && ch <= '9'
}

// isHex reports whether ch is an ASCII hexadecimal digit of either case.
func isHex(ch rune) bool {
	if isDecimal(ch) {
		return true
	}
	lc := lower(ch)
	return 'a' <= lc && lc <= 'f'
}
| |
|
| | |
| | |
| | |
| | |
| | |
| | |
| | func (s *scanner) digits(base int, invalid *int) (digsep int) { |
| | if base <= 10 { |
| | max := rune('0' + base) |
| | for isDecimal(s.ch) || s.ch == '_' { |
| | ds := 1 |
| | if s.ch == '_' { |
| | ds = 2 |
| | } else if s.ch >= max && *invalid < 0 { |
| | _, col := s.pos() |
| | *invalid = int(col - s.col) |
| | } |
| | digsep |= ds |
| | s.nextch() |
| | } |
| | } else { |
| | for isHex(s.ch) || s.ch == '_' { |
| | ds := 1 |
| | if s.ch == '_' { |
| | ds = 2 |
| | } |
| | digsep |= ds |
| | s.nextch() |
| | } |
| | } |
| | return |
| | } |
| |
|
| | func (s *scanner) number(seenPoint bool) { |
| | ok := true |
| | kind := IntLit |
| | base := 10 |
| | prefix := rune(0) |
| | digsep := 0 |
| | invalid := -1 |
| |
|
| | |
| | if !seenPoint { |
| | if s.ch == '0' { |
| | s.nextch() |
| | switch lower(s.ch) { |
| | case 'x': |
| | s.nextch() |
| | base, prefix = 16, 'x' |
| | case 'o': |
| | s.nextch() |
| | base, prefix = 8, 'o' |
| | case 'b': |
| | s.nextch() |
| | base, prefix = 2, 'b' |
| | default: |
| | base, prefix = 8, '0' |
| | digsep = 1 |
| | } |
| | } |
| | digsep |= s.digits(base, &invalid) |
| | if s.ch == '.' { |
| | if prefix == 'o' || prefix == 'b' { |
| | s.errorf("invalid radix point in %s literal", baseName(base)) |
| | ok = false |
| | } |
| | s.nextch() |
| | seenPoint = true |
| | } |
| | } |
| |
|
| | |
| | if seenPoint { |
| | kind = FloatLit |
| | digsep |= s.digits(base, &invalid) |
| | } |
| |
|
| | if digsep&1 == 0 && ok { |
| | s.errorf("%s literal has no digits", baseName(base)) |
| | ok = false |
| | } |
| |
|
| | |
| | if e := lower(s.ch); e == 'e' || e == 'p' { |
| | if ok { |
| | switch { |
| | case e == 'e' && prefix != 0 && prefix != '0': |
| | s.errorf("%q exponent requires decimal mantissa", s.ch) |
| | ok = false |
| | case e == 'p' && prefix != 'x': |
| | s.errorf("%q exponent requires hexadecimal mantissa", s.ch) |
| | ok = false |
| | } |
| | } |
| | s.nextch() |
| | kind = FloatLit |
| | if s.ch == '+' || s.ch == '-' { |
| | s.nextch() |
| | } |
| | digsep = s.digits(10, nil) | digsep&2 |
| | if digsep&1 == 0 && ok { |
| | s.errorf("exponent has no digits") |
| | ok = false |
| | } |
| | } else if prefix == 'x' && kind == FloatLit && ok { |
| | s.errorf("hexadecimal mantissa requires a 'p' exponent") |
| | ok = false |
| | } |
| |
|
| | |
| | if s.ch == 'i' { |
| | kind = ImagLit |
| | s.nextch() |
| | } |
| |
|
| | s.setLit(kind, ok) |
| |
|
| | if kind == IntLit && invalid >= 0 && ok { |
| | s.errorAtf(invalid, "invalid digit %q in %s literal", s.lit[invalid], baseName(base)) |
| | ok = false |
| | } |
| |
|
| | if digsep&2 != 0 && ok { |
| | if i := invalidSep(s.lit); i >= 0 { |
| | s.errorAtf(i, "'_' must separate successive digits") |
| | ok = false |
| | } |
| | } |
| |
|
| | s.bad = !ok |
| | } |
| |
|
// baseName returns the English name of the number base 2, 8, 10 or 16.
// It panics for any other value.
func baseName(base int) string {
	var name string
	switch base {
	case 2:
		name = "binary"
	case 8:
		name = "octal"
	case 10:
		name = "decimal"
	case 16:
		name = "hexadecimal"
	default:
		panic("invalid base")
	}
	return name
}
| |
|
| | |
| | func invalidSep(x string) int { |
| | x1 := ' ' |
| | d := '.' |
| | i := 0 |
| |
|
| | |
| | if len(x) >= 2 && x[0] == '0' { |
| | x1 = lower(rune(x[1])) |
| | if x1 == 'x' || x1 == 'o' || x1 == 'b' { |
| | d = '0' |
| | i = 2 |
| | } |
| | } |
| |
|
| | |
| | for ; i < len(x); i++ { |
| | p := d |
| | d = rune(x[i]) |
| | switch { |
| | case d == '_': |
| | if p != '0' { |
| | return i |
| | } |
| | case isDecimal(d) || x1 == 'x' && isHex(d): |
| | d = '0' |
| | default: |
| | if p == '_' { |
| | return i - 1 |
| | } |
| | d = '.' |
| | } |
| | } |
| | if d == '_' { |
| | return len(x) - 1 |
| | } |
| |
|
| | return -1 |
| | } |
| |
|
| | func (s *scanner) rune() { |
| | ok := true |
| | s.nextch() |
| |
|
| | n := 0 |
| | for ; ; n++ { |
| | if s.ch == '\'' { |
| | if ok { |
| | if n == 0 { |
| | s.errorf("empty rune literal or unescaped '") |
| | ok = false |
| | } else if n != 1 { |
| | s.errorAtf(0, "more than one character in rune literal") |
| | ok = false |
| | } |
| | } |
| | s.nextch() |
| | break |
| | } |
| | if s.ch == '\\' { |
| | s.nextch() |
| | if !s.escape('\'') { |
| | ok = false |
| | } |
| | continue |
| | } |
| | if s.ch == '\n' { |
| | if ok { |
| | s.errorf("newline in rune literal") |
| | ok = false |
| | } |
| | break |
| | } |
| | if s.ch < 0 { |
| | if ok { |
| | s.errorAtf(0, "rune literal not terminated") |
| | ok = false |
| | } |
| | break |
| | } |
| | s.nextch() |
| | } |
| |
|
| | s.setLit(RuneLit, ok) |
| | } |
| |
|
| | func (s *scanner) stdString() { |
| | ok := true |
| | s.nextch() |
| |
|
| | for { |
| | if s.ch == '"' { |
| | s.nextch() |
| | break |
| | } |
| | if s.ch == '\\' { |
| | s.nextch() |
| | if !s.escape('"') { |
| | ok = false |
| | } |
| | continue |
| | } |
| | if s.ch == '\n' { |
| | s.errorf("newline in string") |
| | ok = false |
| | break |
| | } |
| | if s.ch < 0 { |
| | s.errorAtf(0, "string not terminated") |
| | ok = false |
| | break |
| | } |
| | s.nextch() |
| | } |
| |
|
| | s.setLit(StringLit, ok) |
| | } |
| |
|
| | func (s *scanner) rawString() { |
| | ok := true |
| | s.nextch() |
| |
|
| | for { |
| | if s.ch == '`' { |
| | s.nextch() |
| | break |
| | } |
| | if s.ch < 0 { |
| | s.errorAtf(0, "string not terminated") |
| | ok = false |
| | break |
| | } |
| | s.nextch() |
| | } |
| | |
| | |
| | |
| |
|
| | s.setLit(StringLit, ok) |
| | } |
| |
|
| | func (s *scanner) comment(text string) { |
| | s.errorAtf(0, "%s", text) |
| | } |
| |
|
| | func (s *scanner) skipLine() { |
| | |
| | for s.ch >= 0 && s.ch != '\n' { |
| | s.nextch() |
| | } |
| | } |
| |
|
| | func (s *scanner) lineComment() { |
| | |
| |
|
| | if s.mode&comments != 0 { |
| | s.skipLine() |
| | s.comment(string(s.segment())) |
| | return |
| | } |
| |
|
| | |
| | if s.mode&directives == 0 || (s.ch != 'g' && s.ch != 'l') { |
| | s.stop() |
| | s.skipLine() |
| | return |
| | } |
| |
|
| | |
| | prefix := "go:" |
| | if s.ch == 'l' { |
| | prefix = "line " |
| | } |
| | for _, m := range prefix { |
| | if s.ch != m { |
| | s.stop() |
| | s.skipLine() |
| | return |
| | } |
| | s.nextch() |
| | } |
| |
|
| | |
| | s.skipLine() |
| | s.comment(string(s.segment())) |
| | } |
| |
|
| | func (s *scanner) skipComment() bool { |
| | for s.ch >= 0 { |
| | for s.ch == '*' { |
| | s.nextch() |
| | if s.ch == '/' { |
| | s.nextch() |
| | return true |
| | } |
| | } |
| | s.nextch() |
| | } |
| | s.errorAtf(0, "comment not terminated") |
| | return false |
| | } |
| |
|
| | func (s *scanner) fullComment() { |
| | |
| |
|
| | if s.mode&comments != 0 { |
| | if s.skipComment() { |
| | s.comment(string(s.segment())) |
| | } |
| | return |
| | } |
| |
|
| | if s.mode&directives == 0 || s.ch != 'l' { |
| | s.stop() |
| | s.skipComment() |
| | return |
| | } |
| |
|
| | |
| | const prefix = "line " |
| | for _, m := range prefix { |
| | if s.ch != m { |
| | s.stop() |
| | s.skipComment() |
| | return |
| | } |
| | s.nextch() |
| | } |
| |
|
| | |
| | if s.skipComment() { |
| | s.comment(string(s.segment())) |
| | } |
| | } |
| |
|
| | func (s *scanner) escape(quote rune) bool { |
| | var n int |
| | var base, max uint32 |
| |
|
| | switch s.ch { |
| | case quote, 'a', 'b', 'f', 'n', 'r', 't', 'v', '\\': |
| | s.nextch() |
| | return true |
| | case '0', '1', '2', '3', '4', '5', '6', '7': |
| | n, base, max = 3, 8, 255 |
| | case 'x': |
| | s.nextch() |
| | n, base, max = 2, 16, 255 |
| | case 'u': |
| | s.nextch() |
| | n, base, max = 4, 16, unicode.MaxRune |
| | case 'U': |
| | s.nextch() |
| | n, base, max = 8, 16, unicode.MaxRune |
| | default: |
| | if s.ch < 0 { |
| | return true |
| | } |
| | s.errorf("unknown escape") |
| | return false |
| | } |
| |
|
| | var x uint32 |
| | for i := n; i > 0; i-- { |
| | if s.ch < 0 { |
| | return true |
| | } |
| | d := base |
| | if isDecimal(s.ch) { |
| | d = uint32(s.ch) - '0' |
| | } else if 'a' <= lower(s.ch) && lower(s.ch) <= 'f' { |
| | d = uint32(lower(s.ch)) - 'a' + 10 |
| | } |
| | if d >= base { |
| | s.errorf("invalid character %q in %s escape", s.ch, baseName(int(base))) |
| | return false |
| | } |
| | |
| | x = x*base + d |
| | s.nextch() |
| | } |
| |
|
| | if x > max && base == 8 { |
| | s.errorf("octal escape value %d > 255", x) |
| | return false |
| | } |
| |
|
| | if x > max || 0xD800 <= x && x < 0xE000 { |
| | s.errorf("escape is invalid Unicode code point %#U", x) |
| | return false |
| | } |
| |
|
| | return true |
| | } |
| |
|