| |
| |
| |
|
|
| package syntax |
|
|
| import ( |
| "strconv" |
| "strings" |
| "unicode" |
| "unicode/utf8" |
| ) |
|
|
| |
| |
|
|
| |
| type Prog struct { |
| Inst []Inst |
| Start int |
| NumCap int |
| } |
|
|
| |
// An InstOp is the opcode stored in Inst.Op.
type InstOp uint8

// Opcodes, numbered consecutively from zero. instOpNames is indexed by
// these values, so the two lists must be kept in step.
const (
	InstAlt InstOp = iota
	InstAltMatch
	InstCapture
	InstEmptyWidth
	InstMatch
	InstFail
	InstNop
	InstRune
	InstRune1
	InstRuneAny
	InstRuneAnyNotNL
)

// instOpNames holds the printable name of every opcode, keyed by the
// opcode's numeric value.
var instOpNames = []string{
	InstAlt:          "InstAlt",
	InstAltMatch:     "InstAltMatch",
	InstCapture:      "InstCapture",
	InstEmptyWidth:   "InstEmptyWidth",
	InstMatch:        "InstMatch",
	InstFail:         "InstFail",
	InstNop:          "InstNop",
	InstRune:         "InstRune",
	InstRune1:        "InstRune1",
	InstRuneAny:      "InstRuneAny",
	InstRuneAnyNotNL: "InstRuneAnyNotNL",
}

// String returns the identifier of the opcode, or the empty string when
// i has no entry in instOpNames.
func (i InstOp) String() string {
	if int(i) < len(instOpNames) {
		return instOpNames[i]
	}
	return ""
}
|
|
| |
// An EmptyOp is a bit set of zero-width assertions. Each constant below
// occupies its own bit, so several assertions can be OR-ed together.
type EmptyOp uint8

// Assertion flags, assigned successive powers of two starting at 1.
const (
	EmptyBeginLine EmptyOp = 1 << iota
	EmptyEndLine
	EmptyBeginText
	EmptyEndText
	EmptyWordBoundary
	EmptyNoWordBoundary
)
|
|
| |
| |
| |
| |
| |
| |
| func EmptyOpContext(r1, r2 rune) EmptyOp { |
| var op EmptyOp = EmptyNoWordBoundary |
| var boundary byte |
| switch { |
| case IsWordChar(r1): |
| boundary = 1 |
| case r1 == '\n': |
| op |= EmptyBeginLine |
| case r1 < 0: |
| op |= EmptyBeginText | EmptyBeginLine |
| } |
| switch { |
| case IsWordChar(r2): |
| boundary ^= 1 |
| case r2 == '\n': |
| op |= EmptyEndLine |
| case r2 < 0: |
| op |= EmptyEndText | EmptyEndLine |
| } |
| if boundary != 0 { |
| op ^= (EmptyWordBoundary | EmptyNoWordBoundary) |
| } |
| return op |
| } |
|
|
| |
| |
| |
// IsWordChar reports whether r is an ASCII letter, an ASCII digit, or
// an underscore. Every other rune, including non-ASCII letters and
// negative values, is not a word character.
func IsWordChar(r rune) bool {
	switch {
	// Lowercase letters are tested first.
	case 'a' <= r && r <= 'z',
		'A' <= r && r <= 'Z',
		'0' <= r && r <= '9',
		r == '_':
		return true
	}
	return false
}
|
|
| |
| type Inst struct { |
| Op InstOp |
| Out uint32 |
| Arg uint32 |
| Rune []rune |
| } |
|
|
| func (p *Prog) String() string { |
| var b strings.Builder |
| dumpProg(&b, p) |
| return b.String() |
| } |
|
|
| |
| func (p *Prog) skipNop(pc uint32) *Inst { |
| i := &p.Inst[pc] |
| for i.Op == InstNop || i.Op == InstCapture { |
| i = &p.Inst[i.Out] |
| } |
| return i |
| } |
|
|
| |
| func (i *Inst) op() InstOp { |
| op := i.Op |
| switch op { |
| case InstRune1, InstRuneAny, InstRuneAnyNotNL: |
| op = InstRune |
| } |
| return op |
| } |
|
|
| |
| |
| |
| func (p *Prog) Prefix() (prefix string, complete bool) { |
| i := p.skipNop(uint32(p.Start)) |
|
|
| |
| if i.op() != InstRune || len(i.Rune) != 1 { |
| return "", i.Op == InstMatch |
| } |
|
|
| |
| var buf strings.Builder |
| for i.op() == InstRune && len(i.Rune) == 1 && Flags(i.Arg)&FoldCase == 0 && i.Rune[0] != utf8.RuneError { |
| buf.WriteRune(i.Rune[0]) |
| i = p.skipNop(i.Out) |
| } |
| return buf.String(), i.Op == InstMatch |
| } |
|
|
| |
| |
| func (p *Prog) StartCond() EmptyOp { |
| var flag EmptyOp |
| pc := uint32(p.Start) |
| i := &p.Inst[pc] |
| Loop: |
| for { |
| switch i.Op { |
| case InstEmptyWidth: |
| flag |= EmptyOp(i.Arg) |
| case InstFail: |
| return ^EmptyOp(0) |
| case InstCapture, InstNop: |
| |
| default: |
| break Loop |
| } |
| pc = i.Out |
| i = &p.Inst[pc] |
| } |
| return flag |
| } |
|
|
// noMatch is the value MatchRunePos returns when the rune does not
// match the instruction.
const noMatch = -1
|
|
| |
| |
| func (i *Inst) MatchRune(r rune) bool { |
| return i.MatchRunePos(r) != noMatch |
| } |
|
|
| |
| |
| |
| |
| |
| func (i *Inst) MatchRunePos(r rune) int { |
| rune := i.Rune |
|
|
| switch len(rune) { |
| case 0: |
| return noMatch |
|
|
| case 1: |
| |
| r0 := rune[0] |
| if r == r0 { |
| return 0 |
| } |
| if Flags(i.Arg)&FoldCase != 0 { |
| for r1 := unicode.SimpleFold(r0); r1 != r0; r1 = unicode.SimpleFold(r1) { |
| if r == r1 { |
| return 0 |
| } |
| } |
| } |
| return noMatch |
|
|
| case 2: |
| if r >= rune[0] && r <= rune[1] { |
| return 0 |
| } |
| return noMatch |
|
|
| case 4, 6, 8: |
| |
| |
| for j := 0; j < len(rune); j += 2 { |
| if r < rune[j] { |
| return noMatch |
| } |
| if r <= rune[j+1] { |
| return j / 2 |
| } |
| } |
| return noMatch |
| } |
|
|
| |
| lo := 0 |
| hi := len(rune) / 2 |
| for lo < hi { |
| m := int(uint(lo+hi) >> 1) |
| if c := rune[2*m]; c <= r { |
| if r <= rune[2*m+1] { |
| return m |
| } |
| lo = m + 1 |
| } else { |
| hi = m |
| } |
| } |
| return noMatch |
| } |
|
|
| |
| |
| |
| func (i *Inst) MatchEmptyWidth(before rune, after rune) bool { |
| switch EmptyOp(i.Arg) { |
| case EmptyBeginLine: |
| return before == '\n' || before == -1 |
| case EmptyEndLine: |
| return after == '\n' || after == -1 |
| case EmptyBeginText: |
| return before == -1 |
| case EmptyEndText: |
| return after == -1 |
| case EmptyWordBoundary: |
| return IsWordChar(before) != IsWordChar(after) |
| case EmptyNoWordBoundary: |
| return IsWordChar(before) == IsWordChar(after) |
| } |
| panic("unknown empty width arg") |
| } |
|
|
| func (i *Inst) String() string { |
| var b strings.Builder |
| dumpInst(&b, i) |
| return b.String() |
| } |
|
|
// bw appends each of args to b, in order.
func bw(b *strings.Builder, args ...string) {
	for k := range args {
		b.WriteString(args[k])
	}
}
|
|
| func dumpProg(b *strings.Builder, p *Prog) { |
| for j := range p.Inst { |
| i := &p.Inst[j] |
| pc := strconv.Itoa(j) |
| if len(pc) < 3 { |
| b.WriteString(" "[len(pc):]) |
| } |
| if j == p.Start { |
| pc += "*" |
| } |
| bw(b, pc, "\t") |
| dumpInst(b, i) |
| bw(b, "\n") |
| } |
| } |
|
|
// u32 returns the base-10 representation of i.
func u32(i uint32) string {
	digits := strconv.AppendUint(nil, uint64(i), 10)
	return string(digits)
}
|
|
| func dumpInst(b *strings.Builder, i *Inst) { |
| switch i.Op { |
| case InstAlt: |
| bw(b, "alt -> ", u32(i.Out), ", ", u32(i.Arg)) |
| case InstAltMatch: |
| bw(b, "altmatch -> ", u32(i.Out), ", ", u32(i.Arg)) |
| case InstCapture: |
| bw(b, "cap ", u32(i.Arg), " -> ", u32(i.Out)) |
| case InstEmptyWidth: |
| bw(b, "empty ", u32(i.Arg), " -> ", u32(i.Out)) |
| case InstMatch: |
| bw(b, "match") |
| case InstFail: |
| bw(b, "fail") |
| case InstNop: |
| bw(b, "nop -> ", u32(i.Out)) |
| case InstRune: |
| if i.Rune == nil { |
| |
| bw(b, "rune <nil>") |
| } |
| bw(b, "rune ", strconv.QuoteToASCII(string(i.Rune))) |
| if Flags(i.Arg)&FoldCase != 0 { |
| bw(b, "/i") |
| } |
| bw(b, " -> ", u32(i.Out)) |
| case InstRune1: |
| bw(b, "rune1 ", strconv.QuoteToASCII(string(i.Rune)), " -> ", u32(i.Out)) |
| case InstRuneAny: |
| bw(b, "any -> ", u32(i.Out)) |
| case InstRuneAnyNotNL: |
| bw(b, "anynotnl -> ", u32(i.Out)) |
| } |
| } |
|
|