| | |
| | |
| | |
| |
|
| | package regexp |
| |
|
| | import ( |
| | "io" |
| | "regexp/syntax" |
| | "sync" |
| | ) |
| |
|
| | |
| | |
| | type queue struct { |
| | sparse []uint32 |
| | dense []entry |
| | } |
| |
|
| | |
| | |
| | |
| | |
| | type entry struct { |
| | pc uint32 |
| | t *thread |
| | } |
| |
|
| | |
| | |
| | |
| | type thread struct { |
| | inst *syntax.Inst |
| | cap []int |
| | } |
| |
|
| | |
| | type machine struct { |
| | re *Regexp |
| | p *syntax.Prog |
| | q0, q1 queue |
| | pool []*thread |
| | matched bool |
| | matchcap []int |
| |
|
| | inputs inputs |
| | } |
| |
|
| | type inputs struct { |
| | |
| | bytes inputBytes |
| | string inputString |
| | reader inputReader |
| | } |
| |
|
| | func (i *inputs) newBytes(b []byte) input { |
| | i.bytes.str = b |
| | return &i.bytes |
| | } |
| |
|
| | func (i *inputs) newString(s string) input { |
| | i.string.str = s |
| | return &i.string |
| | } |
| |
|
| | func (i *inputs) newReader(r io.RuneReader) input { |
| | i.reader.r = r |
| | i.reader.atEOT = false |
| | i.reader.pos = 0 |
| | return &i.reader |
| | } |
| |
|
| | func (i *inputs) clear() { |
| | |
| | |
| | if i.bytes.str != nil { |
| | i.bytes.str = nil |
| | } else if i.reader.r != nil { |
| | i.reader.r = nil |
| | } else { |
| | i.string.str = "" |
| | } |
| | } |
| |
|
| | func (i *inputs) init(r io.RuneReader, b []byte, s string) (input, int) { |
| | if r != nil { |
| | return i.newReader(r), 0 |
| | } |
| | if b != nil { |
| | return i.newBytes(b), len(b) |
| | } |
| | return i.newString(s), len(s) |
| | } |
| |
|
| | func (m *machine) init(ncap int) { |
| | for _, t := range m.pool { |
| | t.cap = t.cap[:ncap] |
| | } |
| | m.matchcap = m.matchcap[:ncap] |
| | } |
| |
|
| | |
| | |
| | func (m *machine) alloc(i *syntax.Inst) *thread { |
| | var t *thread |
| | if n := len(m.pool); n > 0 { |
| | t = m.pool[n-1] |
| | m.pool = m.pool[:n-1] |
| | } else { |
| | t = new(thread) |
| | t.cap = make([]int, len(m.matchcap), cap(m.matchcap)) |
| | } |
| | t.inst = i |
| | return t |
| | } |
| |
|
| | |
| | |
| | |
| | |
| | |
| | type lazyFlag uint64 |
| |
|
| | func newLazyFlag(r1, r2 rune) lazyFlag { |
| | return lazyFlag(uint64(r1)<<32 | uint64(uint32(r2))) |
| | } |
| |
|
| | func (f lazyFlag) match(op syntax.EmptyOp) bool { |
| | if op == 0 { |
| | return true |
| | } |
| | r1 := rune(f >> 32) |
| | if op&syntax.EmptyBeginLine != 0 { |
| | if r1 != '\n' && r1 >= 0 { |
| | return false |
| | } |
| | op &^= syntax.EmptyBeginLine |
| | } |
| | if op&syntax.EmptyBeginText != 0 { |
| | if r1 >= 0 { |
| | return false |
| | } |
| | op &^= syntax.EmptyBeginText |
| | } |
| | if op == 0 { |
| | return true |
| | } |
| | r2 := rune(f) |
| | if op&syntax.EmptyEndLine != 0 { |
| | if r2 != '\n' && r2 >= 0 { |
| | return false |
| | } |
| | op &^= syntax.EmptyEndLine |
| | } |
| | if op&syntax.EmptyEndText != 0 { |
| | if r2 >= 0 { |
| | return false |
| | } |
| | op &^= syntax.EmptyEndText |
| | } |
| | if op == 0 { |
| | return true |
| | } |
| | if syntax.IsWordChar(r1) != syntax.IsWordChar(r2) { |
| | op &^= syntax.EmptyWordBoundary |
| | } else { |
| | op &^= syntax.EmptyNoWordBoundary |
| | } |
| | return op == 0 |
| | } |
| |
|
| | |
| | |
| | |
| | func (m *machine) match(i input, pos int) bool { |
| | startCond := m.re.cond |
| | if startCond == ^syntax.EmptyOp(0) { |
| | return false |
| | } |
| | m.matched = false |
| | for i := range m.matchcap { |
| | m.matchcap[i] = -1 |
| | } |
| | runq, nextq := &m.q0, &m.q1 |
| | r, r1 := endOfText, endOfText |
| | width, width1 := 0, 0 |
| | r, width = i.step(pos) |
| | if r != endOfText { |
| | r1, width1 = i.step(pos + width) |
| | } |
| | var flag lazyFlag |
| | if pos == 0 { |
| | flag = newLazyFlag(-1, r) |
| | } else { |
| | flag = i.context(pos) |
| | } |
| | for { |
| | if len(runq.dense) == 0 { |
| | if startCond&syntax.EmptyBeginText != 0 && pos != 0 { |
| | |
| | break |
| | } |
| | if m.matched { |
| | |
| | break |
| | } |
| | if len(m.re.prefix) > 0 && r1 != m.re.prefixRune && i.canCheckPrefix() { |
| | |
| | advance := i.index(m.re, pos) |
| | if advance < 0 { |
| | break |
| | } |
| | pos += advance |
| | r, width = i.step(pos) |
| | r1, width1 = i.step(pos + width) |
| | } |
| | } |
| | if !m.matched { |
| | if len(m.matchcap) > 0 { |
| | m.matchcap[0] = pos |
| | } |
| | m.add(runq, uint32(m.p.Start), pos, m.matchcap, &flag, nil) |
| | } |
| | flag = newLazyFlag(r, r1) |
| | m.step(runq, nextq, pos, pos+width, r, &flag) |
| | if width == 0 { |
| | break |
| | } |
| | if len(m.matchcap) == 0 && m.matched { |
| | |
| | |
| | break |
| | } |
| | pos += width |
| | r, width = r1, width1 |
| | if r != endOfText { |
| | r1, width1 = i.step(pos + width) |
| | } |
| | runq, nextq = nextq, runq |
| | } |
| | m.clear(nextq) |
| | return m.matched |
| | } |
| |
|
| | |
| | func (m *machine) clear(q *queue) { |
| | for _, d := range q.dense { |
| | if d.t != nil { |
| | m.pool = append(m.pool, d.t) |
| | } |
| | } |
| | q.dense = q.dense[:0] |
| | } |
| |
|
| | |
| | |
| | |
| | |
| | |
| | func (m *machine) step(runq, nextq *queue, pos, nextPos int, c rune, nextCond *lazyFlag) { |
| | longest := m.re.longest |
| | for j := 0; j < len(runq.dense); j++ { |
| | d := &runq.dense[j] |
| | t := d.t |
| | if t == nil { |
| | continue |
| | } |
| | if longest && m.matched && len(t.cap) > 0 && m.matchcap[0] < t.cap[0] { |
| | m.pool = append(m.pool, t) |
| | continue |
| | } |
| | i := t.inst |
| | add := false |
| | switch i.Op { |
| | default: |
| | panic("bad inst") |
| |
|
| | case syntax.InstMatch: |
| | if len(t.cap) > 0 && (!longest || !m.matched || m.matchcap[1] < pos) { |
| | t.cap[1] = pos |
| | copy(m.matchcap, t.cap) |
| | } |
| | if !longest { |
| | |
| | for _, d := range runq.dense[j+1:] { |
| | if d.t != nil { |
| | m.pool = append(m.pool, d.t) |
| | } |
| | } |
| | runq.dense = runq.dense[:0] |
| | } |
| | m.matched = true |
| |
|
| | case syntax.InstRune: |
| | add = i.MatchRune(c) |
| | case syntax.InstRune1: |
| | add = c == i.Rune[0] |
| | case syntax.InstRuneAny: |
| | add = true |
| | case syntax.InstRuneAnyNotNL: |
| | add = c != '\n' |
| | } |
| | if add { |
| | t = m.add(nextq, i.Out, nextPos, t.cap, nextCond, t) |
| | } |
| | if t != nil { |
| | m.pool = append(m.pool, t) |
| | } |
| | } |
| | runq.dense = runq.dense[:0] |
| | } |
| |
|
| | |
| | |
| | |
| | |
| | func (m *machine) add(q *queue, pc uint32, pos int, cap []int, cond *lazyFlag, t *thread) *thread { |
| | Again: |
| | if pc == 0 { |
| | return t |
| | } |
| | if j := q.sparse[pc]; j < uint32(len(q.dense)) && q.dense[j].pc == pc { |
| | return t |
| | } |
| |
|
| | j := len(q.dense) |
| | q.dense = q.dense[:j+1] |
| | d := &q.dense[j] |
| | d.t = nil |
| | d.pc = pc |
| | q.sparse[pc] = uint32(j) |
| |
|
| | i := &m.p.Inst[pc] |
| | switch i.Op { |
| | default: |
| | panic("unhandled") |
| | case syntax.InstFail: |
| | |
| | case syntax.InstAlt, syntax.InstAltMatch: |
| | t = m.add(q, i.Out, pos, cap, cond, t) |
| | pc = i.Arg |
| | goto Again |
| | case syntax.InstEmptyWidth: |
| | if cond.match(syntax.EmptyOp(i.Arg)) { |
| | pc = i.Out |
| | goto Again |
| | } |
| | case syntax.InstNop: |
| | pc = i.Out |
| | goto Again |
| | case syntax.InstCapture: |
| | if int(i.Arg) < len(cap) { |
| | opos := cap[i.Arg] |
| | cap[i.Arg] = pos |
| | m.add(q, i.Out, pos, cap, cond, nil) |
| | cap[i.Arg] = opos |
| | } else { |
| | pc = i.Out |
| | goto Again |
| | } |
| | case syntax.InstMatch, syntax.InstRune, syntax.InstRune1, syntax.InstRuneAny, syntax.InstRuneAnyNotNL: |
| | if t == nil { |
| | t = m.alloc(i) |
| | } else { |
| | t.inst = i |
| | } |
| | if len(cap) > 0 && &t.cap[0] != &cap[0] { |
| | copy(t.cap, cap) |
| | } |
| | d.t = t |
| | t = nil |
| | } |
| | return t |
| | } |
| |
|
| | type onePassMachine struct { |
| | inputs inputs |
| | matchcap []int |
| | } |
| |
|
| | var onePassPool sync.Pool |
| |
|
| | func newOnePassMachine() *onePassMachine { |
| | m, ok := onePassPool.Get().(*onePassMachine) |
| | if !ok { |
| | m = new(onePassMachine) |
| | } |
| | return m |
| | } |
| |
|
| | func freeOnePassMachine(m *onePassMachine) { |
| | m.inputs.clear() |
| | onePassPool.Put(m) |
| | } |
| |
|
| | |
| | func (re *Regexp) doOnePass(ir io.RuneReader, ib []byte, is string, pos, ncap int, dstCap []int) []int { |
| | startCond := re.cond |
| | if startCond == ^syntax.EmptyOp(0) { |
| | return nil |
| | } |
| |
|
| | m := newOnePassMachine() |
| | if cap(m.matchcap) < ncap { |
| | m.matchcap = make([]int, ncap) |
| | } else { |
| | m.matchcap = m.matchcap[:ncap] |
| | } |
| |
|
| | matched := false |
| | for i := range m.matchcap { |
| | m.matchcap[i] = -1 |
| | } |
| |
|
| | i, _ := m.inputs.init(ir, ib, is) |
| |
|
| | r, r1 := endOfText, endOfText |
| | width, width1 := 0, 0 |
| | r, width = i.step(pos) |
| | if r != endOfText { |
| | r1, width1 = i.step(pos + width) |
| | } |
| | var flag lazyFlag |
| | if pos == 0 { |
| | flag = newLazyFlag(-1, r) |
| | } else { |
| | flag = i.context(pos) |
| | } |
| | pc := re.onepass.Start |
| | inst := &re.onepass.Inst[pc] |
| | |
| | if pos == 0 && flag.match(syntax.EmptyOp(inst.Arg)) && |
| | len(re.prefix) > 0 && i.canCheckPrefix() { |
| | |
| | if !i.hasPrefix(re) { |
| | goto Return |
| | } |
| | pos += len(re.prefix) |
| | r, width = i.step(pos) |
| | r1, width1 = i.step(pos + width) |
| | flag = i.context(pos) |
| | pc = int(re.prefixEnd) |
| | } |
| | for { |
| | inst = &re.onepass.Inst[pc] |
| | pc = int(inst.Out) |
| | switch inst.Op { |
| | default: |
| | panic("bad inst") |
| | case syntax.InstMatch: |
| | matched = true |
| | if len(m.matchcap) > 0 { |
| | m.matchcap[0] = 0 |
| | m.matchcap[1] = pos |
| | } |
| | goto Return |
| | case syntax.InstRune: |
| | if !inst.MatchRune(r) { |
| | goto Return |
| | } |
| | case syntax.InstRune1: |
| | if r != inst.Rune[0] { |
| | goto Return |
| | } |
| | case syntax.InstRuneAny: |
| | |
| | case syntax.InstRuneAnyNotNL: |
| | if r == '\n' { |
| | goto Return |
| | } |
| | |
| | case syntax.InstAlt, syntax.InstAltMatch: |
| | pc = int(onePassNext(inst, r)) |
| | continue |
| | case syntax.InstFail: |
| | goto Return |
| | case syntax.InstNop: |
| | continue |
| | case syntax.InstEmptyWidth: |
| | if !flag.match(syntax.EmptyOp(inst.Arg)) { |
| | goto Return |
| | } |
| | continue |
| | case syntax.InstCapture: |
| | if int(inst.Arg) < len(m.matchcap) { |
| | m.matchcap[inst.Arg] = pos |
| | } |
| | continue |
| | } |
| | if width == 0 { |
| | break |
| | } |
| | flag = newLazyFlag(r, r1) |
| | pos += width |
| | r, width = r1, width1 |
| | if r != endOfText { |
| | r1, width1 = i.step(pos + width) |
| | } |
| | } |
| |
|
| | Return: |
| | if !matched { |
| | freeOnePassMachine(m) |
| | return nil |
| | } |
| |
|
| | dstCap = append(dstCap, m.matchcap...) |
| | freeOnePassMachine(m) |
| | return dstCap |
| | } |
| |
|
| | |
| | func (re *Regexp) doMatch(r io.RuneReader, b []byte, s string) bool { |
| | return re.doExecute(r, b, s, 0, 0, nil) != nil |
| | } |
| |
|
| | |
| | |
| | |
| | |
| | func (re *Regexp) doExecute(r io.RuneReader, b []byte, s string, pos int, ncap int, dstCap []int) []int { |
| | if dstCap == nil { |
| | |
| | dstCap = arrayNoInts[:0:0] |
| | } |
| |
|
| | if r == nil && len(b)+len(s) < re.minInputLen { |
| | return nil |
| | } |
| |
|
| | if re.onepass != nil { |
| | return re.doOnePass(r, b, s, pos, ncap, dstCap) |
| | } |
| | if r == nil && len(b)+len(s) < re.maxBitStateLen { |
| | return re.backtrack(b, s, pos, ncap, dstCap) |
| | } |
| |
|
| | m := re.get() |
| | i, _ := m.inputs.init(r, b, s) |
| |
|
| | m.init(ncap) |
| | if !m.match(i, pos) { |
| | re.put(m) |
| | return nil |
| | } |
| |
|
| | dstCap = append(dstCap, m.matchcap...) |
| | re.put(m) |
| | return dstCap |
| | } |
| |
|
| | |
| | |
| | var arrayNoInts [0]int |
| |
|