| | |
| | |
| | |
| |
|
| | package syntax |
| |
|
| | import ( |
| | "sort" |
| | "strings" |
| | "sync" |
| | "unicode" |
| | "unicode/utf8" |
| | ) |
| |
|
| | |
| | |
| | type Error struct { |
| | Code ErrorCode |
| | Expr string |
| | } |
| |
|
| | func (e *Error) Error() string { |
| | return "error parsing regexp: " + e.Code.String() + ": `" + e.Expr + "`" |
| | } |
| |
|
| | |
| | type ErrorCode string |
| |
|
| | const ( |
| | |
| | ErrInternalError ErrorCode = "regexp/syntax: internal error" |
| |
|
| | |
| | ErrInvalidCharClass ErrorCode = "invalid character class" |
| | ErrInvalidCharRange ErrorCode = "invalid character class range" |
| | ErrInvalidEscape ErrorCode = "invalid escape sequence" |
| | ErrInvalidNamedCapture ErrorCode = "invalid named capture" |
| | ErrInvalidPerlOp ErrorCode = "invalid or unsupported Perl syntax" |
| | ErrInvalidRepeatOp ErrorCode = "invalid nested repetition operator" |
| | ErrInvalidRepeatSize ErrorCode = "invalid repeat count" |
| | ErrInvalidUTF8 ErrorCode = "invalid UTF-8" |
| | ErrMissingBracket ErrorCode = "missing closing ]" |
| | ErrMissingParen ErrorCode = "missing closing )" |
| | ErrMissingRepeatArgument ErrorCode = "missing argument to repetition operator" |
| | ErrTrailingBackslash ErrorCode = "trailing backslash at end of expression" |
| | ErrUnexpectedParen ErrorCode = "unexpected )" |
| | ErrNestingDepth ErrorCode = "expression nests too deeply" |
| | ErrLarge ErrorCode = "expression too large" |
| | ) |
| |
|
| | func (e ErrorCode) String() string { |
| | return string(e) |
| | } |
| |
|
| | |
| | type Flags uint16 |
| |
|
| | const ( |
| | FoldCase Flags = 1 << iota |
| | Literal |
| | ClassNL |
| | DotNL |
| | OneLine |
| | NonGreedy |
| | PerlX |
| | UnicodeGroups |
| | WasDollar |
| | Simple |
| |
|
| | MatchNL = ClassNL | DotNL |
| |
|
| | Perl = ClassNL | OneLine | PerlX | UnicodeGroups |
| | POSIX Flags = 0 |
| | ) |
| |
|
| | |
| | const ( |
| | opLeftParen = opPseudo + iota |
| | opVerticalBar |
| | ) |
| |
|
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | const maxHeight = 1000 |
| |
|
| | |
| | |
| | |
| | |
| | |
| | |
| | const ( |
| | maxSize = 128 << 20 / instSize |
| | instSize = 5 * 8 |
| | ) |
| |
|
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | const ( |
| | maxRunes = 128 << 20 / runeSize |
| | runeSize = 4 |
| | ) |
| |
|
| | type parser struct { |
| | flags Flags |
| | stack []*Regexp |
| | free *Regexp |
| | numCap int |
| | wholeRegexp string |
| | tmpClass []rune |
| | numRegexp int |
| | numRunes int |
| | repeats int64 |
| | height map[*Regexp]int |
| | size map[*Regexp]int64 |
| | } |
| |
|
| | func (p *parser) newRegexp(op Op) *Regexp { |
| | re := p.free |
| | if re != nil { |
| | p.free = re.Sub0[0] |
| | *re = Regexp{} |
| | } else { |
| | re = new(Regexp) |
| | p.numRegexp++ |
| | } |
| | re.Op = op |
| | return re |
| | } |
| |
|
| | func (p *parser) reuse(re *Regexp) { |
| | if p.height != nil { |
| | delete(p.height, re) |
| | } |
| | re.Sub0[0] = p.free |
| | p.free = re |
| | } |
| |
|
| | func (p *parser) checkLimits(re *Regexp) { |
| | if p.numRunes > maxRunes { |
| | panic(ErrLarge) |
| | } |
| | p.checkSize(re) |
| | p.checkHeight(re) |
| | } |
| |
|
| | func (p *parser) checkSize(re *Regexp) { |
| | if p.size == nil { |
| | |
| | |
| | |
| | |
| | |
| | if p.repeats == 0 { |
| | p.repeats = 1 |
| | } |
| | if re.Op == OpRepeat { |
| | n := re.Max |
| | if n == -1 { |
| | n = re.Min |
| | } |
| | if n <= 0 { |
| | n = 1 |
| | } |
| | if int64(n) > maxSize/p.repeats { |
| | p.repeats = maxSize |
| | } else { |
| | p.repeats *= int64(n) |
| | } |
| | } |
| | if int64(p.numRegexp) < maxSize/p.repeats { |
| | return |
| | } |
| |
|
| | |
| | |
| | |
| | p.size = make(map[*Regexp]int64) |
| | for _, re := range p.stack { |
| | p.checkSize(re) |
| | } |
| | } |
| |
|
| | if p.calcSize(re, true) > maxSize { |
| | panic(ErrLarge) |
| | } |
| | } |
| |
|
| | func (p *parser) calcSize(re *Regexp, force bool) int64 { |
| | if !force { |
| | if size, ok := p.size[re]; ok { |
| | return size |
| | } |
| | } |
| |
|
| | var size int64 |
| | switch re.Op { |
| | case OpLiteral: |
| | size = int64(len(re.Rune)) |
| | case OpCapture, OpStar: |
| | |
| | size = 2 + p.calcSize(re.Sub[0], false) |
| | case OpPlus, OpQuest: |
| | size = 1 + p.calcSize(re.Sub[0], false) |
| | case OpConcat: |
| | for _, sub := range re.Sub { |
| | size += p.calcSize(sub, false) |
| | } |
| | case OpAlternate: |
| | for _, sub := range re.Sub { |
| | size += p.calcSize(sub, false) |
| | } |
| | if len(re.Sub) > 1 { |
| | size += int64(len(re.Sub)) - 1 |
| | } |
| | case OpRepeat: |
| | sub := p.calcSize(re.Sub[0], false) |
| | if re.Max == -1 { |
| | if re.Min == 0 { |
| | size = 2 + sub |
| | } else { |
| | size = 1 + int64(re.Min)*sub |
| | } |
| | break |
| | } |
| | |
| | size = int64(re.Max)*sub + int64(re.Max-re.Min) |
| | } |
| |
|
| | size = max(1, size) |
| | p.size[re] = size |
| | return size |
| | } |
| |
|
| | func (p *parser) checkHeight(re *Regexp) { |
| | if p.numRegexp < maxHeight { |
| | return |
| | } |
| | if p.height == nil { |
| | p.height = make(map[*Regexp]int) |
| | for _, re := range p.stack { |
| | p.checkHeight(re) |
| | } |
| | } |
| | if p.calcHeight(re, true) > maxHeight { |
| | panic(ErrNestingDepth) |
| | } |
| | } |
| |
|
| | func (p *parser) calcHeight(re *Regexp, force bool) int { |
| | if !force { |
| | if h, ok := p.height[re]; ok { |
| | return h |
| | } |
| | } |
| | h := 1 |
| | for _, sub := range re.Sub { |
| | hsub := p.calcHeight(sub, false) |
| | if h < 1+hsub { |
| | h = 1 + hsub |
| | } |
| | } |
| | p.height[re] = h |
| | return h |
| | } |
| |
|
| | |
| |
|
| | |
| | func (p *parser) push(re *Regexp) *Regexp { |
| | p.numRunes += len(re.Rune) |
| | if re.Op == OpCharClass && len(re.Rune) == 2 && re.Rune[0] == re.Rune[1] { |
| | |
| | if p.maybeConcat(re.Rune[0], p.flags&^FoldCase) { |
| | return nil |
| | } |
| | re.Op = OpLiteral |
| | re.Rune = re.Rune[:1] |
| | re.Flags = p.flags &^ FoldCase |
| | } else if re.Op == OpCharClass && len(re.Rune) == 4 && |
| | re.Rune[0] == re.Rune[1] && re.Rune[2] == re.Rune[3] && |
| | unicode.SimpleFold(re.Rune[0]) == re.Rune[2] && |
| | unicode.SimpleFold(re.Rune[2]) == re.Rune[0] || |
| | re.Op == OpCharClass && len(re.Rune) == 2 && |
| | re.Rune[0]+1 == re.Rune[1] && |
| | unicode.SimpleFold(re.Rune[0]) == re.Rune[1] && |
| | unicode.SimpleFold(re.Rune[1]) == re.Rune[0] { |
| | |
| | if p.maybeConcat(re.Rune[0], p.flags|FoldCase) { |
| | return nil |
| | } |
| |
|
| | |
| | re.Op = OpLiteral |
| | re.Rune = re.Rune[:1] |
| | re.Flags = p.flags | FoldCase |
| | } else { |
| | |
| | p.maybeConcat(-1, 0) |
| | } |
| |
|
| | p.stack = append(p.stack, re) |
| | p.checkLimits(re) |
| | return re |
| | } |
| |
|
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | func (p *parser) maybeConcat(r rune, flags Flags) bool { |
| | n := len(p.stack) |
| | if n < 2 { |
| | return false |
| | } |
| |
|
| | re1 := p.stack[n-1] |
| | re2 := p.stack[n-2] |
| | if re1.Op != OpLiteral || re2.Op != OpLiteral || re1.Flags&FoldCase != re2.Flags&FoldCase { |
| | return false |
| | } |
| |
|
| | |
| | re2.Rune = append(re2.Rune, re1.Rune...) |
| |
|
| | |
| | if r >= 0 { |
| | re1.Rune = re1.Rune0[:1] |
| | re1.Rune[0] = r |
| | re1.Flags = flags |
| | return true |
| | } |
| |
|
| | p.stack = p.stack[:n-1] |
| | p.reuse(re1) |
| | return false |
| | } |
| |
|
| | |
| | func (p *parser) literal(r rune) { |
| | re := p.newRegexp(OpLiteral) |
| | re.Flags = p.flags |
| | if p.flags&FoldCase != 0 { |
| | r = minFoldRune(r) |
| | } |
| | re.Rune0[0] = r |
| | re.Rune = re.Rune0[:1] |
| | p.push(re) |
| | } |
| |
|
| | |
| | func minFoldRune(r rune) rune { |
| | if r < minFold || r > maxFold { |
| | return r |
| | } |
| | m := r |
| | r0 := r |
| | for r = unicode.SimpleFold(r); r != r0; r = unicode.SimpleFold(r) { |
| | m = min(m, r) |
| | } |
| | return m |
| | } |
| |
|
| | |
| | |
| | func (p *parser) op(op Op) *Regexp { |
| | re := p.newRegexp(op) |
| | re.Flags = p.flags |
| | return p.push(re) |
| | } |
| |
|
| | |
| | |
| | |
| | |
| | func (p *parser) repeat(op Op, min, max int, before, after, lastRepeat string) (string, error) { |
| | flags := p.flags |
| | if p.flags&PerlX != 0 { |
| | if len(after) > 0 && after[0] == '?' { |
| | after = after[1:] |
| | flags ^= NonGreedy |
| | } |
| | if lastRepeat != "" { |
| | |
| | |
| | |
| | return "", &Error{ErrInvalidRepeatOp, lastRepeat[:len(lastRepeat)-len(after)]} |
| | } |
| | } |
| | n := len(p.stack) |
| | if n == 0 { |
| | return "", &Error{ErrMissingRepeatArgument, before[:len(before)-len(after)]} |
| | } |
| | sub := p.stack[n-1] |
| | if sub.Op >= opPseudo { |
| | return "", &Error{ErrMissingRepeatArgument, before[:len(before)-len(after)]} |
| | } |
| |
|
| | re := p.newRegexp(op) |
| | re.Min = min |
| | re.Max = max |
| | re.Flags = flags |
| | re.Sub = re.Sub0[:1] |
| | re.Sub[0] = sub |
| | p.stack[n-1] = re |
| | p.checkLimits(re) |
| |
|
| | if op == OpRepeat && (min >= 2 || max >= 2) && !repeatIsValid(re, 1000) { |
| | return "", &Error{ErrInvalidRepeatSize, before[:len(before)-len(after)]} |
| | } |
| |
|
| | return after, nil |
| | } |
| |
|
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | func repeatIsValid(re *Regexp, n int) bool { |
| | if re.Op == OpRepeat { |
| | m := re.Max |
| | if m == 0 { |
| | return true |
| | } |
| | if m < 0 { |
| | m = re.Min |
| | } |
| | if m > n { |
| | return false |
| | } |
| | if m > 0 { |
| | n /= m |
| | } |
| | } |
| | for _, sub := range re.Sub { |
| | if !repeatIsValid(sub, n) { |
| | return false |
| | } |
| | } |
| | return true |
| | } |
| |
|
| | |
| | func (p *parser) concat() *Regexp { |
| | p.maybeConcat(-1, 0) |
| |
|
| | |
| | i := len(p.stack) |
| | for i > 0 && p.stack[i-1].Op < opPseudo { |
| | i-- |
| | } |
| | subs := p.stack[i:] |
| | p.stack = p.stack[:i] |
| |
|
| | |
| | if len(subs) == 0 { |
| | return p.push(p.newRegexp(OpEmptyMatch)) |
| | } |
| |
|
| | return p.push(p.collapse(subs, OpConcat)) |
| | } |
| |
|
| | |
| | func (p *parser) alternate() *Regexp { |
| | |
| | |
| | i := len(p.stack) |
| | for i > 0 && p.stack[i-1].Op < opPseudo { |
| | i-- |
| | } |
| | subs := p.stack[i:] |
| | p.stack = p.stack[:i] |
| |
|
| | |
| | |
| | if len(subs) > 0 { |
| | cleanAlt(subs[len(subs)-1]) |
| | } |
| |
|
| | |
| | |
| | if len(subs) == 0 { |
| | return p.push(p.newRegexp(OpNoMatch)) |
| | } |
| |
|
| | return p.push(p.collapse(subs, OpAlternate)) |
| | } |
| |
|
| | |
| | func cleanAlt(re *Regexp) { |
| | switch re.Op { |
| | case OpCharClass: |
| | re.Rune = cleanClass(&re.Rune) |
| | if len(re.Rune) == 2 && re.Rune[0] == 0 && re.Rune[1] == unicode.MaxRune { |
| | re.Rune = nil |
| | re.Op = OpAnyChar |
| | return |
| | } |
| | if len(re.Rune) == 4 && re.Rune[0] == 0 && re.Rune[1] == '\n'-1 && re.Rune[2] == '\n'+1 && re.Rune[3] == unicode.MaxRune { |
| | re.Rune = nil |
| | re.Op = OpAnyCharNotNL |
| | return |
| | } |
| | if cap(re.Rune)-len(re.Rune) > 100 { |
| | |
| | |
| | re.Rune = append(re.Rune0[:0], re.Rune...) |
| | } |
| | } |
| | } |
| |
|
| | |
| | |
| | |
| | |
| | func (p *parser) collapse(subs []*Regexp, op Op) *Regexp { |
| | if len(subs) == 1 { |
| | return subs[0] |
| | } |
| | re := p.newRegexp(op) |
| | re.Sub = re.Sub0[:0] |
| | for _, sub := range subs { |
| | if sub.Op == op { |
| | re.Sub = append(re.Sub, sub.Sub...) |
| | p.reuse(sub) |
| | } else { |
| | re.Sub = append(re.Sub, sub) |
| | } |
| | } |
| | if op == OpAlternate { |
| | re.Sub = p.factor(re.Sub) |
| | if len(re.Sub) == 1 { |
| | old := re |
| | re = re.Sub[0] |
| | p.reuse(old) |
| | } |
| | } |
| | return re |
| | } |
| |
|
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | func (p *parser) factor(sub []*Regexp) []*Regexp { |
| | if len(sub) < 2 { |
| | return sub |
| | } |
| |
|
| | |
| | var str []rune |
| | var strflags Flags |
| | start := 0 |
| | out := sub[:0] |
| | for i := 0; i <= len(sub); i++ { |
| | |
| | |
| | |
| | |
| | |
| | |
| | var istr []rune |
| | var iflags Flags |
| | if i < len(sub) { |
| | istr, iflags = p.leadingString(sub[i]) |
| | if iflags == strflags { |
| | same := 0 |
| | for same < len(str) && same < len(istr) && str[same] == istr[same] { |
| | same++ |
| | } |
| | if same > 0 { |
| | |
| | |
| | str = str[:same] |
| | continue |
| | } |
| | } |
| | } |
| |
|
| | |
| | |
| | |
| | |
| | |
| | if i == start { |
| | |
| | } else if i == start+1 { |
| | |
| | out = append(out, sub[start]) |
| | } else { |
| | |
| | prefix := p.newRegexp(OpLiteral) |
| | prefix.Flags = strflags |
| | prefix.Rune = append(prefix.Rune[:0], str...) |
| |
|
| | for j := start; j < i; j++ { |
| | sub[j] = p.removeLeadingString(sub[j], len(str)) |
| | p.checkLimits(sub[j]) |
| | } |
| | suffix := p.collapse(sub[start:i], OpAlternate) |
| |
|
| | re := p.newRegexp(OpConcat) |
| | re.Sub = append(re.Sub[:0], prefix, suffix) |
| | out = append(out, re) |
| | } |
| |
|
| | |
| | start = i |
| | str = istr |
| | strflags = iflags |
| | } |
| | sub = out |
| |
|
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | start = 0 |
| | out = sub[:0] |
| | var first *Regexp |
| | for i := 0; i <= len(sub); i++ { |
| | |
| | |
| | |
| | |
| | |
| | var ifirst *Regexp |
| | if i < len(sub) { |
| | ifirst = p.leadingRegexp(sub[i]) |
| | if first != nil && first.Equal(ifirst) && |
| | |
| | (isCharClass(first) || (first.Op == OpRepeat && first.Min == first.Max && isCharClass(first.Sub[0]))) { |
| | continue |
| | } |
| | } |
| |
|
| | |
| | |
| | |
| | |
| | if i == start { |
| | |
| | } else if i == start+1 { |
| | |
| | out = append(out, sub[start]) |
| | } else { |
| | |
| | prefix := first |
| | for j := start; j < i; j++ { |
| | reuse := j != start |
| | sub[j] = p.removeLeadingRegexp(sub[j], reuse) |
| | p.checkLimits(sub[j]) |
| | } |
| | suffix := p.collapse(sub[start:i], OpAlternate) |
| |
|
| | re := p.newRegexp(OpConcat) |
| | re.Sub = append(re.Sub[:0], prefix, suffix) |
| | out = append(out, re) |
| | } |
| |
|
| | |
| | start = i |
| | first = ifirst |
| | } |
| | sub = out |
| |
|
| | |
| | start = 0 |
| | out = sub[:0] |
| | for i := 0; i <= len(sub); i++ { |
| | |
| | |
| | |
| | |
| | |
| | |
| | if i < len(sub) && isCharClass(sub[i]) { |
| | continue |
| | } |
| |
|
| | |
| | |
| | if i == start { |
| | |
| | } else if i == start+1 { |
| | out = append(out, sub[start]) |
| | } else { |
| | |
| | |
| | max := start |
| | for j := start + 1; j < i; j++ { |
| | if sub[max].Op < sub[j].Op || sub[max].Op == sub[j].Op && len(sub[max].Rune) < len(sub[j].Rune) { |
| | max = j |
| | } |
| | } |
| | sub[start], sub[max] = sub[max], sub[start] |
| |
|
| | for j := start + 1; j < i; j++ { |
| | mergeCharClass(sub[start], sub[j]) |
| | p.reuse(sub[j]) |
| | } |
| | cleanAlt(sub[start]) |
| | out = append(out, sub[start]) |
| | } |
| |
|
| | |
| | if i < len(sub) { |
| | out = append(out, sub[i]) |
| | } |
| | start = i + 1 |
| | } |
| | sub = out |
| |
|
| | |
| | start = 0 |
| | out = sub[:0] |
| | for i := range sub { |
| | if i+1 < len(sub) && sub[i].Op == OpEmptyMatch && sub[i+1].Op == OpEmptyMatch { |
| | continue |
| | } |
| | out = append(out, sub[i]) |
| | } |
| | sub = out |
| |
|
| | return sub |
| | } |
| |
|
| | |
| | |
| | func (p *parser) leadingString(re *Regexp) ([]rune, Flags) { |
| | if re.Op == OpConcat && len(re.Sub) > 0 { |
| | re = re.Sub[0] |
| | } |
| | if re.Op != OpLiteral { |
| | return nil, 0 |
| | } |
| | return re.Rune, re.Flags & FoldCase |
| | } |
| |
|
| | |
| | |
| | func (p *parser) removeLeadingString(re *Regexp, n int) *Regexp { |
| | if re.Op == OpConcat && len(re.Sub) > 0 { |
| | |
| | |
| | sub := re.Sub[0] |
| | sub = p.removeLeadingString(sub, n) |
| | re.Sub[0] = sub |
| | if sub.Op == OpEmptyMatch { |
| | p.reuse(sub) |
| | switch len(re.Sub) { |
| | case 0, 1: |
| | |
| | re.Op = OpEmptyMatch |
| | re.Sub = nil |
| | case 2: |
| | old := re |
| | re = re.Sub[1] |
| | p.reuse(old) |
| | default: |
| | copy(re.Sub, re.Sub[1:]) |
| | re.Sub = re.Sub[:len(re.Sub)-1] |
| | } |
| | } |
| | return re |
| | } |
| |
|
| | if re.Op == OpLiteral { |
| | re.Rune = re.Rune[:copy(re.Rune, re.Rune[n:])] |
| | if len(re.Rune) == 0 { |
| | re.Op = OpEmptyMatch |
| | } |
| | } |
| | return re |
| | } |
| |
|
| | |
| | |
| | func (p *parser) leadingRegexp(re *Regexp) *Regexp { |
| | if re.Op == OpEmptyMatch { |
| | return nil |
| | } |
| | if re.Op == OpConcat && len(re.Sub) > 0 { |
| | sub := re.Sub[0] |
| | if sub.Op == OpEmptyMatch { |
| | return nil |
| | } |
| | return sub |
| | } |
| | return re |
| | } |
| |
|
| | |
| | |
| | |
| | func (p *parser) removeLeadingRegexp(re *Regexp, reuse bool) *Regexp { |
| | if re.Op == OpConcat && len(re.Sub) > 0 { |
| | if reuse { |
| | p.reuse(re.Sub[0]) |
| | } |
| | re.Sub = re.Sub[:copy(re.Sub, re.Sub[1:])] |
| | switch len(re.Sub) { |
| | case 0: |
| | re.Op = OpEmptyMatch |
| | re.Sub = nil |
| | case 1: |
| | old := re |
| | re = re.Sub[0] |
| | p.reuse(old) |
| | } |
| | return re |
| | } |
| | if reuse { |
| | p.reuse(re) |
| | } |
| | return p.newRegexp(OpEmptyMatch) |
| | } |
| |
|
| | func literalRegexp(s string, flags Flags) *Regexp { |
| | re := &Regexp{Op: OpLiteral} |
| | re.Flags = flags |
| | re.Rune = re.Rune0[:0] |
| | for _, c := range s { |
| | if len(re.Rune) >= cap(re.Rune) { |
| | |
| | re.Rune = []rune(s) |
| | break |
| | } |
| | re.Rune = append(re.Rune, c) |
| | } |
| | return re |
| | } |
| |
|
| | |
| |
|
| | |
| | |
| | |
| | func Parse(s string, flags Flags) (*Regexp, error) { |
| | return parse(s, flags) |
| | } |
| |
|
| | func parse(s string, flags Flags) (_ *Regexp, err error) { |
| | defer func() { |
| | switch r := recover(); r { |
| | default: |
| | panic(r) |
| | case nil: |
| | |
| | case ErrLarge: |
| | err = &Error{Code: ErrLarge, Expr: s} |
| | case ErrNestingDepth: |
| | err = &Error{Code: ErrNestingDepth, Expr: s} |
| | } |
| | }() |
| |
|
| | if flags&Literal != 0 { |
| | |
| | if err := checkUTF8(s); err != nil { |
| | return nil, err |
| | } |
| | return literalRegexp(s, flags), nil |
| | } |
| |
|
| | |
| | var ( |
| | p parser |
| | c rune |
| | op Op |
| | lastRepeat string |
| | ) |
| | p.flags = flags |
| | p.wholeRegexp = s |
| | t := s |
| | for t != "" { |
| | repeat := "" |
| | BigSwitch: |
| | switch t[0] { |
| | default: |
| | if c, t, err = nextRune(t); err != nil { |
| | return nil, err |
| | } |
| | p.literal(c) |
| |
|
| | case '(': |
| | if p.flags&PerlX != 0 && len(t) >= 2 && t[1] == '?' { |
| | |
| | if t, err = p.parsePerlFlags(t); err != nil { |
| | return nil, err |
| | } |
| | break |
| | } |
| | p.numCap++ |
| | p.op(opLeftParen).Cap = p.numCap |
| | t = t[1:] |
| | case '|': |
| | p.parseVerticalBar() |
| | t = t[1:] |
| | case ')': |
| | if err = p.parseRightParen(); err != nil { |
| | return nil, err |
| | } |
| | t = t[1:] |
| | case '^': |
| | if p.flags&OneLine != 0 { |
| | p.op(OpBeginText) |
| | } else { |
| | p.op(OpBeginLine) |
| | } |
| | t = t[1:] |
| | case '$': |
| | if p.flags&OneLine != 0 { |
| | p.op(OpEndText).Flags |= WasDollar |
| | } else { |
| | p.op(OpEndLine) |
| | } |
| | t = t[1:] |
| | case '.': |
| | if p.flags&DotNL != 0 { |
| | p.op(OpAnyChar) |
| | } else { |
| | p.op(OpAnyCharNotNL) |
| | } |
| | t = t[1:] |
| | case '[': |
| | if t, err = p.parseClass(t); err != nil { |
| | return nil, err |
| | } |
| | case '*', '+', '?': |
| | before := t |
| | switch t[0] { |
| | case '*': |
| | op = OpStar |
| | case '+': |
| | op = OpPlus |
| | case '?': |
| | op = OpQuest |
| | } |
| | after := t[1:] |
| | if after, err = p.repeat(op, 0, 0, before, after, lastRepeat); err != nil { |
| | return nil, err |
| | } |
| | repeat = before |
| | t = after |
| | case '{': |
| | op = OpRepeat |
| | before := t |
| | min, max, after, ok := p.parseRepeat(t) |
| | if !ok { |
| | |
| | p.literal('{') |
| | t = t[1:] |
| | break |
| | } |
| | if min < 0 || min > 1000 || max > 1000 || max >= 0 && min > max { |
| | |
| | return nil, &Error{ErrInvalidRepeatSize, before[:len(before)-len(after)]} |
| | } |
| | if after, err = p.repeat(op, min, max, before, after, lastRepeat); err != nil { |
| | return nil, err |
| | } |
| | repeat = before |
| | t = after |
| | case '\\': |
| | if p.flags&PerlX != 0 && len(t) >= 2 { |
| | switch t[1] { |
| | case 'A': |
| | p.op(OpBeginText) |
| | t = t[2:] |
| | break BigSwitch |
| | case 'b': |
| | p.op(OpWordBoundary) |
| | t = t[2:] |
| | break BigSwitch |
| | case 'B': |
| | p.op(OpNoWordBoundary) |
| | t = t[2:] |
| | break BigSwitch |
| | case 'C': |
| | |
| | return nil, &Error{ErrInvalidEscape, t[:2]} |
| | case 'Q': |
| | |
| | var lit string |
| | lit, t, _ = strings.Cut(t[2:], `\E`) |
| | for lit != "" { |
| | c, rest, err := nextRune(lit) |
| | if err != nil { |
| | return nil, err |
| | } |
| | p.literal(c) |
| | lit = rest |
| | } |
| | break BigSwitch |
| | case 'z': |
| | p.op(OpEndText) |
| | t = t[2:] |
| | break BigSwitch |
| | } |
| | } |
| |
|
| | re := p.newRegexp(OpCharClass) |
| | re.Flags = p.flags |
| |
|
| | |
| | if len(t) >= 2 && (t[1] == 'p' || t[1] == 'P') { |
| | r, rest, err := p.parseUnicodeClass(t, re.Rune0[:0]) |
| | if err != nil { |
| | return nil, err |
| | } |
| | if r != nil { |
| | re.Rune = r |
| | t = rest |
| | p.push(re) |
| | break BigSwitch |
| | } |
| | } |
| |
|
| | |
| | if r, rest := p.parsePerlClassEscape(t, re.Rune0[:0]); r != nil { |
| | re.Rune = r |
| | t = rest |
| | p.push(re) |
| | break BigSwitch |
| | } |
| | p.reuse(re) |
| |
|
| | |
| | if c, t, err = p.parseEscape(t); err != nil { |
| | return nil, err |
| | } |
| | p.literal(c) |
| | } |
| | lastRepeat = repeat |
| | } |
| |
|
| | p.concat() |
| | if p.swapVerticalBar() { |
| | |
| | p.stack = p.stack[:len(p.stack)-1] |
| | } |
| | p.alternate() |
| |
|
| | n := len(p.stack) |
| | if n != 1 { |
| | return nil, &Error{ErrMissingParen, s} |
| | } |
| | return p.stack[0], nil |
| | } |
| |
|
| | |
| | |
| | |
| | func (p *parser) parseRepeat(s string) (min, max int, rest string, ok bool) { |
| | if s == "" || s[0] != '{' { |
| | return |
| | } |
| | s = s[1:] |
| | var ok1 bool |
| | if min, s, ok1 = p.parseInt(s); !ok1 { |
| | return |
| | } |
| | if s == "" { |
| | return |
| | } |
| | if s[0] != ',' { |
| | max = min |
| | } else { |
| | s = s[1:] |
| | if s == "" { |
| | return |
| | } |
| | if s[0] == '}' { |
| | max = -1 |
| | } else if max, s, ok1 = p.parseInt(s); !ok1 { |
| | return |
| | } else if max < 0 { |
| | |
| | min = -1 |
| | } |
| | } |
| | if s == "" || s[0] != '}' { |
| | return |
| | } |
| | rest = s[1:] |
| | ok = true |
| | return |
| | } |
| |
|
| | |
| | |
| | |
| | func (p *parser) parsePerlFlags(s string) (rest string, err error) { |
| | t := s |
| |
|
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | startsWithP := len(t) > 4 && t[2] == 'P' && t[3] == '<' |
| | startsWithName := len(t) > 3 && t[2] == '<' |
| |
|
| | if startsWithP || startsWithName { |
| | |
| | exprStartPos := 4 |
| | if startsWithName { |
| | exprStartPos = 3 |
| | } |
| |
|
| | |
| | end := strings.IndexRune(t, '>') |
| | if end < 0 { |
| | if err = checkUTF8(t); err != nil { |
| | return "", err |
| | } |
| | return "", &Error{ErrInvalidNamedCapture, s} |
| | } |
| |
|
| | capture := t[:end+1] |
| | name := t[exprStartPos:end] |
| | if err = checkUTF8(name); err != nil { |
| | return "", err |
| | } |
| | if !isValidCaptureName(name) { |
| | return "", &Error{ErrInvalidNamedCapture, capture} |
| | } |
| |
|
| | |
| | p.numCap++ |
| | re := p.op(opLeftParen) |
| | re.Cap = p.numCap |
| | re.Name = name |
| | return t[end+1:], nil |
| | } |
| |
|
| | |
| | var c rune |
| | t = t[2:] |
| | flags := p.flags |
| | sign := +1 |
| | sawFlag := false |
| | Loop: |
| | for t != "" { |
| | if c, t, err = nextRune(t); err != nil { |
| | return "", err |
| | } |
| | switch c { |
| | default: |
| | break Loop |
| |
|
| | |
| | case 'i': |
| | flags |= FoldCase |
| | sawFlag = true |
| | case 'm': |
| | flags &^= OneLine |
| | sawFlag = true |
| | case 's': |
| | flags |= DotNL |
| | sawFlag = true |
| | case 'U': |
| | flags |= NonGreedy |
| | sawFlag = true |
| |
|
| | |
| | case '-': |
| | if sign < 0 { |
| | break Loop |
| | } |
| | sign = -1 |
| | |
| | |
| | flags = ^flags |
| | sawFlag = false |
| |
|
| | |
| | case ':', ')': |
| | if sign < 0 { |
| | if !sawFlag { |
| | break Loop |
| | } |
| | flags = ^flags |
| | } |
| | if c == ':' { |
| | |
| | p.op(opLeftParen) |
| | } |
| | p.flags = flags |
| | return t, nil |
| | } |
| | } |
| |
|
| | return "", &Error{ErrInvalidPerlOp, s[:len(s)-len(t)]} |
| | } |
| |
|
| | |
| | |
| | |
| | |
| | |
| | func isValidCaptureName(name string) bool { |
| | if name == "" { |
| | return false |
| | } |
| | for _, c := range name { |
| | if c != '_' && !isalnum(c) { |
| | return false |
| | } |
| | } |
| | return true |
| | } |
| |
|
| | |
| | func (p *parser) parseInt(s string) (n int, rest string, ok bool) { |
| | if s == "" || s[0] < '0' || '9' < s[0] { |
| | return |
| | } |
| | |
| | if len(s) >= 2 && s[0] == '0' && '0' <= s[1] && s[1] <= '9' { |
| | return |
| | } |
| | t := s |
| | for s != "" && '0' <= s[0] && s[0] <= '9' { |
| | s = s[1:] |
| | } |
| | rest = s |
| | ok = true |
| | |
| | t = t[:len(t)-len(s)] |
| | for i := 0; i < len(t); i++ { |
| | |
| | if n >= 1e8 { |
| | n = -1 |
| | break |
| | } |
| | n = n*10 + int(t[i]) - '0' |
| | } |
| | return |
| | } |
| |
|
| | |
| | |
| | func isCharClass(re *Regexp) bool { |
| | return re.Op == OpLiteral && len(re.Rune) == 1 || |
| | re.Op == OpCharClass || |
| | re.Op == OpAnyCharNotNL || |
| | re.Op == OpAnyChar |
| | } |
| |
|
| | |
| | func matchRune(re *Regexp, r rune) bool { |
| | switch re.Op { |
| | case OpLiteral: |
| | return len(re.Rune) == 1 && re.Rune[0] == r |
| | case OpCharClass: |
| | for i := 0; i < len(re.Rune); i += 2 { |
| | if re.Rune[i] <= r && r <= re.Rune[i+1] { |
| | return true |
| | } |
| | } |
| | return false |
| | case OpAnyCharNotNL: |
| | return r != '\n' |
| | case OpAnyChar: |
| | return true |
| | } |
| | return false |
| | } |
| |
|
| | |
| | func (p *parser) parseVerticalBar() { |
| | p.concat() |
| |
|
| | |
| | |
| | |
| | |
| | if !p.swapVerticalBar() { |
| | p.op(opVerticalBar) |
| | } |
| | } |
| |
|
| | |
| | |
| | |
| | func mergeCharClass(dst, src *Regexp) { |
| | switch dst.Op { |
| | case OpAnyChar: |
| | |
| | case OpAnyCharNotNL: |
| | |
| | if matchRune(src, '\n') { |
| | dst.Op = OpAnyChar |
| | } |
| | case OpCharClass: |
| | |
| | if src.Op == OpLiteral { |
| | dst.Rune = appendLiteral(dst.Rune, src.Rune[0], src.Flags) |
| | } else { |
| | dst.Rune = appendClass(dst.Rune, src.Rune) |
| | } |
| | case OpLiteral: |
| | |
| | if src.Rune[0] == dst.Rune[0] && src.Flags == dst.Flags { |
| | break |
| | } |
| | dst.Op = OpCharClass |
| | dst.Rune = appendLiteral(dst.Rune[:0], dst.Rune[0], dst.Flags) |
| | dst.Rune = appendLiteral(dst.Rune, src.Rune[0], src.Flags) |
| | } |
| | } |
| |
|
| | |
| | |
| | |
| | func (p *parser) swapVerticalBar() bool { |
| | |
| | |
| | n := len(p.stack) |
| | if n >= 3 && p.stack[n-2].Op == opVerticalBar && isCharClass(p.stack[n-1]) && isCharClass(p.stack[n-3]) { |
| | re1 := p.stack[n-1] |
| | re3 := p.stack[n-3] |
| | |
| | if re1.Op > re3.Op { |
| | re1, re3 = re3, re1 |
| | p.stack[n-3] = re3 |
| | } |
| | mergeCharClass(re3, re1) |
| | p.reuse(re1) |
| | p.stack = p.stack[:n-1] |
| | return true |
| | } |
| |
|
| | if n >= 2 { |
| | re1 := p.stack[n-1] |
| | re2 := p.stack[n-2] |
| | if re2.Op == opVerticalBar { |
| | if n >= 3 { |
| | |
| | |
| | cleanAlt(p.stack[n-3]) |
| | } |
| | p.stack[n-2] = re1 |
| | p.stack[n-1] = re2 |
| | return true |
| | } |
| | } |
| | return false |
| | } |
| |
|
| | |
| | func (p *parser) parseRightParen() error { |
| | p.concat() |
| | if p.swapVerticalBar() { |
| | |
| | p.stack = p.stack[:len(p.stack)-1] |
| | } |
| | p.alternate() |
| |
|
| | n := len(p.stack) |
| | if n < 2 { |
| | return &Error{ErrUnexpectedParen, p.wholeRegexp} |
| | } |
| | re1 := p.stack[n-1] |
| | re2 := p.stack[n-2] |
| | p.stack = p.stack[:n-2] |
| | if re2.Op != opLeftParen { |
| | return &Error{ErrUnexpectedParen, p.wholeRegexp} |
| | } |
| | |
| | p.flags = re2.Flags |
| | if re2.Cap == 0 { |
| | |
| | p.push(re1) |
| | } else { |
| | re2.Op = OpCapture |
| | re2.Sub = re2.Sub0[:1] |
| | re2.Sub[0] = re1 |
| | p.push(re2) |
| | } |
| | return nil |
| | } |
| |
|
| | |
| | |
| | func (p *parser) parseEscape(s string) (r rune, rest string, err error) { |
| | t := s[1:] |
| | if t == "" { |
| | return 0, "", &Error{ErrTrailingBackslash, ""} |
| | } |
| | c, t, err := nextRune(t) |
| | if err != nil { |
| | return 0, "", err |
| | } |
| |
|
| | Switch: |
| | switch c { |
| | default: |
| | if c < utf8.RuneSelf && !isalnum(c) { |
| | |
| | |
| | |
| | |
| | return c, t, nil |
| | } |
| |
|
| | |
| | case '1', '2', '3', '4', '5', '6', '7': |
| | |
| | if t == "" || t[0] < '0' || t[0] > '7' { |
| | break |
| | } |
| | fallthrough |
| | case '0': |
| | |
| | r = c - '0' |
| | for i := 1; i < 3; i++ { |
| | if t == "" || t[0] < '0' || t[0] > '7' { |
| | break |
| | } |
| | r = r*8 + rune(t[0]) - '0' |
| | t = t[1:] |
| | } |
| | return r, t, nil |
| |
|
| | |
| | case 'x': |
| | if t == "" { |
| | break |
| | } |
| | if c, t, err = nextRune(t); err != nil { |
| | return 0, "", err |
| | } |
| | if c == '{' { |
| | |
| | |
| | |
| | |
| | nhex := 0 |
| | r = 0 |
| | for { |
| | if t == "" { |
| | break Switch |
| | } |
| | if c, t, err = nextRune(t); err != nil { |
| | return 0, "", err |
| | } |
| | if c == '}' { |
| | break |
| | } |
| | v := unhex(c) |
| | if v < 0 { |
| | break Switch |
| | } |
| | r = r*16 + v |
| | if r > unicode.MaxRune { |
| | break Switch |
| | } |
| | nhex++ |
| | } |
| | if nhex == 0 { |
| | break Switch |
| | } |
| | return r, t, nil |
| | } |
| |
|
| | |
| | x := unhex(c) |
| | if c, t, err = nextRune(t); err != nil { |
| | return 0, "", err |
| | } |
| | y := unhex(c) |
| | if x < 0 || y < 0 { |
| | break |
| | } |
| | return x*16 + y, t, nil |
| |
|
| | |
| | |
| | |
| | |
| | |
| | |
| | case 'a': |
| | return '\a', t, err |
| | case 'f': |
| | return '\f', t, err |
| | case 'n': |
| | return '\n', t, err |
| | case 'r': |
| | return '\r', t, err |
| | case 't': |
| | return '\t', t, err |
| | case 'v': |
| | return '\v', t, err |
| | } |
| | return 0, "", &Error{ErrInvalidEscape, s[:len(s)-len(t)]} |
| | } |
| |
|
| | |
| | |
| | func (p *parser) parseClassChar(s, wholeClass string) (r rune, rest string, err error) { |
| | if s == "" { |
| | return 0, "", &Error{Code: ErrMissingBracket, Expr: wholeClass} |
| | } |
| |
|
| | |
| | |
| | if s[0] == '\\' { |
| | return p.parseEscape(s) |
| | } |
| |
|
| | return nextRune(s) |
| | } |
| |
|
| | type charGroup struct { |
| | sign int |
| | class []rune |
| | } |
| |
|
| | |
| |
|
| | |
| | |
| | |
| | func (p *parser) parsePerlClassEscape(s string, r []rune) (out []rune, rest string) { |
| | if p.flags&PerlX == 0 || len(s) < 2 || s[0] != '\\' { |
| | return |
| | } |
| | g := perlGroup[s[0:2]] |
| | if g.sign == 0 { |
| | return |
| | } |
| | return p.appendGroup(r, g), s[2:] |
| | } |
| |
|
| | |
| | |
| | |
| | func (p *parser) parseNamedClass(s string, r []rune) (out []rune, rest string, err error) { |
| | if len(s) < 2 || s[0] != '[' || s[1] != ':' { |
| | return |
| | } |
| |
|
| | i := strings.Index(s[2:], ":]") |
| | if i < 0 { |
| | return |
| | } |
| | i += 2 |
| | name, s := s[0:i+2], s[i+2:] |
| | g := posixGroup[name] |
| | if g.sign == 0 { |
| | return nil, "", &Error{ErrInvalidCharRange, name} |
| | } |
| | return p.appendGroup(r, g), s, nil |
| | } |
| |
|
| | func (p *parser) appendGroup(r []rune, g charGroup) []rune { |
| | if p.flags&FoldCase == 0 { |
| | if g.sign < 0 { |
| | r = appendNegatedClass(r, g.class) |
| | } else { |
| | r = appendClass(r, g.class) |
| | } |
| | } else { |
| | tmp := p.tmpClass[:0] |
| | tmp = appendFoldedClass(tmp, g.class) |
| | p.tmpClass = tmp |
| | tmp = cleanClass(&p.tmpClass) |
| | if g.sign < 0 { |
| | r = appendNegatedClass(r, tmp) |
| | } else { |
| | r = appendClass(r, tmp) |
| | } |
| | } |
| | return r |
| | } |
| |
|
| | var anyTable = &unicode.RangeTable{ |
| | R16: []unicode.Range16{{Lo: 0, Hi: 1<<16 - 1, Stride: 1}}, |
| | R32: []unicode.Range32{{Lo: 1 << 16, Hi: unicode.MaxRune, Stride: 1}}, |
| | } |
| |
|
| | var asciiTable = &unicode.RangeTable{ |
| | R16: []unicode.Range16{{Lo: 0, Hi: 0x7F, Stride: 1}}, |
| | } |
| |
|
| | var asciiFoldTable = &unicode.RangeTable{ |
| | R16: []unicode.Range16{ |
| | {Lo: 0, Hi: 0x7F, Stride: 1}, |
| | {Lo: 0x017F, Hi: 0x017F, Stride: 1}, |
| | {Lo: 0x212A, Hi: 0x212A, Stride: 1}, |
| | }, |
| | } |
| |
|
| | |
| | |
| | var categoryAliases struct { |
| | once sync.Once |
| | m map[string]string |
| | } |
| |
|
| | |
| | func initCategoryAliases() { |
| | categoryAliases.m = make(map[string]string) |
| | for name, actual := range unicode.CategoryAliases { |
| | categoryAliases.m[canonicalName(name)] = actual |
| | } |
| | } |
| |
|
| | |
| | |
| | |
| | |
| | |
| | func canonicalName(name string) string { |
| | var b []byte |
| | first := true |
| | for i := range len(name) { |
| | c := name[i] |
| | switch { |
| | case c == '_' || c == '-' || c == ' ': |
| | c = ' ' |
| | case first: |
| | if 'a' <= c && c <= 'z' { |
| | c -= 'a' - 'A' |
| | } |
| | first = false |
| | default: |
| | if 'A' <= c && c <= 'Z' { |
| | c += 'a' - 'A' |
| | } |
| | } |
| | if b == nil { |
| | if c == name[i] && c != ' ' { |
| | |
| | continue |
| | } |
| | b = make([]byte, i, len(name)) |
| | copy(b, name[:i]) |
| | } |
| | if c == ' ' { |
| | continue |
| | } |
| | b = append(b, c) |
| | } |
| | if b == nil { |
| | return name |
| | } |
| | return string(b) |
| | } |
| |
|
| | |
| | |
| | |
| | func unicodeTable(name string) (tab, fold *unicode.RangeTable, sign int) { |
| | name = canonicalName(name) |
| |
|
| | |
| | |
| | switch name { |
| | case "Any": |
| | return anyTable, anyTable, +1 |
| | case "Assigned": |
| | return unicode.Cn, unicode.Cn, -1 |
| | case "Ascii": |
| | return asciiTable, asciiFoldTable, +1 |
| | case "Lc": |
| | return unicode.Categories["LC"], unicode.FoldCategory["LC"], +1 |
| | } |
| | if t := unicode.Categories[name]; t != nil { |
| | return t, unicode.FoldCategory[name], +1 |
| | } |
| | if t := unicode.Scripts[name]; t != nil { |
| | return t, unicode.FoldScript[name], +1 |
| | } |
| |
|
| | |
| | |
| | |
| | categoryAliases.once.Do(initCategoryAliases) |
| | if actual := categoryAliases.m[name]; actual != "" { |
| | t := unicode.Categories[actual] |
| | return t, unicode.FoldCategory[actual], +1 |
| | } |
| | return nil, nil, 0 |
| | } |
| |
|
| | |
| | |
| | |
| | func (p *parser) parseUnicodeClass(s string, r []rune) (out []rune, rest string, err error) { |
| | if p.flags&UnicodeGroups == 0 || len(s) < 2 || s[0] != '\\' || s[1] != 'p' && s[1] != 'P' { |
| | return |
| | } |
| |
|
| | |
| | sign := +1 |
| | if s[1] == 'P' { |
| | sign = -1 |
| | } |
| | t := s[2:] |
| | c, t, err := nextRune(t) |
| | if err != nil { |
| | return |
| | } |
| | var seq, name string |
| | if c != '{' { |
| | |
| | seq = s[:len(s)-len(t)] |
| | name = seq[2:] |
| | } else { |
| | |
| | end := strings.IndexRune(s, '}') |
| | if end < 0 { |
| | if err = checkUTF8(s); err != nil { |
| | return |
| | } |
| | return nil, "", &Error{ErrInvalidCharRange, s} |
| | } |
| | seq, t = s[:end+1], s[end+1:] |
| | name = s[3:end] |
| | if err = checkUTF8(name); err != nil { |
| | return |
| | } |
| | } |
| |
|
| | |
| | if name != "" && name[0] == '^' { |
| | sign = -sign |
| | name = name[1:] |
| | } |
| |
|
| | tab, fold, tsign := unicodeTable(name) |
| | if tab == nil { |
| | return nil, "", &Error{ErrInvalidCharRange, seq} |
| | } |
| | if tsign < 0 { |
| | sign = -sign |
| | } |
| |
|
| | if p.flags&FoldCase == 0 || fold == nil { |
| | if sign > 0 { |
| | r = appendTable(r, tab) |
| | } else { |
| | r = appendNegatedTable(r, tab) |
| | } |
| | } else { |
| | |
| | |
| | |
| | tmp := p.tmpClass[:0] |
| | tmp = appendTable(tmp, tab) |
| | tmp = appendTable(tmp, fold) |
| | p.tmpClass = tmp |
| | tmp = cleanClass(&p.tmpClass) |
| | if sign > 0 { |
| | r = appendClass(r, tmp) |
| | } else { |
| | r = appendNegatedClass(r, tmp) |
| | } |
| | } |
| | return r, t, nil |
| | } |
| |
|
| | |
| | |
| | func (p *parser) parseClass(s string) (rest string, err error) { |
| | t := s[1:] |
| | re := p.newRegexp(OpCharClass) |
| | re.Flags = p.flags |
| | re.Rune = re.Rune0[:0] |
| |
|
| | sign := +1 |
| | if t != "" && t[0] == '^' { |
| | sign = -1 |
| | t = t[1:] |
| |
|
| | |
| | |
| | if p.flags&ClassNL == 0 { |
| | re.Rune = append(re.Rune, '\n', '\n') |
| | } |
| | } |
| |
|
| | class := re.Rune |
| | first := true |
| | for t == "" || t[0] != ']' || first { |
| | |
| | |
| | if t != "" && t[0] == '-' && p.flags&PerlX == 0 && !first && (len(t) == 1 || t[1] != ']') { |
| | _, size := utf8.DecodeRuneInString(t[1:]) |
| | return "", &Error{Code: ErrInvalidCharRange, Expr: t[:1+size]} |
| | } |
| | first = false |
| |
|
| | |
| | if len(t) > 2 && t[0] == '[' && t[1] == ':' { |
| | nclass, nt, err := p.parseNamedClass(t, class) |
| | if err != nil { |
| | return "", err |
| | } |
| | if nclass != nil { |
| | class, t = nclass, nt |
| | continue |
| | } |
| | } |
| |
|
| | |
| | nclass, nt, err := p.parseUnicodeClass(t, class) |
| | if err != nil { |
| | return "", err |
| | } |
| | if nclass != nil { |
| | class, t = nclass, nt |
| | continue |
| | } |
| |
|
| | |
| | if nclass, nt := p.parsePerlClassEscape(t, class); nclass != nil { |
| | class, t = nclass, nt |
| | continue |
| | } |
| |
|
| | |
| | rng := t |
| | var lo, hi rune |
| | if lo, t, err = p.parseClassChar(t, s); err != nil { |
| | return "", err |
| | } |
| | hi = lo |
| | |
| | if len(t) >= 2 && t[0] == '-' && t[1] != ']' { |
| | t = t[1:] |
| | if hi, t, err = p.parseClassChar(t, s); err != nil { |
| | return "", err |
| | } |
| | if hi < lo { |
| | rng = rng[:len(rng)-len(t)] |
| | return "", &Error{Code: ErrInvalidCharRange, Expr: rng} |
| | } |
| | } |
| | if p.flags&FoldCase == 0 { |
| | class = appendRange(class, lo, hi) |
| | } else { |
| | class = appendFoldedRange(class, lo, hi) |
| | } |
| | } |
| | t = t[1:] |
| |
|
| | |
| | re.Rune = class |
| | class = cleanClass(&re.Rune) |
| | if sign < 0 { |
| | class = negateClass(class) |
| | } |
| | re.Rune = class |
| | p.push(re) |
| | return t, nil |
| | } |
| |
|
| | |
| | |
| | func cleanClass(rp *[]rune) []rune { |
| |
|
| | |
| | sort.Sort(ranges{rp}) |
| |
|
| | r := *rp |
| | if len(r) < 2 { |
| | return r |
| | } |
| |
|
| | |
| | w := 2 |
| | for i := 2; i < len(r); i += 2 { |
| | lo, hi := r[i], r[i+1] |
| | if lo <= r[w-1]+1 { |
| | |
| | if hi > r[w-1] { |
| | r[w-1] = hi |
| | } |
| | continue |
| | } |
| | |
| | r[w] = lo |
| | r[w+1] = hi |
| | w += 2 |
| | } |
| |
|
| | return r[:w] |
| | } |
| |
|
| | |
| | |
| | func inCharClass(r rune, class []rune) bool { |
| | _, ok := sort.Find(len(class)/2, func(i int) int { |
| | lo, hi := class[2*i], class[2*i+1] |
| | if r > hi { |
| | return +1 |
| | } |
| | if r < lo { |
| | return -1 |
| | } |
| | return 0 |
| | }) |
| | return ok |
| | } |
| |
|
| | |
| | func appendLiteral(r []rune, x rune, flags Flags) []rune { |
| | if flags&FoldCase != 0 { |
| | return appendFoldedRange(r, x, x) |
| | } |
| | return appendRange(r, x, x) |
| | } |
| |
|
| | |
| | func appendRange(r []rune, lo, hi rune) []rune { |
| | |
| | |
| | |
| | |
| | n := len(r) |
| | for i := 2; i <= 4; i += 2 { |
| | if n >= i { |
| | rlo, rhi := r[n-i], r[n-i+1] |
| | if lo <= rhi+1 && rlo <= hi+1 { |
| | if lo < rlo { |
| | r[n-i] = lo |
| | } |
| | if hi > rhi { |
| | r[n-i+1] = hi |
| | } |
| | return r |
| | } |
| | } |
| | } |
| |
|
| | return append(r, lo, hi) |
| | } |
| |
|
| | const ( |
| | |
| | |
| | minFold = 0x0041 |
| | maxFold = 0x1e943 |
| | ) |
| |
|
| | |
| | |
| | func appendFoldedRange(r []rune, lo, hi rune) []rune { |
| | |
| | if lo <= minFold && hi >= maxFold { |
| | |
| | return appendRange(r, lo, hi) |
| | } |
| | if hi < minFold || lo > maxFold { |
| | |
| | return appendRange(r, lo, hi) |
| | } |
| | if lo < minFold { |
| | |
| | r = appendRange(r, lo, minFold-1) |
| | lo = minFold |
| | } |
| | if hi > maxFold { |
| | |
| | r = appendRange(r, maxFold+1, hi) |
| | hi = maxFold |
| | } |
| |
|
| | |
| | for c := lo; c <= hi; c++ { |
| | r = appendRange(r, c, c) |
| | f := unicode.SimpleFold(c) |
| | for f != c { |
| | r = appendRange(r, f, f) |
| | f = unicode.SimpleFold(f) |
| | } |
| | } |
| | return r |
| | } |
| |
|
| | |
| | |
| | func appendClass(r []rune, x []rune) []rune { |
| | for i := 0; i < len(x); i += 2 { |
| | r = appendRange(r, x[i], x[i+1]) |
| | } |
| | return r |
| | } |
| |
|
| | |
| | func appendFoldedClass(r []rune, x []rune) []rune { |
| | for i := 0; i < len(x); i += 2 { |
| | r = appendFoldedRange(r, x[i], x[i+1]) |
| | } |
| | return r |
| | } |
| |
|
| | |
| | |
| | func appendNegatedClass(r []rune, x []rune) []rune { |
| | nextLo := '\u0000' |
| | for i := 0; i < len(x); i += 2 { |
| | lo, hi := x[i], x[i+1] |
| | if nextLo <= lo-1 { |
| | r = appendRange(r, nextLo, lo-1) |
| | } |
| | nextLo = hi + 1 |
| | } |
| | if nextLo <= unicode.MaxRune { |
| | r = appendRange(r, nextLo, unicode.MaxRune) |
| | } |
| | return r |
| | } |
| |
|
| | |
| | func appendTable(r []rune, x *unicode.RangeTable) []rune { |
| | for _, xr := range x.R16 { |
| | lo, hi, stride := rune(xr.Lo), rune(xr.Hi), rune(xr.Stride) |
| | if stride == 1 { |
| | r = appendRange(r, lo, hi) |
| | continue |
| | } |
| | for c := lo; c <= hi; c += stride { |
| | r = appendRange(r, c, c) |
| | } |
| | } |
| | for _, xr := range x.R32 { |
| | lo, hi, stride := rune(xr.Lo), rune(xr.Hi), rune(xr.Stride) |
| | if stride == 1 { |
| | r = appendRange(r, lo, hi) |
| | continue |
| | } |
| | for c := lo; c <= hi; c += stride { |
| | r = appendRange(r, c, c) |
| | } |
| | } |
| | return r |
| | } |
| |
|
| | |
| | func appendNegatedTable(r []rune, x *unicode.RangeTable) []rune { |
| | nextLo := '\u0000' |
| | for _, xr := range x.R16 { |
| | lo, hi, stride := rune(xr.Lo), rune(xr.Hi), rune(xr.Stride) |
| | if stride == 1 { |
| | if nextLo <= lo-1 { |
| | r = appendRange(r, nextLo, lo-1) |
| | } |
| | nextLo = hi + 1 |
| | continue |
| | } |
| | for c := lo; c <= hi; c += stride { |
| | if nextLo <= c-1 { |
| | r = appendRange(r, nextLo, c-1) |
| | } |
| | nextLo = c + 1 |
| | } |
| | } |
| | for _, xr := range x.R32 { |
| | lo, hi, stride := rune(xr.Lo), rune(xr.Hi), rune(xr.Stride) |
| | if stride == 1 { |
| | if nextLo <= lo-1 { |
| | r = appendRange(r, nextLo, lo-1) |
| | } |
| | nextLo = hi + 1 |
| | continue |
| | } |
| | for c := lo; c <= hi; c += stride { |
| | if nextLo <= c-1 { |
| | r = appendRange(r, nextLo, c-1) |
| | } |
| | nextLo = c + 1 |
| | } |
| | } |
| | if nextLo <= unicode.MaxRune { |
| | r = appendRange(r, nextLo, unicode.MaxRune) |
| | } |
| | return r |
| | } |
| |
|
| | |
| | |
| | func negateClass(r []rune) []rune { |
| | nextLo := '\u0000' |
| | w := 0 |
| | for i := 0; i < len(r); i += 2 { |
| | lo, hi := r[i], r[i+1] |
| | if nextLo <= lo-1 { |
| | r[w] = nextLo |
| | r[w+1] = lo - 1 |
| | w += 2 |
| | } |
| | nextLo = hi + 1 |
| | } |
| | r = r[:w] |
| | if nextLo <= unicode.MaxRune { |
| | |
| | |
| | r = append(r, nextLo, unicode.MaxRune) |
| | } |
| | return r |
| | } |
| |
|
| | |
| | |
| | |
| | |
| | type ranges struct { |
| | p *[]rune |
| | } |
| |
|
| | func (ra ranges) Less(i, j int) bool { |
| | p := *ra.p |
| | i *= 2 |
| | j *= 2 |
| | return p[i] < p[j] || p[i] == p[j] && p[i+1] > p[j+1] |
| | } |
| |
|
| | func (ra ranges) Len() int { |
| | return len(*ra.p) / 2 |
| | } |
| |
|
| | func (ra ranges) Swap(i, j int) { |
| | p := *ra.p |
| | i *= 2 |
| | j *= 2 |
| | p[i], p[i+1], p[j], p[j+1] = p[j], p[j+1], p[i], p[i+1] |
| | } |
| |
|
| | func checkUTF8(s string) error { |
| | for s != "" { |
| | rune, size := utf8.DecodeRuneInString(s) |
| | if rune == utf8.RuneError && size == 1 { |
| | return &Error{Code: ErrInvalidUTF8, Expr: s} |
| | } |
| | s = s[size:] |
| | } |
| | return nil |
| | } |
| |
|
| | func nextRune(s string) (c rune, t string, err error) { |
| | c, size := utf8.DecodeRuneInString(s) |
| | if c == utf8.RuneError && size == 1 { |
| | return 0, "", &Error{Code: ErrInvalidUTF8, Expr: s} |
| | } |
| | return c, s[size:], nil |
| | } |
| |
|
| | func isalnum(c rune) bool { |
| | return '0' <= c && c <= '9' || 'A' <= c && c <= 'Z' || 'a' <= c && c <= 'z' |
| | } |
| |
|
| | func unhex(c rune) rune { |
| | if '0' <= c && c <= '9' { |
| | return c - '0' |
| | } |
| | if 'a' <= c && c <= 'f' { |
| | return c - 'a' + 10 |
| | } |
| | if 'A' <= c && c <= 'F' { |
| | return c - 'A' + 10 |
| | } |
| | return -1 |
| | } |
| |
|