| | |
| | |
| | |
| |
|
| | package regexp |
| |
|
| | import ( |
| | "bufio" |
| | "compress/bzip2" |
| | "fmt" |
| | "internal/testenv" |
| | "io" |
| | "os" |
| | "path/filepath" |
| | "regexp/syntax" |
| | "slices" |
| | "strconv" |
| | "strings" |
| | "testing" |
| | "unicode/utf8" |
| | ) |
| |
|
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | func TestRE2Search(t *testing.T) { |
| | testRE2(t, "testdata/re2-search.txt") |
| | } |
| |
|
| | func testRE2(t *testing.T, file string) { |
| | f, err := os.Open(file) |
| | if err != nil { |
| | t.Fatal(err) |
| | } |
| | defer f.Close() |
| | var txt io.Reader |
| | if strings.HasSuffix(file, ".bz2") { |
| | z := bzip2.NewReader(f) |
| | txt = z |
| | file = file[:len(file)-len(".bz2")] |
| | } else { |
| | txt = f |
| | } |
| | lineno := 0 |
| | scanner := bufio.NewScanner(txt) |
| | var ( |
| | str []string |
| | input []string |
| | inStrings bool |
| | re *Regexp |
| | refull *Regexp |
| | nfail int |
| | ncase int |
| | ) |
| | for lineno := 1; scanner.Scan(); lineno++ { |
| | line := scanner.Text() |
| | switch { |
| | case line == "": |
| | t.Fatalf("%s:%d: unexpected blank line", file, lineno) |
| | case line[0] == '#': |
| | continue |
| | case 'A' <= line[0] && line[0] <= 'Z': |
| | |
| | t.Logf("%s\n", line) |
| | continue |
| | case line == "strings": |
| | str = str[:0] |
| | inStrings = true |
| | case line == "regexps": |
| | inStrings = false |
| | case line[0] == '"': |
| | q, err := strconv.Unquote(line) |
| | if err != nil { |
| | |
| | t.Fatalf("%s:%d: unquote %s: %v", file, lineno, line, err) |
| | } |
| | if inStrings { |
| | str = append(str, q) |
| | continue |
| | } |
| | |
| | if len(input) != 0 { |
| | t.Fatalf("%s:%d: out of sync: have %d strings left before %#q", file, lineno, len(input), q) |
| | } |
| | re, err = tryCompile(q) |
| | if err != nil { |
| | if err.Error() == "error parsing regexp: invalid escape sequence: `\\C`" { |
| | |
| | continue |
| | } |
| | t.Errorf("%s:%d: compile %#q: %v", file, lineno, q, err) |
| | if nfail++; nfail >= 100 { |
| | t.Fatalf("stopping after %d errors", nfail) |
| | } |
| | continue |
| | } |
| | full := `\A(?:` + q + `)\z` |
| | refull, err = tryCompile(full) |
| | if err != nil { |
| | |
| | t.Fatalf("%s:%d: compile full %#q: %v", file, lineno, full, err) |
| | } |
| | input = str |
| | case line[0] == '-' || '0' <= line[0] && line[0] <= '9': |
| | |
| | ncase++ |
| | if re == nil { |
| | |
| | continue |
| | } |
| | if len(input) == 0 { |
| | t.Fatalf("%s:%d: out of sync: no input remaining", file, lineno) |
| | } |
| | var text string |
| | text, input = input[0], input[1:] |
| | if !isSingleBytes(text) && strings.Contains(re.String(), `\B`) { |
| | |
| | |
| | |
| | |
| | |
| | continue |
| | } |
| | res := strings.Split(line, ";") |
| | if len(res) != len(run) { |
| | t.Fatalf("%s:%d: have %d test results, want %d", file, lineno, len(res), len(run)) |
| | } |
| | for i := range res { |
| | have, suffix := run[i](re, refull, text) |
| | want := parseResult(t, file, lineno, res[i]) |
| | if !slices.Equal(have, want) { |
| | t.Errorf("%s:%d: %#q%s.FindSubmatchIndex(%#q) = %v, want %v", file, lineno, re, suffix, text, have, want) |
| | if nfail++; nfail >= 100 { |
| | t.Fatalf("stopping after %d errors", nfail) |
| | } |
| | continue |
| | } |
| | b, suffix := match[i](re, refull, text) |
| | if b != (want != nil) { |
| | t.Errorf("%s:%d: %#q%s.MatchString(%#q) = %v, want %v", file, lineno, re, suffix, text, b, !b) |
| | if nfail++; nfail >= 100 { |
| | t.Fatalf("stopping after %d errors", nfail) |
| | } |
| | continue |
| | } |
| | } |
| |
|
| | default: |
| | t.Fatalf("%s:%d: out of sync: %s\n", file, lineno, line) |
| | } |
| | } |
| | if err := scanner.Err(); err != nil { |
| | t.Fatalf("%s:%d: %v", file, lineno, err) |
| | } |
| | if len(input) != 0 { |
| | t.Fatalf("%s:%d: out of sync: have %d strings left at EOF", file, lineno, len(input)) |
| | } |
| | t.Logf("%d cases tested", ncase) |
| | } |
| |
|
| | var run = []func(*Regexp, *Regexp, string) ([]int, string){ |
| | runFull, |
| | runPartial, |
| | runFullLongest, |
| | runPartialLongest, |
| | } |
| |
|
| | func runFull(re, refull *Regexp, text string) ([]int, string) { |
| | refull.longest = false |
| | return refull.FindStringSubmatchIndex(text), "[full]" |
| | } |
| |
|
| | func runPartial(re, refull *Regexp, text string) ([]int, string) { |
| | re.longest = false |
| | return re.FindStringSubmatchIndex(text), "" |
| | } |
| |
|
| | func runFullLongest(re, refull *Regexp, text string) ([]int, string) { |
| | refull.longest = true |
| | return refull.FindStringSubmatchIndex(text), "[full,longest]" |
| | } |
| |
|
| | func runPartialLongest(re, refull *Regexp, text string) ([]int, string) { |
| | re.longest = true |
| | return re.FindStringSubmatchIndex(text), "[longest]" |
| | } |
| |
|
| | var match = []func(*Regexp, *Regexp, string) (bool, string){ |
| | matchFull, |
| | matchPartial, |
| | matchFullLongest, |
| | matchPartialLongest, |
| | } |
| |
|
| | func matchFull(re, refull *Regexp, text string) (bool, string) { |
| | refull.longest = false |
| | return refull.MatchString(text), "[full]" |
| | } |
| |
|
| | func matchPartial(re, refull *Regexp, text string) (bool, string) { |
| | re.longest = false |
| | return re.MatchString(text), "" |
| | } |
| |
|
| | func matchFullLongest(re, refull *Regexp, text string) (bool, string) { |
| | refull.longest = true |
| | return refull.MatchString(text), "[full,longest]" |
| | } |
| |
|
| | func matchPartialLongest(re, refull *Regexp, text string) (bool, string) { |
| | re.longest = true |
| | return re.MatchString(text), "[longest]" |
| | } |
| |
|
// isSingleBytes reports whether every rune of s is encoded as a single
// byte, i.e. whether s contains no byte at or above utf8.RuneSelf.
func isSingleBytes(s string) bool {
	// Any multi-byte or invalid encoding contains a byte >= utf8.RuneSelf,
	// so scanning bytes gives the same answer as decoding runes.
	for i := 0; i < len(s); i++ {
		if s[i] >= utf8.RuneSelf {
			return false
		}
	}
	return true
}
| |
|
| | func tryCompile(s string) (re *Regexp, err error) { |
| | |
| | defer func() { |
| | if r := recover(); r != nil { |
| | err = fmt.Errorf("panic: %v", r) |
| | } |
| | }() |
| | return Compile(s) |
| | } |
| |
|
// parseResult converts one expected-result string into the index slice
// that FindStringSubmatchIndex should return.
//
// The string "-" on its own means "no match" and yields nil. Otherwise res
// is a space-separated list of pairs: "lo-hi" contributes lo and hi, and a
// lone "-" contributes -1, -1 for a submatch that did not participate.
// A malformed pair, or one with lo > hi, is reported with t.Fatalf using
// file and lineno as the position.
func parseResult(t *testing.T, file string, lineno int, res string) []int {
	if res == "-" {
		return nil
	}

	pairs := strings.Split(res, " ")
	out := make([]int, 0, 2*len(pairs))
	for _, pair := range pairs {
		if pair == "-" {
			out = append(out, -1, -1)
			continue
		}
		loStr, hiStr, _ := strings.Cut(pair, "-")
		lo, loErr := strconv.Atoi(loStr)
		hi, hiErr := strconv.Atoi(hiStr)
		if loErr != nil || hiErr != nil || lo > hi {
			t.Fatalf("%s:%d: invalid pair %s", file, lineno, pair)
		}
		out = append(out, lo, hi)
	}
	return out
}
| |
|
| | |
| | |
| | |
| | func TestFowler(t *testing.T) { |
| | files, err := filepath.Glob("testdata/*.dat") |
| | if err != nil { |
| | t.Fatal(err) |
| | } |
| | for _, file := range files { |
| | t.Log(file) |
| | testFowler(t, file) |
| | } |
| | } |
| |
|
| | var notab = MustCompilePOSIX(`[^\t]+`) |
| |
|
| | func testFowler(t *testing.T, file string) { |
| | f, err := os.Open(file) |
| | if err != nil { |
| | t.Error(err) |
| | return |
| | } |
| | defer f.Close() |
| | b := bufio.NewReader(f) |
| | lineno := 0 |
| | lastRegexp := "" |
| | Reading: |
| | for { |
| | lineno++ |
| | line, err := b.ReadString('\n') |
| | if err != nil { |
| | if err != io.EOF { |
| | t.Errorf("%s:%d: %v", file, lineno, err) |
| | } |
| | break Reading |
| | } |
| |
|
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | if line[0] == '#' || line[0] == '\n' { |
| | continue Reading |
| | } |
| | line = line[:len(line)-1] |
| | field := notab.FindAllString(line, -1) |
| | for i, f := range field { |
| | if f == "NULL" { |
| | field[i] = "" |
| | } |
| | if f == "NIL" { |
| | t.Logf("%s:%d: skip: %s", file, lineno, line) |
| | continue Reading |
| | } |
| | } |
| | if len(field) == 0 { |
| | continue Reading |
| | } |
| |
|
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | flag := field[0] |
| | switch flag[0] { |
| | case '?', '&', '|', ';', '{', '}': |
| | |
| | |
| | flag = flag[1:] |
| | if flag == "" { |
| | continue Reading |
| | } |
| | case ':': |
| | var ok bool |
| | if _, flag, ok = strings.Cut(flag[1:], ":"); !ok { |
| | t.Logf("skip: %s", line) |
| | continue Reading |
| | } |
| | case 'C', 'N', 'T', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9': |
| | t.Logf("skip: %s", line) |
| | continue Reading |
| | } |
| |
|
| | |
| | if len(field) < 4 { |
| | t.Errorf("%s:%d: too few fields: %s", file, lineno, line) |
| | continue Reading |
| | } |
| |
|
| | |
| | if strings.Contains(flag, "$") { |
| | f := `"` + field[1] + `"` |
| | if field[1], err = strconv.Unquote(f); err != nil { |
| | t.Errorf("%s:%d: cannot unquote %s", file, lineno, f) |
| | } |
| | f = `"` + field[2] + `"` |
| | if field[2], err = strconv.Unquote(f); err != nil { |
| | t.Errorf("%s:%d: cannot unquote %s", file, lineno, f) |
| | } |
| | } |
| |
|
| | |
| | |
| | |
| | if field[1] == "SAME" { |
| | field[1] = lastRegexp |
| | } |
| | lastRegexp = field[1] |
| |
|
| | |
| | text := field[2] |
| |
|
| | |
| | ok, shouldCompile, shouldMatch, pos := parseFowlerResult(field[3]) |
| | if !ok { |
| | t.Errorf("%s:%d: cannot parse result %#q", file, lineno, field[3]) |
| | continue Reading |
| | } |
| |
|
| | |
| |
|
| | Testing: |
| | |
| | for _, c := range flag { |
| | pattern := field[1] |
| | syn := syntax.POSIX | syntax.ClassNL |
| | switch c { |
| | default: |
| | continue Testing |
| | case 'E': |
| | |
| | case 'L': |
| | |
| | pattern = QuoteMeta(pattern) |
| | } |
| |
|
| | for _, c := range flag { |
| | switch c { |
| | case 'i': |
| | syn |= syntax.FoldCase |
| | } |
| | } |
| |
|
| | re, err := compile(pattern, syn, true) |
| | if err != nil { |
| | if shouldCompile { |
| | t.Errorf("%s:%d: %#q did not compile", file, lineno, pattern) |
| | } |
| | continue Testing |
| | } |
| | if !shouldCompile { |
| | t.Errorf("%s:%d: %#q should not compile", file, lineno, pattern) |
| | continue Testing |
| | } |
| | match := re.MatchString(text) |
| | if match != shouldMatch { |
| | t.Errorf("%s:%d: %#q.Match(%#q) = %v, want %v", file, lineno, pattern, text, match, shouldMatch) |
| | continue Testing |
| | } |
| | have := re.FindStringSubmatchIndex(text) |
| | if (len(have) > 0) != match { |
| | t.Errorf("%s:%d: %#q.Match(%#q) = %v, but %#q.FindSubmatchIndex(%#q) = %v", file, lineno, pattern, text, match, pattern, text, have) |
| | continue Testing |
| | } |
| | if len(have) > len(pos) { |
| | have = have[:len(pos)] |
| | } |
| | if !slices.Equal(have, pos) { |
| | t.Errorf("%s:%d: %#q.FindSubmatchIndex(%#q) = %v, want %v", file, lineno, pattern, text, have, pos) |
| | } |
| | } |
| | } |
| | } |
| |
|
// parseFowlerResult decodes the expected-outcome field s of a test line.
//
// Recognized forms:
//   - "": the pattern compiles and matches, with no positions given;
//   - "NOMATCH": the pattern compiles but does not match;
//   - any other value starting with an upper-case ASCII letter: the pattern
//     is expected not to compile;
//   - a sequence of "(lo,hi)" pairs: the pattern compiles and matches, and
//     pos holds the pair values in order, with "?" standing for -1.
//
// ok is false when s has none of these forms; in that case compiled is true
// and matched is false.
func parseFowlerResult(s string) (ok, compiled, matched bool, pos []int) {
	if s == "" {
		return true, true, true, nil
	}
	if s == "NOMATCH" {
		return true, true, false, nil
	}
	if first := s[0]; first >= 'A' && first <= 'Z' {
		return true, false, false, nil
	}

	var offsets []int
	for rest := s; rest != ""; {
		// An even count means a new pair starts here: expect '(' and read
		// up to ','. Otherwise read the second value up to ')'.
		delim := byte(')')
		if len(offsets)%2 == 0 {
			if rest[0] != '(' {
				return false, true, false, nil
			}
			rest = rest[1:]
			delim = ','
		}

		// The value must be non-empty and followed by its delimiter.
		cut := strings.IndexByte(rest, delim)
		if cut <= 0 {
			return false, true, false, nil
		}

		value := -1
		if tok := rest[:cut]; tok != "?" {
			n, err := strconv.Atoi(tok)
			if err != nil {
				return false, true, false, nil
			}
			value = n
		}
		offsets = append(offsets, value)
		rest = rest[cut+1:]
	}
	if len(offsets)%2 != 0 {
		return false, true, false, nil
	}
	return true, true, true, offsets
}
| |
|
// text caches the largest buffer generated by makeText so far.
var text []byte

// makeText returns n bytes of deterministic pseudo-random text made of
// printable ASCII characters (0x20 through 0x7E) and newlines.
//
// The result is a prefix of the cached buffer whenever that buffer is long
// enough; otherwise a new buffer of length n is generated and cached.
// Callers share the cached storage and must not modify it.
func makeText(n int) []byte {
	if n <= len(text) {
		return text[:n]
	}
	buf := make([]byte, n)
	state := ^uint32(0)
	for i := range buf {
		// Advance the generator: shift left, flip the low bit, and fold in
		// the constant whenever the top bit is set.
		state <<= 1
		state ^= 1
		if state&(1<<31) != 0 {
			state ^= 0x88888eef
		}
		switch {
		case state%31 == 0:
			buf[i] = '\n'
		default:
			buf[i] = byte(state%(0x7E+1-0x20) + 0x20)
		}
	}
	text = buf
	return text
}
| |
|
| | func BenchmarkMatch(b *testing.B) { |
| | isRaceBuilder := strings.HasSuffix(testenv.Builder(), "-race") |
| |
|
| | for _, data := range benchData { |
| | r := MustCompile(data.re) |
| | for _, size := range benchSizes { |
| | if (isRaceBuilder || testing.Short()) && size.n > 1<<10 { |
| | continue |
| | } |
| | t := makeText(size.n) |
| | b.Run(data.name+"/"+size.name, func(b *testing.B) { |
| | b.SetBytes(int64(size.n)) |
| | for i := 0; i < b.N; i++ { |
| | if r.Match(t) { |
| | b.Fatal("match!") |
| | } |
| | } |
| | }) |
| | } |
| | } |
| | } |
| |
|
| | func BenchmarkMatch_onepass_regex(b *testing.B) { |
| | isRaceBuilder := strings.HasSuffix(testenv.Builder(), "-race") |
| | r := MustCompile(`(?s)\A.*\z`) |
| | if r.onepass == nil { |
| | b.Fatalf("want onepass regex, but %q is not onepass", r) |
| | } |
| | for _, size := range benchSizes { |
| | if (isRaceBuilder || testing.Short()) && size.n > 1<<10 { |
| | continue |
| | } |
| | t := makeText(size.n) |
| | b.Run(size.name, func(b *testing.B) { |
| | b.SetBytes(int64(size.n)) |
| | b.ReportAllocs() |
| | for i := 0; i < b.N; i++ { |
| | if !r.Match(t) { |
| | b.Fatal("not match!") |
| | } |
| | } |
| | }) |
| | } |
| | } |
| |
|
// benchData lists the patterns exercised by BenchmarkMatch; name becomes
// the first component of the sub-benchmark name.
var benchData = []struct{ name, re string }{
	{name: "Easy0", re: "ABCDEFGHIJKLMNOPQRSTUVWXYZ$"},
	{name: "Easy0i", re: "(?i)ABCDEFGHIJklmnopqrstuvwxyz$"},
	{name: "Easy1", re: "A[AB]B[BC]C[CD]D[DE]E[EF]F[FG]G[GH]H[HI]I[IJ]J$"},
	{name: "Medium", re: "[XYZ]ABCDEFGHIJKLMNOPQRSTUVWXYZ$"},
	{name: "Hard", re: "[ -~]*ABCDEFGHIJKLMNOPQRSTUVWXYZ$"},
	{name: "Hard1", re: "ABCD|CDEF|EFGH|GHIJ|IJKL|KLMN|MNOP|OPQR|QRST|STUV|UVWX|WXYZ"},
}
| |
|
// benchSizes lists the input sizes used by the Match benchmarks: n is the
// text length in bytes and name labels the sub-benchmark.
var benchSizes = []struct {
	name string
	n    int
}{
	{name: "16", n: 16},
	{name: "32", n: 32},
	{name: "1K", n: 1 << 10},
	{name: "32K", n: 32 << 10},
	{name: "1M", n: 1 << 20},
	{name: "32M", n: 32 << 20},
}
| |
|
| | func TestLongest(t *testing.T) { |
| | re, err := Compile(`a(|b)`) |
| | if err != nil { |
| | t.Fatal(err) |
| | } |
| | if g, w := re.FindString("ab"), "a"; g != w { |
| | t.Errorf("first match was %q, want %q", g, w) |
| | } |
| | re.Longest() |
| | if g, w := re.FindString("ab"), "ab"; g != w { |
| | t.Errorf("longest match was %q, want %q", g, w) |
| | } |
| | } |
| |
|
| | |
| | |
| | func TestProgramTooLongForBacktrack(t *testing.T) { |
| | longRegex := MustCompile(`(one|two|three|four|five|six|seven|eight|nine|ten|eleven|twelve|thirteen|fourteen|fifteen|sixteen|seventeen|eighteen|nineteen|twenty|twentyone|twentytwo|twentythree|twentyfour|twentyfive|twentysix|twentyseven|twentyeight|twentynine|thirty|thirtyone|thirtytwo|thirtythree|thirtyfour|thirtyfive|thirtysix|thirtyseven|thirtyeight|thirtynine|forty|fortyone|fortytwo|fortythree|fortyfour|fortyfive|fortysix|fortyseven|fortyeight|fortynine|fifty|fiftyone|fiftytwo|fiftythree|fiftyfour|fiftyfive|fiftysix|fiftyseven|fiftyeight|fiftynine|sixty|sixtyone|sixtytwo|sixtythree|sixtyfour|sixtyfive|sixtysix|sixtyseven|sixtyeight|sixtynine|seventy|seventyone|seventytwo|seventythree|seventyfour|seventyfive|seventysix|seventyseven|seventyeight|seventynine|eighty|eightyone|eightytwo|eightythree|eightyfour|eightyfive|eightysix|eightyseven|eightyeight|eightynine|ninety|ninetyone|ninetytwo|ninetythree|ninetyfour|ninetyfive|ninetysix|ninetyseven|ninetyeight|ninetynine|onehundred)`) |
| | if !longRegex.MatchString("two") { |
| | t.Errorf("longRegex.MatchString(\"two\") was false, want true") |
| | } |
| | if longRegex.MatchString("xxx") { |
| | t.Errorf("longRegex.MatchString(\"xxx\") was true, want false") |
| | } |
| | } |
| |
|