Source file
src/regexp/exec_test.go
1
2
3
4
5 package regexp
6
7 import (
8 "bufio"
9 "compress/bzip2"
10 "fmt"
11 "internal/testenv"
12 "io"
13 "os"
14 "path/filepath"
15 "regexp/syntax"
16 "slices"
17 "strconv"
18 "strings"
19 "testing"
20 "unicode/utf8"
21 )
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66 func TestRE2Search(t *testing.T) {
67 testRE2(t, "testdata/re2-search.txt")
68 }
69
70 func testRE2(t *testing.T, file string) {
71 f, err := os.Open(file)
72 if err != nil {
73 t.Fatal(err)
74 }
75 defer f.Close()
76 var txt io.Reader
77 if strings.HasSuffix(file, ".bz2") {
78 z := bzip2.NewReader(f)
79 txt = z
80 file = file[:len(file)-len(".bz2")]
81 } else {
82 txt = f
83 }
84 lineno := 0
85 scanner := bufio.NewScanner(txt)
86 var (
87 str []string
88 input []string
89 inStrings bool
90 re *Regexp
91 refull *Regexp
92 nfail int
93 ncase int
94 )
95 for lineno := 1; scanner.Scan(); lineno++ {
96 line := scanner.Text()
97 switch {
98 case line == "":
99 t.Fatalf("%s:%d: unexpected blank line", file, lineno)
100 case line[0] == '#':
101 continue
102 case 'A' <= line[0] && line[0] <= 'Z':
103
104 t.Logf("%s\n", line)
105 continue
106 case line == "strings":
107 str = str[:0]
108 inStrings = true
109 case line == "regexps":
110 inStrings = false
111 case line[0] == '"':
112 q, err := strconv.Unquote(line)
113 if err != nil {
114
115 t.Fatalf("%s:%d: unquote %s: %v", file, lineno, line, err)
116 }
117 if inStrings {
118 str = append(str, q)
119 continue
120 }
121
122 if len(input) != 0 {
123 t.Fatalf("%s:%d: out of sync: have %d strings left before %#q", file, lineno, len(input), q)
124 }
125 re, err = tryCompile(q)
126 if err != nil {
127 if err.Error() == "error parsing regexp: invalid escape sequence: `\\C`" {
128
129 continue
130 }
131 t.Errorf("%s:%d: compile %#q: %v", file, lineno, q, err)
132 if nfail++; nfail >= 100 {
133 t.Fatalf("stopping after %d errors", nfail)
134 }
135 continue
136 }
137 full := `\A(?:` + q + `)\z`
138 refull, err = tryCompile(full)
139 if err != nil {
140
141 t.Fatalf("%s:%d: compile full %#q: %v", file, lineno, full, err)
142 }
143 input = str
144 case line[0] == '-' || '0' <= line[0] && line[0] <= '9':
145
146 ncase++
147 if re == nil {
148
149 continue
150 }
151 if len(input) == 0 {
152 t.Fatalf("%s:%d: out of sync: no input remaining", file, lineno)
153 }
154 var text string
155 text, input = input[0], input[1:]
156 if !isSingleBytes(text) && strings.Contains(re.String(), `\B`) {
157
158
159
160
161
162 continue
163 }
164 res := strings.Split(line, ";")
165 if len(res) != len(run) {
166 t.Fatalf("%s:%d: have %d test results, want %d", file, lineno, len(res), len(run))
167 }
168 for i := range res {
169 have, suffix := run[i](re, refull, text)
170 want := parseResult(t, file, lineno, res[i])
171 if !slices.Equal(have, want) {
172 t.Errorf("%s:%d: %#q%s.FindSubmatchIndex(%#q) = %v, want %v", file, lineno, re, suffix, text, have, want)
173 if nfail++; nfail >= 100 {
174 t.Fatalf("stopping after %d errors", nfail)
175 }
176 continue
177 }
178 b, suffix := match[i](re, refull, text)
179 if b != (want != nil) {
180 t.Errorf("%s:%d: %#q%s.MatchString(%#q) = %v, want %v", file, lineno, re, suffix, text, b, !b)
181 if nfail++; nfail >= 100 {
182 t.Fatalf("stopping after %d errors", nfail)
183 }
184 continue
185 }
186 }
187
188 default:
189 t.Fatalf("%s:%d: out of sync: %s\n", file, lineno, line)
190 }
191 }
192 if err := scanner.Err(); err != nil {
193 t.Fatalf("%s:%d: %v", file, lineno, err)
194 }
195 if len(input) != 0 {
196 t.Fatalf("%s:%d: out of sync: have %d strings left at EOF", file, lineno, len(input))
197 }
198 t.Logf("%d cases tested", ncase)
199 }
200
// run lists the submatch-index runners applied to each test case. testRE2
// requires every result line to carry exactly one semicolon-separated entry
// per element, in this order. Each runner returns the indexes it found and
// a label used in failure messages.
var run = []func(*Regexp, *Regexp, string) ([]int, string){
	runFull,
	runPartial,
	runFullLongest,
	runPartialLongest,
}
207
208 func runFull(re, refull *Regexp, text string) ([]int, string) {
209 refull.longest = false
210 return refull.FindStringSubmatchIndex(text), "[full]"
211 }
212
213 func runPartial(re, refull *Regexp, text string) ([]int, string) {
214 re.longest = false
215 return re.FindStringSubmatchIndex(text), ""
216 }
217
218 func runFullLongest(re, refull *Regexp, text string) ([]int, string) {
219 refull.longest = true
220 return refull.FindStringSubmatchIndex(text), "[full,longest]"
221 }
222
223 func runPartialLongest(re, refull *Regexp, text string) ([]int, string) {
224 re.longest = true
225 return re.FindStringSubmatchIndex(text), "[longest]"
226 }
227
// match lists the boolean matchers that parallel run: testRE2 calls
// match[i] after run[i] has produced the expected indexes and checks that
// it reports a match exactly when a result was expected.
var match = []func(*Regexp, *Regexp, string) (bool, string){
	matchFull,
	matchPartial,
	matchFullLongest,
	matchPartialLongest,
}
234
235 func matchFull(re, refull *Regexp, text string) (bool, string) {
236 refull.longest = false
237 return refull.MatchString(text), "[full]"
238 }
239
240 func matchPartial(re, refull *Regexp, text string) (bool, string) {
241 re.longest = false
242 return re.MatchString(text), ""
243 }
244
245 func matchFullLongest(re, refull *Regexp, text string) (bool, string) {
246 refull.longest = true
247 return refull.MatchString(text), "[full,longest]"
248 }
249
250 func matchPartialLongest(re, refull *Regexp, text string) (bool, string) {
251 re.longest = true
252 return re.MatchString(text), "[longest]"
253 }
254
// isSingleBytes reports whether s consists solely of bytes below
// utf8.RuneSelf, that is, whether every rune in s is a single ASCII byte.
// The empty string qualifies.
func isSingleBytes(s string) bool {
	// Any byte at or above RuneSelf is either part of a multi-byte
	// encoding or invalid UTF-8; in both cases s is not single-byte.
	for i := 0; i < len(s); i++ {
		if s[i] >= utf8.RuneSelf {
			return false
		}
	}
	return true
}
263
264 func tryCompile(s string) (re *Regexp, err error) {
265
266 defer func() {
267 if r := recover(); r != nil {
268 err = fmt.Errorf("panic: %v", r)
269 }
270 }()
271 return Compile(s)
272 }
273
// parseResult converts one expected-result entry into the index slice it
// describes. The entry "-" alone means no match and yields nil. Otherwise
// the entry is a space-separated list of pairs, each either "lo-hi"
// (decimal, lo <= hi) or "-" for an unset pair, which becomes -1, -1.
// A malformed pair is reported with t.Fatalf, citing file and lineno.
func parseResult(t *testing.T, file string, lineno int, res string) []int {
	if res == "-" {
		return nil
	}

	pairs := strings.Split(res, " ")
	out := make([]int, 0, 2*len(pairs))
	for _, pair := range pairs {
		if pair == "-" {
			out = append(out, -1, -1)
			continue
		}
		// A pair without "-" leaves hiStr empty, which Atoi rejects below.
		loStr, hiStr, _ := strings.Cut(pair, "-")
		lo, err1 := strconv.Atoi(loStr)
		hi, err2 := strconv.Atoi(hiStr)
		if err1 != nil || err2 != nil || lo > hi {
			t.Fatalf("%s:%d: invalid pair %s", file, lineno, pair)
		}
		out = append(out, lo, hi)
	}
	return out
}
312
313
314
315
316 func TestFowler(t *testing.T) {
317 files, err := filepath.Glob("testdata/*.dat")
318 if err != nil {
319 t.Fatal(err)
320 }
321 for _, file := range files {
322 t.Log(file)
323 testFowler(t, file)
324 }
325 }
326
// notab matches a maximal run of non-tab characters; testFowler uses it to
// split each line into its tab-separated fields.
var notab = MustCompilePOSIX(`[^\t]+`)
328
329 func testFowler(t *testing.T, file string) {
330 f, err := os.Open(file)
331 if err != nil {
332 t.Error(err)
333 return
334 }
335 defer f.Close()
336 b := bufio.NewReader(f)
337 lineno := 0
338 lastRegexp := ""
339 Reading:
340 for {
341 lineno++
342 line, err := b.ReadString('\n')
343 if err != nil {
344 if err != io.EOF {
345 t.Errorf("%s:%d: %v", file, lineno, err)
346 }
347 break Reading
348 }
349
350
351
352
353
354
355
356
357 if line[0] == '#' || line[0] == '\n' {
358 continue Reading
359 }
360 line = line[:len(line)-1]
361 field := notab.FindAllString(line, -1)
362 for i, f := range field {
363 if f == "NULL" {
364 field[i] = ""
365 }
366 if f == "NIL" {
367 t.Logf("%s:%d: skip: %s", file, lineno, line)
368 continue Reading
369 }
370 }
371 if len(field) == 0 {
372 continue Reading
373 }
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435 flag := field[0]
436 switch flag[0] {
437 case '?', '&', '|', ';', '{', '}':
438
439
440 flag = flag[1:]
441 if flag == "" {
442 continue Reading
443 }
444 case ':':
445 var ok bool
446 if _, flag, ok = strings.Cut(flag[1:], ":"); !ok {
447 t.Logf("skip: %s", line)
448 continue Reading
449 }
450 case 'C', 'N', 'T', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9':
451 t.Logf("skip: %s", line)
452 continue Reading
453 }
454
455
456 if len(field) < 4 {
457 t.Errorf("%s:%d: too few fields: %s", file, lineno, line)
458 continue Reading
459 }
460
461
462 if strings.Contains(flag, "$") {
463 f := `"` + field[1] + `"`
464 if field[1], err = strconv.Unquote(f); err != nil {
465 t.Errorf("%s:%d: cannot unquote %s", file, lineno, f)
466 }
467 f = `"` + field[2] + `"`
468 if field[2], err = strconv.Unquote(f); err != nil {
469 t.Errorf("%s:%d: cannot unquote %s", file, lineno, f)
470 }
471 }
472
473
474
475
476 if field[1] == "SAME" {
477 field[1] = lastRegexp
478 }
479 lastRegexp = field[1]
480
481
482 text := field[2]
483
484
485 ok, shouldCompile, shouldMatch, pos := parseFowlerResult(field[3])
486 if !ok {
487 t.Errorf("%s:%d: cannot parse result %#q", file, lineno, field[3])
488 continue Reading
489 }
490
491
492
493 Testing:
494
495 for _, c := range flag {
496 pattern := field[1]
497 syn := syntax.POSIX | syntax.ClassNL
498 switch c {
499 default:
500 continue Testing
501 case 'E':
502
503 case 'L':
504
505 pattern = QuoteMeta(pattern)
506 }
507
508 for _, c := range flag {
509 switch c {
510 case 'i':
511 syn |= syntax.FoldCase
512 }
513 }
514
515 re, err := compile(pattern, syn, true)
516 if err != nil {
517 if shouldCompile {
518 t.Errorf("%s:%d: %#q did not compile", file, lineno, pattern)
519 }
520 continue Testing
521 }
522 if !shouldCompile {
523 t.Errorf("%s:%d: %#q should not compile", file, lineno, pattern)
524 continue Testing
525 }
526 match := re.MatchString(text)
527 if match != shouldMatch {
528 t.Errorf("%s:%d: %#q.Match(%#q) = %v, want %v", file, lineno, pattern, text, match, shouldMatch)
529 continue Testing
530 }
531 have := re.FindStringSubmatchIndex(text)
532 if (len(have) > 0) != match {
533 t.Errorf("%s:%d: %#q.Match(%#q) = %v, but %#q.FindSubmatchIndex(%#q) = %v", file, lineno, pattern, text, match, pattern, text, have)
534 continue Testing
535 }
536 if len(have) > len(pos) {
537 have = have[:len(pos)]
538 }
539 if !slices.Equal(have, pos) {
540 t.Errorf("%s:%d: %#q.FindSubmatchIndex(%#q) = %v, want %v", file, lineno, pattern, text, have, pos)
541 }
542 }
543 }
544 }
545
// parseFowlerResult decodes the result field of a Fowler test line.
//
//   - "" means the pattern compiles and matches, with no positions to check;
//   - "NOMATCH" means the pattern compiles but does not match;
//   - a field starting with an upper-case ASCII letter means the pattern is
//     expected not to compile;
//   - anything else must be a sequence of "(lo,hi)" groups, where lo and hi
//     are decimal integers or "?" (recorded as -1); the numbers are returned
//     in pos in order of appearance.
//
// ok is false when s fits none of these forms; in that case compiled is
// true, matched is false and pos is nil.
func parseFowlerResult(s string) (ok, compiled, matched bool, pos []int) {
	switch {
	case s == "":
		return true, true, true, nil
	case s == "NOMATCH":
		return true, true, false, nil
	case 'A' <= s[0] && s[0] <= 'Z':
		return true, false, false, nil
	}

	var offsets []int
	rest := s
	for rest != "" {
		// With an even count so far the next token is "(lo,";
		// otherwise it is "hi)".
		var end byte = ')'
		if len(offsets)%2 == 0 {
			if rest[0] != '(' {
				return false, true, false, nil
			}
			rest = rest[1:]
			end = ','
		}
		// The token must be non-empty and its terminator must be present.
		i := strings.IndexByte(rest, end)
		if i <= 0 {
			return false, true, false, nil
		}
		v := -1
		if tok := rest[:i]; tok != "?" {
			n, err := strconv.Atoi(tok)
			if err != nil {
				return false, true, false, nil
			}
			v = n
		}
		offsets = append(offsets, v)
		rest = rest[i+1:]
	}
	if len(offsets)%2 != 0 {
		return false, true, false, nil
	}
	return true, true, true, offsets
}
622
// text holds the buffer most recently generated by makeText so that later
// calls asking for the same or a smaller size can reuse it.
var text []byte

// makeText returns n bytes of deterministic pseudo-random text made of
// bytes in the range 0x20 through 0x7E plus newlines. The generator always
// starts from the same state, so a shorter result is a prefix of a longer
// one. The returned slice aliases the package-level cache and must not be
// modified.
func makeText(n int) []byte {
	if n <= len(text) {
		return text[:n]
	}

	const (
		firstPrintable = 0x20
		lastPrintable  = 0x7E
	)
	buf := make([]byte, n)
	state := ^uint32(0)
	for i := range buf {
		// Shift the register left, set the low bit, and fold the
		// constant back in whenever the top bit is set.
		state <<= 1
		state ^= 1
		if state&(1<<31) != 0 {
			state ^= 0x88888eef
		}
		switch {
		case state%31 == 0:
			buf[i] = '\n'
		default:
			buf[i] = byte(state%(lastPrintable+1-firstPrintable) + firstPrintable)
		}
	}
	text = buf
	return text
}
645
646 func BenchmarkMatch(b *testing.B) {
647 isRaceBuilder := strings.HasSuffix(testenv.Builder(), "-race")
648
649 for _, data := range benchData {
650 r := MustCompile(data.re)
651 for _, size := range benchSizes {
652 if (isRaceBuilder || testing.Short()) && size.n > 1<<10 {
653 continue
654 }
655 t := makeText(size.n)
656 b.Run(data.name+"/"+size.name, func(b *testing.B) {
657 b.SetBytes(int64(size.n))
658 for i := 0; i < b.N; i++ {
659 if r.Match(t) {
660 b.Fatal("match!")
661 }
662 }
663 })
664 }
665 }
666 }
667
668 func BenchmarkMatch_onepass_regex(b *testing.B) {
669 isRaceBuilder := strings.HasSuffix(testenv.Builder(), "-race")
670 r := MustCompile(`(?s)\A.*\z`)
671 if r.onepass == nil {
672 b.Fatalf("want onepass regex, but %q is not onepass", r)
673 }
674 for _, size := range benchSizes {
675 if (isRaceBuilder || testing.Short()) && size.n > 1<<10 {
676 continue
677 }
678 t := makeText(size.n)
679 b.Run(size.name, func(b *testing.B) {
680 b.SetBytes(int64(size.n))
681 b.ReportAllocs()
682 for i := 0; i < b.N; i++ {
683 if !r.Match(t) {
684 b.Fatal("not match!")
685 }
686 }
687 })
688 }
689 }
690
// benchData lists the named patterns exercised by BenchmarkMatch, which
// treats a successful match against the generated text as a fatal error.
var benchData = []struct{ name, re string }{
	{"Easy0", "ABCDEFGHIJKLMNOPQRSTUVWXYZ$"},
	{"Easy0i", "(?i)ABCDEFGHIJklmnopqrstuvwxyz$"},
	{"Easy1", "A[AB]B[BC]C[CD]D[DE]E[EF]F[FG]G[GH]H[HI]I[IJ]J$"},
	{"Medium", "[XYZ]ABCDEFGHIJKLMNOPQRSTUVWXYZ$"},
	{"Hard", "[ -~]*ABCDEFGHIJKLMNOPQRSTUVWXYZ$"},
	{"Hard1", "ABCD|CDEF|EFGH|GHIJ|IJKL|KLMN|MNOP|OPQR|QRST|STUV|UVWX|WXYZ"},
}
699
// benchSizes lists the input sizes, in bytes, used by the Match benchmarks;
// name is the label that appears in the sub-benchmark name.
var benchSizes = []struct {
	name string
	n    int
}{
	{"16", 16},
	{"32", 32},
	{"1K", 1 << 10},
	{"32K", 32 << 10},
	{"1M", 1 << 20},
	{"32M", 32 << 20},
}
711
712 func TestLongest(t *testing.T) {
713 re, err := Compile(`a(|b)`)
714 if err != nil {
715 t.Fatal(err)
716 }
717 if g, w := re.FindString("ab"), "a"; g != w {
718 t.Errorf("first match was %q, want %q", g, w)
719 }
720 re.Longest()
721 if g, w := re.FindString("ab"), "ab"; g != w {
722 t.Errorf("longest match was %q, want %q", g, w)
723 }
724 }
725
726
727
// TestProgramTooLongForBacktrack checks that a large alternation of one
// hundred number words still matches "two" and rejects "xxx".
// NOTE(review): going by the name, the pattern is presumably large enough
// that the backtracking matcher is not used; confirm against the matcher
// selection code.
func TestProgramTooLongForBacktrack(t *testing.T) {
	longRegex := MustCompile(`(one|two|three|four|five|six|seven|eight|nine|ten|eleven|twelve|thirteen|fourteen|fifteen|sixteen|seventeen|eighteen|nineteen|twenty|twentyone|twentytwo|twentythree|twentyfour|twentyfive|twentysix|twentyseven|twentyeight|twentynine|thirty|thirtyone|thirtytwo|thirtythree|thirtyfour|thirtyfive|thirtysix|thirtyseven|thirtyeight|thirtynine|forty|fortyone|fortytwo|fortythree|fortyfour|fortyfive|fortysix|fortyseven|fortyeight|fortynine|fifty|fiftyone|fiftytwo|fiftythree|fiftyfour|fiftyfive|fiftysix|fiftyseven|fiftyeight|fiftynine|sixty|sixtyone|sixtytwo|sixtythree|sixtyfour|sixtyfive|sixtysix|sixtyseven|sixtyeight|sixtynine|seventy|seventyone|seventytwo|seventythree|seventyfour|seventyfive|seventysix|seventyseven|seventyeight|seventynine|eighty|eightyone|eightytwo|eightythree|eightyfour|eightyfive|eightysix|eightyseven|eightyeight|eightynine|ninety|ninetyone|ninetytwo|ninetythree|ninetyfour|ninetyfive|ninetysix|ninetyseven|ninetyeight|ninetynine|onehundred)`)
	if !longRegex.MatchString("two") {
		t.Errorf("longRegex.MatchString(\"two\") was false, want true")
	}
	if longRegex.MatchString("xxx") {
		t.Errorf("longRegex.MatchString(\"xxx\") was true, want false")
	}
}
737
View as plain text