1
2
3
4
5 package syntax
6
7 import (
8 "sort"
9 "strings"
10 "unicode"
11 "unicode/utf8"
12 )
13
14
15
16 type Error struct {
17 Code ErrorCode
18 Expr string
19 }
20
21 func (e *Error) Error() string {
22 return "error parsing regexp: " + e.Code.String() + ": `" + e.Expr + "`"
23 }
24
25
26 type ErrorCode string
27
28 const (
29
30 ErrInternalError ErrorCode = "regexp/syntax: internal error"
31
32
33 ErrInvalidCharClass ErrorCode = "invalid character class"
34 ErrInvalidCharRange ErrorCode = "invalid character class range"
35 ErrInvalidEscape ErrorCode = "invalid escape sequence"
36 ErrInvalidNamedCapture ErrorCode = "invalid named capture"
37 ErrInvalidPerlOp ErrorCode = "invalid or unsupported Perl syntax"
38 ErrInvalidRepeatOp ErrorCode = "invalid nested repetition operator"
39 ErrInvalidRepeatSize ErrorCode = "invalid repeat count"
40 ErrInvalidUTF8 ErrorCode = "invalid UTF-8"
41 ErrMissingBracket ErrorCode = "missing closing ]"
42 ErrMissingParen ErrorCode = "missing closing )"
43 ErrMissingRepeatArgument ErrorCode = "missing argument to repetition operator"
44 ErrTrailingBackslash ErrorCode = "trailing backslash at end of expression"
45 ErrUnexpectedParen ErrorCode = "unexpected )"
46 ErrNestingDepth ErrorCode = "expression nests too deeply"
47 ErrLarge ErrorCode = "expression too large"
48 )
49
50 func (e ErrorCode) String() string {
51 return string(e)
52 }
53
54
55 type Flags uint16
56
57 const (
58 FoldCase Flags = 1 << iota
59 Literal
60 ClassNL
61 DotNL
62 OneLine
63 NonGreedy
64 PerlX
65 UnicodeGroups
66 WasDollar
67 Simple
68
69 MatchNL = ClassNL | DotNL
70
71 Perl = ClassNL | OneLine | PerlX | UnicodeGroups
72 POSIX Flags = 0
73 )
74
75
76 const (
77 opLeftParen = opPseudo + iota
78 opVerticalBar
79 )
80
81
82
83
84
85
86
87
88
89
90
91
92
93 const maxHeight = 1000
94
95
96
97
98
99
100
101 const (
102 maxSize = 128 << 20 / instSize
103 instSize = 5 * 8
104 )
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121 const (
122 maxRunes = 128 << 20 / runeSize
123 runeSize = 4
124 )
125
126 type parser struct {
127 flags Flags
128 stack []*Regexp
129 free *Regexp
130 numCap int
131 wholeRegexp string
132 tmpClass []rune
133 numRegexp int
134 numRunes int
135 repeats int64
136 height map[*Regexp]int
137 size map[*Regexp]int64
138 }
139
140 func (p *parser) newRegexp(op Op) *Regexp {
141 re := p.free
142 if re != nil {
143 p.free = re.Sub0[0]
144 *re = Regexp{}
145 } else {
146 re = new(Regexp)
147 p.numRegexp++
148 }
149 re.Op = op
150 return re
151 }
152
153 func (p *parser) reuse(re *Regexp) {
154 if p.height != nil {
155 delete(p.height, re)
156 }
157 re.Sub0[0] = p.free
158 p.free = re
159 }
160
161 func (p *parser) checkLimits(re *Regexp) {
162 if p.numRunes > maxRunes {
163 panic(ErrLarge)
164 }
165 p.checkSize(re)
166 p.checkHeight(re)
167 }
168
169 func (p *parser) checkSize(re *Regexp) {
170 if p.size == nil {
171
172
173
174
175
176 if p.repeats == 0 {
177 p.repeats = 1
178 }
179 if re.Op == OpRepeat {
180 n := re.Max
181 if n == -1 {
182 n = re.Min
183 }
184 if n <= 0 {
185 n = 1
186 }
187 if int64(n) > maxSize/p.repeats {
188 p.repeats = maxSize
189 } else {
190 p.repeats *= int64(n)
191 }
192 }
193 if int64(p.numRegexp) < maxSize/p.repeats {
194 return
195 }
196
197
198
199
200 p.size = make(map[*Regexp]int64)
201 for _, re := range p.stack {
202 p.checkSize(re)
203 }
204 }
205
206 if p.calcSize(re, true) > maxSize {
207 panic(ErrLarge)
208 }
209 }
210
211 func (p *parser) calcSize(re *Regexp, force bool) int64 {
212 if !force {
213 if size, ok := p.size[re]; ok {
214 return size
215 }
216 }
217
218 var size int64
219 switch re.Op {
220 case OpLiteral:
221 size = int64(len(re.Rune))
222 case OpCapture, OpStar:
223
224 size = 2 + p.calcSize(re.Sub[0], false)
225 case OpPlus, OpQuest:
226 size = 1 + p.calcSize(re.Sub[0], false)
227 case OpConcat:
228 for _, sub := range re.Sub {
229 size += p.calcSize(sub, false)
230 }
231 case OpAlternate:
232 for _, sub := range re.Sub {
233 size += p.calcSize(sub, false)
234 }
235 if len(re.Sub) > 1 {
236 size += int64(len(re.Sub)) - 1
237 }
238 case OpRepeat:
239 sub := p.calcSize(re.Sub[0], false)
240 if re.Max == -1 {
241 if re.Min == 0 {
242 size = 2 + sub
243 } else {
244 size = 1 + int64(re.Min)*sub
245 }
246 break
247 }
248
249 size = int64(re.Max)*sub + int64(re.Max-re.Min)
250 }
251
252 size = max(1, size)
253 p.size[re] = size
254 return size
255 }
256
257 func (p *parser) checkHeight(re *Regexp) {
258 if p.numRegexp < maxHeight {
259 return
260 }
261 if p.height == nil {
262 p.height = make(map[*Regexp]int)
263 for _, re := range p.stack {
264 p.checkHeight(re)
265 }
266 }
267 if p.calcHeight(re, true) > maxHeight {
268 panic(ErrNestingDepth)
269 }
270 }
271
272 func (p *parser) calcHeight(re *Regexp, force bool) int {
273 if !force {
274 if h, ok := p.height[re]; ok {
275 return h
276 }
277 }
278 h := 1
279 for _, sub := range re.Sub {
280 hsub := p.calcHeight(sub, false)
281 if h < 1+hsub {
282 h = 1 + hsub
283 }
284 }
285 p.height[re] = h
286 return h
287 }
288
289
290
291
292 func (p *parser) push(re *Regexp) *Regexp {
293 p.numRunes += len(re.Rune)
294 if re.Op == OpCharClass && len(re.Rune) == 2 && re.Rune[0] == re.Rune[1] {
295
296 if p.maybeConcat(re.Rune[0], p.flags&^FoldCase) {
297 return nil
298 }
299 re.Op = OpLiteral
300 re.Rune = re.Rune[:1]
301 re.Flags = p.flags &^ FoldCase
302 } else if re.Op == OpCharClass && len(re.Rune) == 4 &&
303 re.Rune[0] == re.Rune[1] && re.Rune[2] == re.Rune[3] &&
304 unicode.SimpleFold(re.Rune[0]) == re.Rune[2] &&
305 unicode.SimpleFold(re.Rune[2]) == re.Rune[0] ||
306 re.Op == OpCharClass && len(re.Rune) == 2 &&
307 re.Rune[0]+1 == re.Rune[1] &&
308 unicode.SimpleFold(re.Rune[0]) == re.Rune[1] &&
309 unicode.SimpleFold(re.Rune[1]) == re.Rune[0] {
310
311 if p.maybeConcat(re.Rune[0], p.flags|FoldCase) {
312 return nil
313 }
314
315
316 re.Op = OpLiteral
317 re.Rune = re.Rune[:1]
318 re.Flags = p.flags | FoldCase
319 } else {
320
321 p.maybeConcat(-1, 0)
322 }
323
324 p.stack = append(p.stack, re)
325 p.checkLimits(re)
326 return re
327 }
328
329
330
331
332
333
334
335
336
337
338 func (p *parser) maybeConcat(r rune, flags Flags) bool {
339 n := len(p.stack)
340 if n < 2 {
341 return false
342 }
343
344 re1 := p.stack[n-1]
345 re2 := p.stack[n-2]
346 if re1.Op != OpLiteral || re2.Op != OpLiteral || re1.Flags&FoldCase != re2.Flags&FoldCase {
347 return false
348 }
349
350
351 re2.Rune = append(re2.Rune, re1.Rune...)
352
353
354 if r >= 0 {
355 re1.Rune = re1.Rune0[:1]
356 re1.Rune[0] = r
357 re1.Flags = flags
358 return true
359 }
360
361 p.stack = p.stack[:n-1]
362 p.reuse(re1)
363 return false
364 }
365
366
367 func (p *parser) literal(r rune) {
368 re := p.newRegexp(OpLiteral)
369 re.Flags = p.flags
370 if p.flags&FoldCase != 0 {
371 r = minFoldRune(r)
372 }
373 re.Rune0[0] = r
374 re.Rune = re.Rune0[:1]
375 p.push(re)
376 }
377
378
379 func minFoldRune(r rune) rune {
380 if r < minFold || r > maxFold {
381 return r
382 }
383 m := r
384 r0 := r
385 for r = unicode.SimpleFold(r); r != r0; r = unicode.SimpleFold(r) {
386 m = min(m, r)
387 }
388 return m
389 }
390
391
392
393 func (p *parser) op(op Op) *Regexp {
394 re := p.newRegexp(op)
395 re.Flags = p.flags
396 return p.push(re)
397 }
398
399
400
401
402
403 func (p *parser) repeat(op Op, min, max int, before, after, lastRepeat string) (string, error) {
404 flags := p.flags
405 if p.flags&PerlX != 0 {
406 if len(after) > 0 && after[0] == '?' {
407 after = after[1:]
408 flags ^= NonGreedy
409 }
410 if lastRepeat != "" {
411
412
413
414 return "", &Error{ErrInvalidRepeatOp, lastRepeat[:len(lastRepeat)-len(after)]}
415 }
416 }
417 n := len(p.stack)
418 if n == 0 {
419 return "", &Error{ErrMissingRepeatArgument, before[:len(before)-len(after)]}
420 }
421 sub := p.stack[n-1]
422 if sub.Op >= opPseudo {
423 return "", &Error{ErrMissingRepeatArgument, before[:len(before)-len(after)]}
424 }
425
426 re := p.newRegexp(op)
427 re.Min = min
428 re.Max = max
429 re.Flags = flags
430 re.Sub = re.Sub0[:1]
431 re.Sub[0] = sub
432 p.stack[n-1] = re
433 p.checkLimits(re)
434
435 if op == OpRepeat && (min >= 2 || max >= 2) && !repeatIsValid(re, 1000) {
436 return "", &Error{ErrInvalidRepeatSize, before[:len(before)-len(after)]}
437 }
438
439 return after, nil
440 }
441
442
443
444
445
446
447
448
449
450
451 func repeatIsValid(re *Regexp, n int) bool {
452 if re.Op == OpRepeat {
453 m := re.Max
454 if m == 0 {
455 return true
456 }
457 if m < 0 {
458 m = re.Min
459 }
460 if m > n {
461 return false
462 }
463 if m > 0 {
464 n /= m
465 }
466 }
467 for _, sub := range re.Sub {
468 if !repeatIsValid(sub, n) {
469 return false
470 }
471 }
472 return true
473 }
474
475
476 func (p *parser) concat() *Regexp {
477 p.maybeConcat(-1, 0)
478
479
480 i := len(p.stack)
481 for i > 0 && p.stack[i-1].Op < opPseudo {
482 i--
483 }
484 subs := p.stack[i:]
485 p.stack = p.stack[:i]
486
487
488 if len(subs) == 0 {
489 return p.push(p.newRegexp(OpEmptyMatch))
490 }
491
492 return p.push(p.collapse(subs, OpConcat))
493 }
494
495
496 func (p *parser) alternate() *Regexp {
497
498
499 i := len(p.stack)
500 for i > 0 && p.stack[i-1].Op < opPseudo {
501 i--
502 }
503 subs := p.stack[i:]
504 p.stack = p.stack[:i]
505
506
507
508 if len(subs) > 0 {
509 cleanAlt(subs[len(subs)-1])
510 }
511
512
513
514 if len(subs) == 0 {
515 return p.push(p.newRegexp(OpNoMatch))
516 }
517
518 return p.push(p.collapse(subs, OpAlternate))
519 }
520
521
522 func cleanAlt(re *Regexp) {
523 switch re.Op {
524 case OpCharClass:
525 re.Rune = cleanClass(&re.Rune)
526 if len(re.Rune) == 2 && re.Rune[0] == 0 && re.Rune[1] == unicode.MaxRune {
527 re.Rune = nil
528 re.Op = OpAnyChar
529 return
530 }
531 if len(re.Rune) == 4 && re.Rune[0] == 0 && re.Rune[1] == '\n'-1 && re.Rune[2] == '\n'+1 && re.Rune[3] == unicode.MaxRune {
532 re.Rune = nil
533 re.Op = OpAnyCharNotNL
534 return
535 }
536 if cap(re.Rune)-len(re.Rune) > 100 {
537
538
539 re.Rune = append(re.Rune0[:0], re.Rune...)
540 }
541 }
542 }
543
544
545
546
547
548 func (p *parser) collapse(subs []*Regexp, op Op) *Regexp {
549 if len(subs) == 1 {
550 return subs[0]
551 }
552 re := p.newRegexp(op)
553 re.Sub = re.Sub0[:0]
554 for _, sub := range subs {
555 if sub.Op == op {
556 re.Sub = append(re.Sub, sub.Sub...)
557 p.reuse(sub)
558 } else {
559 re.Sub = append(re.Sub, sub)
560 }
561 }
562 if op == OpAlternate {
563 re.Sub = p.factor(re.Sub)
564 if len(re.Sub) == 1 {
565 old := re
566 re = re.Sub[0]
567 p.reuse(old)
568 }
569 }
570 return re
571 }
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588 func (p *parser) factor(sub []*Regexp) []*Regexp {
589 if len(sub) < 2 {
590 return sub
591 }
592
593
594 var str []rune
595 var strflags Flags
596 start := 0
597 out := sub[:0]
598 for i := 0; i <= len(sub); i++ {
599
600
601
602
603
604
605 var istr []rune
606 var iflags Flags
607 if i < len(sub) {
608 istr, iflags = p.leadingString(sub[i])
609 if iflags == strflags {
610 same := 0
611 for same < len(str) && same < len(istr) && str[same] == istr[same] {
612 same++
613 }
614 if same > 0 {
615
616
617 str = str[:same]
618 continue
619 }
620 }
621 }
622
623
624
625
626
627
628 if i == start {
629
630 } else if i == start+1 {
631
632 out = append(out, sub[start])
633 } else {
634
635 prefix := p.newRegexp(OpLiteral)
636 prefix.Flags = strflags
637 prefix.Rune = append(prefix.Rune[:0], str...)
638
639 for j := start; j < i; j++ {
640 sub[j] = p.removeLeadingString(sub[j], len(str))
641 p.checkLimits(sub[j])
642 }
643 suffix := p.collapse(sub[start:i], OpAlternate)
644
645 re := p.newRegexp(OpConcat)
646 re.Sub = append(re.Sub[:0], prefix, suffix)
647 out = append(out, re)
648 }
649
650
651 start = i
652 str = istr
653 strflags = iflags
654 }
655 sub = out
656
657
658
659
660
661
662
663
664
665 start = 0
666 out = sub[:0]
667 var first *Regexp
668 for i := 0; i <= len(sub); i++ {
669
670
671
672
673
674 var ifirst *Regexp
675 if i < len(sub) {
676 ifirst = p.leadingRegexp(sub[i])
677 if first != nil && first.Equal(ifirst) &&
678
679 (isCharClass(first) || (first.Op == OpRepeat && first.Min == first.Max && isCharClass(first.Sub[0]))) {
680 continue
681 }
682 }
683
684
685
686
687
688 if i == start {
689
690 } else if i == start+1 {
691
692 out = append(out, sub[start])
693 } else {
694
695 prefix := first
696 for j := start; j < i; j++ {
697 reuse := j != start
698 sub[j] = p.removeLeadingRegexp(sub[j], reuse)
699 p.checkLimits(sub[j])
700 }
701 suffix := p.collapse(sub[start:i], OpAlternate)
702
703 re := p.newRegexp(OpConcat)
704 re.Sub = append(re.Sub[:0], prefix, suffix)
705 out = append(out, re)
706 }
707
708
709 start = i
710 first = ifirst
711 }
712 sub = out
713
714
715 start = 0
716 out = sub[:0]
717 for i := 0; i <= len(sub); i++ {
718
719
720
721
722
723
724 if i < len(sub) && isCharClass(sub[i]) {
725 continue
726 }
727
728
729
730 if i == start {
731
732 } else if i == start+1 {
733 out = append(out, sub[start])
734 } else {
735
736
737 max := start
738 for j := start + 1; j < i; j++ {
739 if sub[max].Op < sub[j].Op || sub[max].Op == sub[j].Op && len(sub[max].Rune) < len(sub[j].Rune) {
740 max = j
741 }
742 }
743 sub[start], sub[max] = sub[max], sub[start]
744
745 for j := start + 1; j < i; j++ {
746 mergeCharClass(sub[start], sub[j])
747 p.reuse(sub[j])
748 }
749 cleanAlt(sub[start])
750 out = append(out, sub[start])
751 }
752
753
754 if i < len(sub) {
755 out = append(out, sub[i])
756 }
757 start = i + 1
758 }
759 sub = out
760
761
762 start = 0
763 out = sub[:0]
764 for i := range sub {
765 if i+1 < len(sub) && sub[i].Op == OpEmptyMatch && sub[i+1].Op == OpEmptyMatch {
766 continue
767 }
768 out = append(out, sub[i])
769 }
770 sub = out
771
772 return sub
773 }
774
775
776
777 func (p *parser) leadingString(re *Regexp) ([]rune, Flags) {
778 if re.Op == OpConcat && len(re.Sub) > 0 {
779 re = re.Sub[0]
780 }
781 if re.Op != OpLiteral {
782 return nil, 0
783 }
784 return re.Rune, re.Flags & FoldCase
785 }
786
787
788
789 func (p *parser) removeLeadingString(re *Regexp, n int) *Regexp {
790 if re.Op == OpConcat && len(re.Sub) > 0 {
791
792
793 sub := re.Sub[0]
794 sub = p.removeLeadingString(sub, n)
795 re.Sub[0] = sub
796 if sub.Op == OpEmptyMatch {
797 p.reuse(sub)
798 switch len(re.Sub) {
799 case 0, 1:
800
801 re.Op = OpEmptyMatch
802 re.Sub = nil
803 case 2:
804 old := re
805 re = re.Sub[1]
806 p.reuse(old)
807 default:
808 copy(re.Sub, re.Sub[1:])
809 re.Sub = re.Sub[:len(re.Sub)-1]
810 }
811 }
812 return re
813 }
814
815 if re.Op == OpLiteral {
816 re.Rune = re.Rune[:copy(re.Rune, re.Rune[n:])]
817 if len(re.Rune) == 0 {
818 re.Op = OpEmptyMatch
819 }
820 }
821 return re
822 }
823
824
825
826 func (p *parser) leadingRegexp(re *Regexp) *Regexp {
827 if re.Op == OpEmptyMatch {
828 return nil
829 }
830 if re.Op == OpConcat && len(re.Sub) > 0 {
831 sub := re.Sub[0]
832 if sub.Op == OpEmptyMatch {
833 return nil
834 }
835 return sub
836 }
837 return re
838 }
839
840
841
842
843 func (p *parser) removeLeadingRegexp(re *Regexp, reuse bool) *Regexp {
844 if re.Op == OpConcat && len(re.Sub) > 0 {
845 if reuse {
846 p.reuse(re.Sub[0])
847 }
848 re.Sub = re.Sub[:copy(re.Sub, re.Sub[1:])]
849 switch len(re.Sub) {
850 case 0:
851 re.Op = OpEmptyMatch
852 re.Sub = nil
853 case 1:
854 old := re
855 re = re.Sub[0]
856 p.reuse(old)
857 }
858 return re
859 }
860 if reuse {
861 p.reuse(re)
862 }
863 return p.newRegexp(OpEmptyMatch)
864 }
865
866 func literalRegexp(s string, flags Flags) *Regexp {
867 re := &Regexp{Op: OpLiteral}
868 re.Flags = flags
869 re.Rune = re.Rune0[:0]
870 for _, c := range s {
871 if len(re.Rune) >= cap(re.Rune) {
872
873 re.Rune = []rune(s)
874 break
875 }
876 re.Rune = append(re.Rune, c)
877 }
878 return re
879 }
880
881
882
883
884
885
886 func Parse(s string, flags Flags) (*Regexp, error) {
887 return parse(s, flags)
888 }
889
890 func parse(s string, flags Flags) (_ *Regexp, err error) {
891 defer func() {
892 switch r := recover(); r {
893 default:
894 panic(r)
895 case nil:
896
897 case ErrLarge:
898 err = &Error{Code: ErrLarge, Expr: s}
899 case ErrNestingDepth:
900 err = &Error{Code: ErrNestingDepth, Expr: s}
901 }
902 }()
903
904 if flags&Literal != 0 {
905
906 if err := checkUTF8(s); err != nil {
907 return nil, err
908 }
909 return literalRegexp(s, flags), nil
910 }
911
912
913 var (
914 p parser
915 c rune
916 op Op
917 lastRepeat string
918 )
919 p.flags = flags
920 p.wholeRegexp = s
921 t := s
922 for t != "" {
923 repeat := ""
924 BigSwitch:
925 switch t[0] {
926 default:
927 if c, t, err = nextRune(t); err != nil {
928 return nil, err
929 }
930 p.literal(c)
931
932 case '(':
933 if p.flags&PerlX != 0 && len(t) >= 2 && t[1] == '?' {
934
935 if t, err = p.parsePerlFlags(t); err != nil {
936 return nil, err
937 }
938 break
939 }
940 p.numCap++
941 p.op(opLeftParen).Cap = p.numCap
942 t = t[1:]
943 case '|':
944 p.parseVerticalBar()
945 t = t[1:]
946 case ')':
947 if err = p.parseRightParen(); err != nil {
948 return nil, err
949 }
950 t = t[1:]
951 case '^':
952 if p.flags&OneLine != 0 {
953 p.op(OpBeginText)
954 } else {
955 p.op(OpBeginLine)
956 }
957 t = t[1:]
958 case '$':
959 if p.flags&OneLine != 0 {
960 p.op(OpEndText).Flags |= WasDollar
961 } else {
962 p.op(OpEndLine)
963 }
964 t = t[1:]
965 case '.':
966 if p.flags&DotNL != 0 {
967 p.op(OpAnyChar)
968 } else {
969 p.op(OpAnyCharNotNL)
970 }
971 t = t[1:]
972 case '[':
973 if t, err = p.parseClass(t); err != nil {
974 return nil, err
975 }
976 case '*', '+', '?':
977 before := t
978 switch t[0] {
979 case '*':
980 op = OpStar
981 case '+':
982 op = OpPlus
983 case '?':
984 op = OpQuest
985 }
986 after := t[1:]
987 if after, err = p.repeat(op, 0, 0, before, after, lastRepeat); err != nil {
988 return nil, err
989 }
990 repeat = before
991 t = after
992 case '{':
993 op = OpRepeat
994 before := t
995 min, max, after, ok := p.parseRepeat(t)
996 if !ok {
997
998 p.literal('{')
999 t = t[1:]
1000 break
1001 }
1002 if min < 0 || min > 1000 || max > 1000 || max >= 0 && min > max {
1003
1004 return nil, &Error{ErrInvalidRepeatSize, before[:len(before)-len(after)]}
1005 }
1006 if after, err = p.repeat(op, min, max, before, after, lastRepeat); err != nil {
1007 return nil, err
1008 }
1009 repeat = before
1010 t = after
1011 case '\\':
1012 if p.flags&PerlX != 0 && len(t) >= 2 {
1013 switch t[1] {
1014 case 'A':
1015 p.op(OpBeginText)
1016 t = t[2:]
1017 break BigSwitch
1018 case 'b':
1019 p.op(OpWordBoundary)
1020 t = t[2:]
1021 break BigSwitch
1022 case 'B':
1023 p.op(OpNoWordBoundary)
1024 t = t[2:]
1025 break BigSwitch
1026 case 'C':
1027
1028 return nil, &Error{ErrInvalidEscape, t[:2]}
1029 case 'Q':
1030
1031 var lit string
1032 lit, t, _ = strings.Cut(t[2:], `\E`)
1033 for lit != "" {
1034 c, rest, err := nextRune(lit)
1035 if err != nil {
1036 return nil, err
1037 }
1038 p.literal(c)
1039 lit = rest
1040 }
1041 break BigSwitch
1042 case 'z':
1043 p.op(OpEndText)
1044 t = t[2:]
1045 break BigSwitch
1046 }
1047 }
1048
1049 re := p.newRegexp(OpCharClass)
1050 re.Flags = p.flags
1051
1052
1053 if len(t) >= 2 && (t[1] == 'p' || t[1] == 'P') {
1054 r, rest, err := p.parseUnicodeClass(t, re.Rune0[:0])
1055 if err != nil {
1056 return nil, err
1057 }
1058 if r != nil {
1059 re.Rune = r
1060 t = rest
1061 p.push(re)
1062 break BigSwitch
1063 }
1064 }
1065
1066
1067 if r, rest := p.parsePerlClassEscape(t, re.Rune0[:0]); r != nil {
1068 re.Rune = r
1069 t = rest
1070 p.push(re)
1071 break BigSwitch
1072 }
1073 p.reuse(re)
1074
1075
1076 if c, t, err = p.parseEscape(t); err != nil {
1077 return nil, err
1078 }
1079 p.literal(c)
1080 }
1081 lastRepeat = repeat
1082 }
1083
1084 p.concat()
1085 if p.swapVerticalBar() {
1086
1087 p.stack = p.stack[:len(p.stack)-1]
1088 }
1089 p.alternate()
1090
1091 n := len(p.stack)
1092 if n != 1 {
1093 return nil, &Error{ErrMissingParen, s}
1094 }
1095 return p.stack[0], nil
1096 }
1097
1098
1099
1100
1101 func (p *parser) parseRepeat(s string) (min, max int, rest string, ok bool) {
1102 if s == "" || s[0] != '{' {
1103 return
1104 }
1105 s = s[1:]
1106 var ok1 bool
1107 if min, s, ok1 = p.parseInt(s); !ok1 {
1108 return
1109 }
1110 if s == "" {
1111 return
1112 }
1113 if s[0] != ',' {
1114 max = min
1115 } else {
1116 s = s[1:]
1117 if s == "" {
1118 return
1119 }
1120 if s[0] == '}' {
1121 max = -1
1122 } else if max, s, ok1 = p.parseInt(s); !ok1 {
1123 return
1124 } else if max < 0 {
1125
1126 min = -1
1127 }
1128 }
1129 if s == "" || s[0] != '}' {
1130 return
1131 }
1132 rest = s[1:]
1133 ok = true
1134 return
1135 }
1136
1137
1138
1139
1140 func (p *parser) parsePerlFlags(s string) (rest string, err error) {
1141 t := s
1142
1143
1144
1145
1146
1147
1148
1149
1150
1151
1152
1153
1154
1155
1156
1157
1158 startsWithP := len(t) > 4 && t[2] == 'P' && t[3] == '<'
1159 startsWithName := len(t) > 3 && t[2] == '<'
1160
1161 if startsWithP || startsWithName {
1162
1163 exprStartPos := 4
1164 if startsWithName {
1165 exprStartPos = 3
1166 }
1167
1168
1169 end := strings.IndexRune(t, '>')
1170 if end < 0 {
1171 if err = checkUTF8(t); err != nil {
1172 return "", err
1173 }
1174 return "", &Error{ErrInvalidNamedCapture, s}
1175 }
1176
1177 capture := t[:end+1]
1178 name := t[exprStartPos:end]
1179 if err = checkUTF8(name); err != nil {
1180 return "", err
1181 }
1182 if !isValidCaptureName(name) {
1183 return "", &Error{ErrInvalidNamedCapture, capture}
1184 }
1185
1186
1187 p.numCap++
1188 re := p.op(opLeftParen)
1189 re.Cap = p.numCap
1190 re.Name = name
1191 return t[end+1:], nil
1192 }
1193
1194
1195 var c rune
1196 t = t[2:]
1197 flags := p.flags
1198 sign := +1
1199 sawFlag := false
1200 Loop:
1201 for t != "" {
1202 if c, t, err = nextRune(t); err != nil {
1203 return "", err
1204 }
1205 switch c {
1206 default:
1207 break Loop
1208
1209
1210 case 'i':
1211 flags |= FoldCase
1212 sawFlag = true
1213 case 'm':
1214 flags &^= OneLine
1215 sawFlag = true
1216 case 's':
1217 flags |= DotNL
1218 sawFlag = true
1219 case 'U':
1220 flags |= NonGreedy
1221 sawFlag = true
1222
1223
1224 case '-':
1225 if sign < 0 {
1226 break Loop
1227 }
1228 sign = -1
1229
1230
1231 flags = ^flags
1232 sawFlag = false
1233
1234
1235 case ':', ')':
1236 if sign < 0 {
1237 if !sawFlag {
1238 break Loop
1239 }
1240 flags = ^flags
1241 }
1242 if c == ':' {
1243
1244 p.op(opLeftParen)
1245 }
1246 p.flags = flags
1247 return t, nil
1248 }
1249 }
1250
1251 return "", &Error{ErrInvalidPerlOp, s[:len(s)-len(t)]}
1252 }
1253
1254
1255
1256
1257
1258
1259 func isValidCaptureName(name string) bool {
1260 if name == "" {
1261 return false
1262 }
1263 for _, c := range name {
1264 if c != '_' && !isalnum(c) {
1265 return false
1266 }
1267 }
1268 return true
1269 }
1270
1271
1272 func (p *parser) parseInt(s string) (n int, rest string, ok bool) {
1273 if s == "" || s[0] < '0' || '9' < s[0] {
1274 return
1275 }
1276
1277 if len(s) >= 2 && s[0] == '0' && '0' <= s[1] && s[1] <= '9' {
1278 return
1279 }
1280 t := s
1281 for s != "" && '0' <= s[0] && s[0] <= '9' {
1282 s = s[1:]
1283 }
1284 rest = s
1285 ok = true
1286
1287 t = t[:len(t)-len(s)]
1288 for i := 0; i < len(t); i++ {
1289
1290 if n >= 1e8 {
1291 n = -1
1292 break
1293 }
1294 n = n*10 + int(t[i]) - '0'
1295 }
1296 return
1297 }
1298
1299
1300
1301 func isCharClass(re *Regexp) bool {
1302 return re.Op == OpLiteral && len(re.Rune) == 1 ||
1303 re.Op == OpCharClass ||
1304 re.Op == OpAnyCharNotNL ||
1305 re.Op == OpAnyChar
1306 }
1307
1308
1309 func matchRune(re *Regexp, r rune) bool {
1310 switch re.Op {
1311 case OpLiteral:
1312 return len(re.Rune) == 1 && re.Rune[0] == r
1313 case OpCharClass:
1314 for i := 0; i < len(re.Rune); i += 2 {
1315 if re.Rune[i] <= r && r <= re.Rune[i+1] {
1316 return true
1317 }
1318 }
1319 return false
1320 case OpAnyCharNotNL:
1321 return r != '\n'
1322 case OpAnyChar:
1323 return true
1324 }
1325 return false
1326 }
1327
1328
1329 func (p *parser) parseVerticalBar() {
1330 p.concat()
1331
1332
1333
1334
1335
1336 if !p.swapVerticalBar() {
1337 p.op(opVerticalBar)
1338 }
1339 }
1340
1341
1342
1343
1344 func mergeCharClass(dst, src *Regexp) {
1345 switch dst.Op {
1346 case OpAnyChar:
1347
1348 case OpAnyCharNotNL:
1349
1350 if matchRune(src, '\n') {
1351 dst.Op = OpAnyChar
1352 }
1353 case OpCharClass:
1354
1355 if src.Op == OpLiteral {
1356 dst.Rune = appendLiteral(dst.Rune, src.Rune[0], src.Flags)
1357 } else {
1358 dst.Rune = appendClass(dst.Rune, src.Rune)
1359 }
1360 case OpLiteral:
1361
1362 if src.Rune[0] == dst.Rune[0] && src.Flags == dst.Flags {
1363 break
1364 }
1365 dst.Op = OpCharClass
1366 dst.Rune = appendLiteral(dst.Rune[:0], dst.Rune[0], dst.Flags)
1367 dst.Rune = appendLiteral(dst.Rune, src.Rune[0], src.Flags)
1368 }
1369 }
1370
1371
1372
1373
1374 func (p *parser) swapVerticalBar() bool {
1375
1376
1377 n := len(p.stack)
1378 if n >= 3 && p.stack[n-2].Op == opVerticalBar && isCharClass(p.stack[n-1]) && isCharClass(p.stack[n-3]) {
1379 re1 := p.stack[n-1]
1380 re3 := p.stack[n-3]
1381
1382 if re1.Op > re3.Op {
1383 re1, re3 = re3, re1
1384 p.stack[n-3] = re3
1385 }
1386 mergeCharClass(re3, re1)
1387 p.reuse(re1)
1388 p.stack = p.stack[:n-1]
1389 return true
1390 }
1391
1392 if n >= 2 {
1393 re1 := p.stack[n-1]
1394 re2 := p.stack[n-2]
1395 if re2.Op == opVerticalBar {
1396 if n >= 3 {
1397
1398
1399 cleanAlt(p.stack[n-3])
1400 }
1401 p.stack[n-2] = re1
1402 p.stack[n-1] = re2
1403 return true
1404 }
1405 }
1406 return false
1407 }
1408
1409
1410 func (p *parser) parseRightParen() error {
1411 p.concat()
1412 if p.swapVerticalBar() {
1413
1414 p.stack = p.stack[:len(p.stack)-1]
1415 }
1416 p.alternate()
1417
1418 n := len(p.stack)
1419 if n < 2 {
1420 return &Error{ErrUnexpectedParen, p.wholeRegexp}
1421 }
1422 re1 := p.stack[n-1]
1423 re2 := p.stack[n-2]
1424 p.stack = p.stack[:n-2]
1425 if re2.Op != opLeftParen {
1426 return &Error{ErrUnexpectedParen, p.wholeRegexp}
1427 }
1428
1429 p.flags = re2.Flags
1430 if re2.Cap == 0 {
1431
1432 p.push(re1)
1433 } else {
1434 re2.Op = OpCapture
1435 re2.Sub = re2.Sub0[:1]
1436 re2.Sub[0] = re1
1437 p.push(re2)
1438 }
1439 return nil
1440 }
1441
1442
1443
1444 func (p *parser) parseEscape(s string) (r rune, rest string, err error) {
1445 t := s[1:]
1446 if t == "" {
1447 return 0, "", &Error{ErrTrailingBackslash, ""}
1448 }
1449 c, t, err := nextRune(t)
1450 if err != nil {
1451 return 0, "", err
1452 }
1453
1454 Switch:
1455 switch c {
1456 default:
1457 if c < utf8.RuneSelf && !isalnum(c) {
1458
1459
1460
1461
1462 return c, t, nil
1463 }
1464
1465
1466 case '1', '2', '3', '4', '5', '6', '7':
1467
1468 if t == "" || t[0] < '0' || t[0] > '7' {
1469 break
1470 }
1471 fallthrough
1472 case '0':
1473
1474 r = c - '0'
1475 for i := 1; i < 3; i++ {
1476 if t == "" || t[0] < '0' || t[0] > '7' {
1477 break
1478 }
1479 r = r*8 + rune(t[0]) - '0'
1480 t = t[1:]
1481 }
1482 return r, t, nil
1483
1484
1485 case 'x':
1486 if t == "" {
1487 break
1488 }
1489 if c, t, err = nextRune(t); err != nil {
1490 return 0, "", err
1491 }
1492 if c == '{' {
1493
1494
1495
1496
1497 nhex := 0
1498 r = 0
1499 for {
1500 if t == "" {
1501 break Switch
1502 }
1503 if c, t, err = nextRune(t); err != nil {
1504 return 0, "", err
1505 }
1506 if c == '}' {
1507 break
1508 }
1509 v := unhex(c)
1510 if v < 0 {
1511 break Switch
1512 }
1513 r = r*16 + v
1514 if r > unicode.MaxRune {
1515 break Switch
1516 }
1517 nhex++
1518 }
1519 if nhex == 0 {
1520 break Switch
1521 }
1522 return r, t, nil
1523 }
1524
1525
1526 x := unhex(c)
1527 if c, t, err = nextRune(t); err != nil {
1528 return 0, "", err
1529 }
1530 y := unhex(c)
1531 if x < 0 || y < 0 {
1532 break
1533 }
1534 return x*16 + y, t, nil
1535
1536
1537
1538
1539
1540
1541
1542 case 'a':
1543 return '\a', t, err
1544 case 'f':
1545 return '\f', t, err
1546 case 'n':
1547 return '\n', t, err
1548 case 'r':
1549 return '\r', t, err
1550 case 't':
1551 return '\t', t, err
1552 case 'v':
1553 return '\v', t, err
1554 }
1555 return 0, "", &Error{ErrInvalidEscape, s[:len(s)-len(t)]}
1556 }
1557
1558
1559
1560 func (p *parser) parseClassChar(s, wholeClass string) (r rune, rest string, err error) {
1561 if s == "" {
1562 return 0, "", &Error{Code: ErrMissingBracket, Expr: wholeClass}
1563 }
1564
1565
1566
1567 if s[0] == '\\' {
1568 return p.parseEscape(s)
1569 }
1570
1571 return nextRune(s)
1572 }
1573
1574 type charGroup struct {
1575 sign int
1576 class []rune
1577 }
1578
1579
1580
1581
1582
1583
1584 func (p *parser) parsePerlClassEscape(s string, r []rune) (out []rune, rest string) {
1585 if p.flags&PerlX == 0 || len(s) < 2 || s[0] != '\\' {
1586 return
1587 }
1588 g := perlGroup[s[0:2]]
1589 if g.sign == 0 {
1590 return
1591 }
1592 return p.appendGroup(r, g), s[2:]
1593 }
1594
1595
1596
1597
1598 func (p *parser) parseNamedClass(s string, r []rune) (out []rune, rest string, err error) {
1599 if len(s) < 2 || s[0] != '[' || s[1] != ':' {
1600 return
1601 }
1602
1603 i := strings.Index(s[2:], ":]")
1604 if i < 0 {
1605 return
1606 }
1607 i += 2
1608 name, s := s[0:i+2], s[i+2:]
1609 g := posixGroup[name]
1610 if g.sign == 0 {
1611 return nil, "", &Error{ErrInvalidCharRange, name}
1612 }
1613 return p.appendGroup(r, g), s, nil
1614 }
1615
1616 func (p *parser) appendGroup(r []rune, g charGroup) []rune {
1617 if p.flags&FoldCase == 0 {
1618 if g.sign < 0 {
1619 r = appendNegatedClass(r, g.class)
1620 } else {
1621 r = appendClass(r, g.class)
1622 }
1623 } else {
1624 tmp := p.tmpClass[:0]
1625 tmp = appendFoldedClass(tmp, g.class)
1626 p.tmpClass = tmp
1627 tmp = cleanClass(&p.tmpClass)
1628 if g.sign < 0 {
1629 r = appendNegatedClass(r, tmp)
1630 } else {
1631 r = appendClass(r, tmp)
1632 }
1633 }
1634 return r
1635 }
1636
1637 var anyTable = &unicode.RangeTable{
1638 R16: []unicode.Range16{{Lo: 0, Hi: 1<<16 - 1, Stride: 1}},
1639 R32: []unicode.Range32{{Lo: 1 << 16, Hi: unicode.MaxRune, Stride: 1}},
1640 }
1641
1642
1643
1644 func unicodeTable(name string) (*unicode.RangeTable, *unicode.RangeTable) {
1645
1646 if name == "Any" {
1647 return anyTable, anyTable
1648 }
1649 if t := unicode.Categories[name]; t != nil {
1650 return t, unicode.FoldCategory[name]
1651 }
1652 if t := unicode.Scripts[name]; t != nil {
1653 return t, unicode.FoldScript[name]
1654 }
1655 return nil, nil
1656 }
1657
1658
1659
1660
1661 func (p *parser) parseUnicodeClass(s string, r []rune) (out []rune, rest string, err error) {
1662 if p.flags&UnicodeGroups == 0 || len(s) < 2 || s[0] != '\\' || s[1] != 'p' && s[1] != 'P' {
1663 return
1664 }
1665
1666
1667 sign := +1
1668 if s[1] == 'P' {
1669 sign = -1
1670 }
1671 t := s[2:]
1672 c, t, err := nextRune(t)
1673 if err != nil {
1674 return
1675 }
1676 var seq, name string
1677 if c != '{' {
1678
1679 seq = s[:len(s)-len(t)]
1680 name = seq[2:]
1681 } else {
1682
1683 end := strings.IndexRune(s, '}')
1684 if end < 0 {
1685 if err = checkUTF8(s); err != nil {
1686 return
1687 }
1688 return nil, "", &Error{ErrInvalidCharRange, s}
1689 }
1690 seq, t = s[:end+1], s[end+1:]
1691 name = s[3:end]
1692 if err = checkUTF8(name); err != nil {
1693 return
1694 }
1695 }
1696
1697
1698 if name != "" && name[0] == '^' {
1699 sign = -sign
1700 name = name[1:]
1701 }
1702
1703 tab, fold := unicodeTable(name)
1704 if tab == nil {
1705 return nil, "", &Error{ErrInvalidCharRange, seq}
1706 }
1707
1708 if p.flags&FoldCase == 0 || fold == nil {
1709 if sign > 0 {
1710 r = appendTable(r, tab)
1711 } else {
1712 r = appendNegatedTable(r, tab)
1713 }
1714 } else {
1715
1716
1717
1718 tmp := p.tmpClass[:0]
1719 tmp = appendTable(tmp, tab)
1720 tmp = appendTable(tmp, fold)
1721 p.tmpClass = tmp
1722 tmp = cleanClass(&p.tmpClass)
1723 if sign > 0 {
1724 r = appendClass(r, tmp)
1725 } else {
1726 r = appendNegatedClass(r, tmp)
1727 }
1728 }
1729 return r, t, nil
1730 }
1731
1732
1733
1734 func (p *parser) parseClass(s string) (rest string, err error) {
1735 t := s[1:]
1736 re := p.newRegexp(OpCharClass)
1737 re.Flags = p.flags
1738 re.Rune = re.Rune0[:0]
1739
1740 sign := +1
1741 if t != "" && t[0] == '^' {
1742 sign = -1
1743 t = t[1:]
1744
1745
1746
1747 if p.flags&ClassNL == 0 {
1748 re.Rune = append(re.Rune, '\n', '\n')
1749 }
1750 }
1751
1752 class := re.Rune
1753 first := true
1754 for t == "" || t[0] != ']' || first {
1755
1756
1757 if t != "" && t[0] == '-' && p.flags&PerlX == 0 && !first && (len(t) == 1 || t[1] != ']') {
1758 _, size := utf8.DecodeRuneInString(t[1:])
1759 return "", &Error{Code: ErrInvalidCharRange, Expr: t[:1+size]}
1760 }
1761 first = false
1762
1763
1764 if len(t) > 2 && t[0] == '[' && t[1] == ':' {
1765 nclass, nt, err := p.parseNamedClass(t, class)
1766 if err != nil {
1767 return "", err
1768 }
1769 if nclass != nil {
1770 class, t = nclass, nt
1771 continue
1772 }
1773 }
1774
1775
1776 nclass, nt, err := p.parseUnicodeClass(t, class)
1777 if err != nil {
1778 return "", err
1779 }
1780 if nclass != nil {
1781 class, t = nclass, nt
1782 continue
1783 }
1784
1785
1786 if nclass, nt := p.parsePerlClassEscape(t, class); nclass != nil {
1787 class, t = nclass, nt
1788 continue
1789 }
1790
1791
1792 rng := t
1793 var lo, hi rune
1794 if lo, t, err = p.parseClassChar(t, s); err != nil {
1795 return "", err
1796 }
1797 hi = lo
1798
1799 if len(t) >= 2 && t[0] == '-' && t[1] != ']' {
1800 t = t[1:]
1801 if hi, t, err = p.parseClassChar(t, s); err != nil {
1802 return "", err
1803 }
1804 if hi < lo {
1805 rng = rng[:len(rng)-len(t)]
1806 return "", &Error{Code: ErrInvalidCharRange, Expr: rng}
1807 }
1808 }
1809 if p.flags&FoldCase == 0 {
1810 class = appendRange(class, lo, hi)
1811 } else {
1812 class = appendFoldedRange(class, lo, hi)
1813 }
1814 }
1815 t = t[1:]
1816
1817
1818 re.Rune = class
1819 class = cleanClass(&re.Rune)
1820 if sign < 0 {
1821 class = negateClass(class)
1822 }
1823 re.Rune = class
1824 p.push(re)
1825 return t, nil
1826 }
1827
1828
1829
1830 func cleanClass(rp *[]rune) []rune {
1831
1832
1833 sort.Sort(ranges{rp})
1834
1835 r := *rp
1836 if len(r) < 2 {
1837 return r
1838 }
1839
1840
1841 w := 2
1842 for i := 2; i < len(r); i += 2 {
1843 lo, hi := r[i], r[i+1]
1844 if lo <= r[w-1]+1 {
1845
1846 if hi > r[w-1] {
1847 r[w-1] = hi
1848 }
1849 continue
1850 }
1851
1852 r[w] = lo
1853 r[w+1] = hi
1854 w += 2
1855 }
1856
1857 return r[:w]
1858 }
1859
1860
1861
1862 func inCharClass(r rune, class []rune) bool {
1863 _, ok := sort.Find(len(class)/2, func(i int) int {
1864 lo, hi := class[2*i], class[2*i+1]
1865 if r > hi {
1866 return +1
1867 }
1868 if r < lo {
1869 return -1
1870 }
1871 return 0
1872 })
1873 return ok
1874 }
1875
1876
1877 func appendLiteral(r []rune, x rune, flags Flags) []rune {
1878 if flags&FoldCase != 0 {
1879 return appendFoldedRange(r, x, x)
1880 }
1881 return appendRange(r, x, x)
1882 }
1883
1884
1885 func appendRange(r []rune, lo, hi rune) []rune {
1886
1887
1888
1889
1890 n := len(r)
1891 for i := 2; i <= 4; i += 2 {
1892 if n >= i {
1893 rlo, rhi := r[n-i], r[n-i+1]
1894 if lo <= rhi+1 && rlo <= hi+1 {
1895 if lo < rlo {
1896 r[n-i] = lo
1897 }
1898 if hi > rhi {
1899 r[n-i+1] = hi
1900 }
1901 return r
1902 }
1903 }
1904 }
1905
1906 return append(r, lo, hi)
1907 }
1908
1909 const (
1910
1911
1912 minFold = 0x0041
1913 maxFold = 0x1e943
1914 )
1915
1916
1917
1918 func appendFoldedRange(r []rune, lo, hi rune) []rune {
1919
1920 if lo <= minFold && hi >= maxFold {
1921
1922 return appendRange(r, lo, hi)
1923 }
1924 if hi < minFold || lo > maxFold {
1925
1926 return appendRange(r, lo, hi)
1927 }
1928 if lo < minFold {
1929
1930 r = appendRange(r, lo, minFold-1)
1931 lo = minFold
1932 }
1933 if hi > maxFold {
1934
1935 r = appendRange(r, maxFold+1, hi)
1936 hi = maxFold
1937 }
1938
1939
1940 for c := lo; c <= hi; c++ {
1941 r = appendRange(r, c, c)
1942 f := unicode.SimpleFold(c)
1943 for f != c {
1944 r = appendRange(r, f, f)
1945 f = unicode.SimpleFold(f)
1946 }
1947 }
1948 return r
1949 }
1950
1951
1952
1953 func appendClass(r []rune, x []rune) []rune {
1954 for i := 0; i < len(x); i += 2 {
1955 r = appendRange(r, x[i], x[i+1])
1956 }
1957 return r
1958 }
1959
1960
1961 func appendFoldedClass(r []rune, x []rune) []rune {
1962 for i := 0; i < len(x); i += 2 {
1963 r = appendFoldedRange(r, x[i], x[i+1])
1964 }
1965 return r
1966 }
1967
1968
1969
1970 func appendNegatedClass(r []rune, x []rune) []rune {
1971 nextLo := '\u0000'
1972 for i := 0; i < len(x); i += 2 {
1973 lo, hi := x[i], x[i+1]
1974 if nextLo <= lo-1 {
1975 r = appendRange(r, nextLo, lo-1)
1976 }
1977 nextLo = hi + 1
1978 }
1979 if nextLo <= unicode.MaxRune {
1980 r = appendRange(r, nextLo, unicode.MaxRune)
1981 }
1982 return r
1983 }
1984
1985
1986 func appendTable(r []rune, x *unicode.RangeTable) []rune {
1987 for _, xr := range x.R16 {
1988 lo, hi, stride := rune(xr.Lo), rune(xr.Hi), rune(xr.Stride)
1989 if stride == 1 {
1990 r = appendRange(r, lo, hi)
1991 continue
1992 }
1993 for c := lo; c <= hi; c += stride {
1994 r = appendRange(r, c, c)
1995 }
1996 }
1997 for _, xr := range x.R32 {
1998 lo, hi, stride := rune(xr.Lo), rune(xr.Hi), rune(xr.Stride)
1999 if stride == 1 {
2000 r = appendRange(r, lo, hi)
2001 continue
2002 }
2003 for c := lo; c <= hi; c += stride {
2004 r = appendRange(r, c, c)
2005 }
2006 }
2007 return r
2008 }
2009
2010
2011 func appendNegatedTable(r []rune, x *unicode.RangeTable) []rune {
2012 nextLo := '\u0000'
2013 for _, xr := range x.R16 {
2014 lo, hi, stride := rune(xr.Lo), rune(xr.Hi), rune(xr.Stride)
2015 if stride == 1 {
2016 if nextLo <= lo-1 {
2017 r = appendRange(r, nextLo, lo-1)
2018 }
2019 nextLo = hi + 1
2020 continue
2021 }
2022 for c := lo; c <= hi; c += stride {
2023 if nextLo <= c-1 {
2024 r = appendRange(r, nextLo, c-1)
2025 }
2026 nextLo = c + 1
2027 }
2028 }
2029 for _, xr := range x.R32 {
2030 lo, hi, stride := rune(xr.Lo), rune(xr.Hi), rune(xr.Stride)
2031 if stride == 1 {
2032 if nextLo <= lo-1 {
2033 r = appendRange(r, nextLo, lo-1)
2034 }
2035 nextLo = hi + 1
2036 continue
2037 }
2038 for c := lo; c <= hi; c += stride {
2039 if nextLo <= c-1 {
2040 r = appendRange(r, nextLo, c-1)
2041 }
2042 nextLo = c + 1
2043 }
2044 }
2045 if nextLo <= unicode.MaxRune {
2046 r = appendRange(r, nextLo, unicode.MaxRune)
2047 }
2048 return r
2049 }
2050
2051
2052
2053 func negateClass(r []rune) []rune {
2054 nextLo := '\u0000'
2055 w := 0
2056 for i := 0; i < len(r); i += 2 {
2057 lo, hi := r[i], r[i+1]
2058 if nextLo <= lo-1 {
2059 r[w] = nextLo
2060 r[w+1] = lo - 1
2061 w += 2
2062 }
2063 nextLo = hi + 1
2064 }
2065 r = r[:w]
2066 if nextLo <= unicode.MaxRune {
2067
2068
2069 r = append(r, nextLo, unicode.MaxRune)
2070 }
2071 return r
2072 }
2073
2074
2075
2076
2077
2078 type ranges struct {
2079 p *[]rune
2080 }
2081
2082 func (ra ranges) Less(i, j int) bool {
2083 p := *ra.p
2084 i *= 2
2085 j *= 2
2086 return p[i] < p[j] || p[i] == p[j] && p[i+1] > p[j+1]
2087 }
2088
2089 func (ra ranges) Len() int {
2090 return len(*ra.p) / 2
2091 }
2092
2093 func (ra ranges) Swap(i, j int) {
2094 p := *ra.p
2095 i *= 2
2096 j *= 2
2097 p[i], p[i+1], p[j], p[j+1] = p[j], p[j+1], p[i], p[i+1]
2098 }
2099
2100 func checkUTF8(s string) error {
2101 for s != "" {
2102 rune, size := utf8.DecodeRuneInString(s)
2103 if rune == utf8.RuneError && size == 1 {
2104 return &Error{Code: ErrInvalidUTF8, Expr: s}
2105 }
2106 s = s[size:]
2107 }
2108 return nil
2109 }
2110
2111 func nextRune(s string) (c rune, t string, err error) {
2112 c, size := utf8.DecodeRuneInString(s)
2113 if c == utf8.RuneError && size == 1 {
2114 return 0, "", &Error{Code: ErrInvalidUTF8, Expr: s}
2115 }
2116 return c, s[size:], nil
2117 }
2118
2119 func isalnum(c rune) bool {
2120 return '0' <= c && c <= '9' || 'A' <= c && c <= 'Z' || 'a' <= c && c <= 'z'
2121 }
2122
2123 func unhex(c rune) rune {
2124 if '0' <= c && c <= '9' {
2125 return c - '0'
2126 }
2127 if 'a' <= c && c <= 'f' {
2128 return c - 'a' + 10
2129 }
2130 if 'A' <= c && c <= 'F' {
2131 return c - 'A' + 10
2132 }
2133 return -1
2134 }
2135
View as plain text