1
2
3
4
5 package syntax
6
7 import "unicode"
8
9
10
11
12
13
14
15
16
17
18
19 type patchList struct {
20 head, tail uint32
21 }
22
23 func makePatchList(n uint32) patchList {
24 return patchList{n, n}
25 }
26
27 func (l patchList) patch(p *Prog, val uint32) {
28 head := l.head
29 for head != 0 {
30 i := &p.Inst[head>>1]
31 if head&1 == 0 {
32 head = i.Out
33 i.Out = val
34 } else {
35 head = i.Arg
36 i.Arg = val
37 }
38 }
39 }
40
41 func (l1 patchList) append(p *Prog, l2 patchList) patchList {
42 if l1.head == 0 {
43 return l2
44 }
45 if l2.head == 0 {
46 return l1
47 }
48
49 i := &p.Inst[l1.tail>>1]
50 if l1.tail&1 == 0 {
51 i.Out = l2.head
52 } else {
53 i.Arg = l2.head
54 }
55 return patchList{l1.head, l2.tail}
56 }
57
58
59 type frag struct {
60 i uint32
61 out patchList
62 nullable bool
63 }
64
65 type compiler struct {
66 p *Prog
67 }
68
69
70
71 func Compile(re *Regexp) (*Prog, error) {
72 var c compiler
73 c.init()
74 f := c.compile(re)
75 f.out.patch(c.p, c.inst(InstMatch).i)
76 c.p.Start = int(f.i)
77 return c.p, nil
78 }
79
80 func (c *compiler) init() {
81 c.p = new(Prog)
82 c.p.NumCap = 2
83 c.inst(InstFail)
84 }
85
86 var anyRuneNotNL = []rune{0, '\n' - 1, '\n' + 1, unicode.MaxRune}
87 var anyRune = []rune{0, unicode.MaxRune}
88
89 func (c *compiler) compile(re *Regexp) frag {
90 switch re.Op {
91 case OpNoMatch:
92 return c.fail()
93 case OpEmptyMatch:
94 return c.nop()
95 case OpLiteral:
96 if len(re.Rune) == 0 {
97 return c.nop()
98 }
99 var f frag
100 for j := range re.Rune {
101 f1 := c.rune(re.Rune[j:j+1], re.Flags)
102 if j == 0 {
103 f = f1
104 } else {
105 f = c.cat(f, f1)
106 }
107 }
108 return f
109 case OpCharClass:
110 return c.rune(re.Rune, re.Flags)
111 case OpAnyCharNotNL:
112 return c.rune(anyRuneNotNL, 0)
113 case OpAnyChar:
114 return c.rune(anyRune, 0)
115 case OpBeginLine:
116 return c.empty(EmptyBeginLine)
117 case OpEndLine:
118 return c.empty(EmptyEndLine)
119 case OpBeginText:
120 return c.empty(EmptyBeginText)
121 case OpEndText:
122 return c.empty(EmptyEndText)
123 case OpWordBoundary:
124 return c.empty(EmptyWordBoundary)
125 case OpNoWordBoundary:
126 return c.empty(EmptyNoWordBoundary)
127 case OpCapture:
128 bra := c.cap(uint32(re.Cap << 1))
129 sub := c.compile(re.Sub[0])
130 ket := c.cap(uint32(re.Cap<<1 | 1))
131 return c.cat(c.cat(bra, sub), ket)
132 case OpStar:
133 return c.star(c.compile(re.Sub[0]), re.Flags&NonGreedy != 0)
134 case OpPlus:
135 return c.plus(c.compile(re.Sub[0]), re.Flags&NonGreedy != 0)
136 case OpQuest:
137 return c.quest(c.compile(re.Sub[0]), re.Flags&NonGreedy != 0)
138 case OpConcat:
139 if len(re.Sub) == 0 {
140 return c.nop()
141 }
142 var f frag
143 for i, sub := range re.Sub {
144 if i == 0 {
145 f = c.compile(sub)
146 } else {
147 f = c.cat(f, c.compile(sub))
148 }
149 }
150 return f
151 case OpAlternate:
152 var f frag
153 for _, sub := range re.Sub {
154 f = c.alt(f, c.compile(sub))
155 }
156 return f
157 }
158 panic("regexp: unhandled case in compile")
159 }
160
161 func (c *compiler) inst(op InstOp) frag {
162
163 f := frag{i: uint32(len(c.p.Inst)), nullable: true}
164 c.p.Inst = append(c.p.Inst, Inst{Op: op})
165 return f
166 }
167
168 func (c *compiler) nop() frag {
169 f := c.inst(InstNop)
170 f.out = makePatchList(f.i << 1)
171 return f
172 }
173
174 func (c *compiler) fail() frag {
175 return frag{}
176 }
177
178 func (c *compiler) cap(arg uint32) frag {
179 f := c.inst(InstCapture)
180 f.out = makePatchList(f.i << 1)
181 c.p.Inst[f.i].Arg = arg
182
183 if c.p.NumCap < int(arg)+1 {
184 c.p.NumCap = int(arg) + 1
185 }
186 return f
187 }
188
189 func (c *compiler) cat(f1, f2 frag) frag {
190
191 if f1.i == 0 || f2.i == 0 {
192 return frag{}
193 }
194
195
196
197 f1.out.patch(c.p, f2.i)
198 return frag{f1.i, f2.out, f1.nullable && f2.nullable}
199 }
200
201 func (c *compiler) alt(f1, f2 frag) frag {
202
203 if f1.i == 0 {
204 return f2
205 }
206 if f2.i == 0 {
207 return f1
208 }
209
210 f := c.inst(InstAlt)
211 i := &c.p.Inst[f.i]
212 i.Out = f1.i
213 i.Arg = f2.i
214 f.out = f1.out.append(c.p, f2.out)
215 f.nullable = f1.nullable || f2.nullable
216 return f
217 }
218
219 func (c *compiler) quest(f1 frag, nongreedy bool) frag {
220 f := c.inst(InstAlt)
221 i := &c.p.Inst[f.i]
222 if nongreedy {
223 i.Arg = f1.i
224 f.out = makePatchList(f.i << 1)
225 } else {
226 i.Out = f1.i
227 f.out = makePatchList(f.i<<1 | 1)
228 }
229 f.out = f.out.append(c.p, f1.out)
230 return f
231 }
232
233
234
235
236
237
238 func (c *compiler) loop(f1 frag, nongreedy bool) frag {
239 f := c.inst(InstAlt)
240 i := &c.p.Inst[f.i]
241 if nongreedy {
242 i.Arg = f1.i
243 f.out = makePatchList(f.i << 1)
244 } else {
245 i.Out = f1.i
246 f.out = makePatchList(f.i<<1 | 1)
247 }
248 f1.out.patch(c.p, f.i)
249 return f
250 }
251
252 func (c *compiler) star(f1 frag, nongreedy bool) frag {
253 if f1.nullable {
254
255
256 return c.quest(c.plus(f1, nongreedy), nongreedy)
257 }
258 return c.loop(f1, nongreedy)
259 }
260
261 func (c *compiler) plus(f1 frag, nongreedy bool) frag {
262 return frag{f1.i, c.loop(f1, nongreedy).out, f1.nullable}
263 }
264
265 func (c *compiler) empty(op EmptyOp) frag {
266 f := c.inst(InstEmptyWidth)
267 c.p.Inst[f.i].Arg = uint32(op)
268 f.out = makePatchList(f.i << 1)
269 return f
270 }
271
272 func (c *compiler) rune(r []rune, flags Flags) frag {
273 f := c.inst(InstRune)
274 f.nullable = false
275 i := &c.p.Inst[f.i]
276 i.Rune = r
277 flags &= FoldCase
278 if len(r) != 1 || unicode.SimpleFold(r[0]) == r[0] {
279
280 flags &^= FoldCase
281 }
282 i.Arg = uint32(flags)
283 f.out = makePatchList(f.i << 1)
284
285
286 switch {
287 case flags&FoldCase == 0 && (len(r) == 1 || len(r) == 2 && r[0] == r[1]):
288 i.Op = InstRune1
289 case len(r) == 2 && r[0] == 0 && r[1] == unicode.MaxRune:
290 i.Op = InstRuneAny
291 case len(r) == 4 && r[0] == 0 && r[1] == '\n'-1 && r[2] == '\n'+1 && r[3] == unicode.MaxRune:
292 i.Op = InstRuneAnyNotNL
293 }
294
295 return f
296 }
297
View as plain text