Source file
src/regexp/onepass.go
1
2
3
4
5 package regexp
6
7 import (
8 "regexp/syntax"
9 "slices"
10 "strings"
11 "unicode"
12 "unicode/utf8"
13 )
14
15
16
17
18
19
20
21
22
23 type onePassProg struct {
24 Inst []onePassInst
25 Start int
26 NumCap int
27 }
28
29
30
31 type onePassInst struct {
32 syntax.Inst
33 Next []uint32
34 }
35
36
37
38
39
40
41 func onePassPrefix(p *syntax.Prog) (prefix string, complete bool, pc uint32) {
42 i := &p.Inst[p.Start]
43 if i.Op != syntax.InstEmptyWidth || (syntax.EmptyOp(i.Arg))&syntax.EmptyBeginText == 0 {
44 return "", i.Op == syntax.InstMatch, uint32(p.Start)
45 }
46 pc = i.Out
47 i = &p.Inst[pc]
48 for i.Op == syntax.InstNop {
49 pc = i.Out
50 i = &p.Inst[pc]
51 }
52
53 if iop(i) != syntax.InstRune || len(i.Rune) != 1 {
54 return "", i.Op == syntax.InstMatch, uint32(p.Start)
55 }
56
57
58 var buf strings.Builder
59 for iop(i) == syntax.InstRune && len(i.Rune) == 1 && syntax.Flags(i.Arg)&syntax.FoldCase == 0 && i.Rune[0] != utf8.RuneError {
60 buf.WriteRune(i.Rune[0])
61 pc, i = i.Out, &p.Inst[i.Out]
62 }
63 if i.Op == syntax.InstEmptyWidth &&
64 syntax.EmptyOp(i.Arg)&syntax.EmptyEndText != 0 &&
65 p.Inst[i.Out].Op == syntax.InstMatch {
66 complete = true
67 }
68 return buf.String(), complete, pc
69 }
70
71
72
73
74
75 func onePassNext(i *onePassInst, r rune) uint32 {
76 next := i.MatchRunePos(r)
77 if next >= 0 {
78 return i.Next[next]
79 }
80 if i.Op == syntax.InstAltMatch {
81 return i.Out
82 }
83 return 0
84 }
85
86 func iop(i *syntax.Inst) syntax.InstOp {
87 op := i.Op
88 switch op {
89 case syntax.InstRune1, syntax.InstRuneAny, syntax.InstRuneAnyNotNL:
90 op = syntax.InstRune
91 }
92 return op
93 }
94
95
96 type queueOnePass struct {
97 sparse []uint32
98 dense []uint32
99 size, nextIndex uint32
100 }
101
102 func (q *queueOnePass) empty() bool {
103 return q.nextIndex >= q.size
104 }
105
106 func (q *queueOnePass) next() (n uint32) {
107 n = q.dense[q.nextIndex]
108 q.nextIndex++
109 return
110 }
111
112 func (q *queueOnePass) clear() {
113 q.size = 0
114 q.nextIndex = 0
115 }
116
117 func (q *queueOnePass) contains(u uint32) bool {
118 if u >= uint32(len(q.sparse)) {
119 return false
120 }
121 return q.sparse[u] < q.size && q.dense[q.sparse[u]] == u
122 }
123
124 func (q *queueOnePass) insert(u uint32) {
125 if !q.contains(u) {
126 q.insertNew(u)
127 }
128 }
129
130 func (q *queueOnePass) insertNew(u uint32) {
131 if u >= uint32(len(q.sparse)) {
132 return
133 }
134 q.sparse[u] = q.size
135 q.dense[q.size] = u
136 q.size++
137 }
138
139 func newQueue(size int) (q *queueOnePass) {
140 return &queueOnePass{
141 sparse: make([]uint32, size),
142 dense: make([]uint32, size),
143 }
144 }
145
146
147
148
149
150
151 const mergeFailed = uint32(0xffffffff)
152
153 var (
154 noRune = []rune{}
155 noNext = []uint32{mergeFailed}
156 )
157
158 func mergeRuneSets(leftRunes, rightRunes *[]rune, leftPC, rightPC uint32) ([]rune, []uint32) {
159 leftLen := len(*leftRunes)
160 rightLen := len(*rightRunes)
161 if leftLen&0x1 != 0 || rightLen&0x1 != 0 {
162 panic("mergeRuneSets odd length []rune")
163 }
164 var (
165 lx, rx int
166 )
167 merged := make([]rune, 0)
168 next := make([]uint32, 0)
169 ok := true
170 defer func() {
171 if !ok {
172 merged = nil
173 next = nil
174 }
175 }()
176
177 ix := -1
178 extend := func(newLow *int, newArray *[]rune, pc uint32) bool {
179 if ix > 0 && (*newArray)[*newLow] <= merged[ix] {
180 return false
181 }
182 merged = append(merged, (*newArray)[*newLow], (*newArray)[*newLow+1])
183 *newLow += 2
184 ix += 2
185 next = append(next, pc)
186 return true
187 }
188
189 for lx < leftLen || rx < rightLen {
190 switch {
191 case rx >= rightLen:
192 ok = extend(&lx, leftRunes, leftPC)
193 case lx >= leftLen:
194 ok = extend(&rx, rightRunes, rightPC)
195 case (*rightRunes)[rx] < (*leftRunes)[lx]:
196 ok = extend(&rx, rightRunes, rightPC)
197 default:
198 ok = extend(&lx, leftRunes, leftPC)
199 }
200 if !ok {
201 return noRune, noNext
202 }
203 }
204 return merged, next
205 }
206
207
208 func cleanupOnePass(prog *onePassProg, original *syntax.Prog) {
209 for ix, instOriginal := range original.Inst {
210 switch instOriginal.Op {
211 case syntax.InstAlt, syntax.InstAltMatch, syntax.InstRune:
212 case syntax.InstCapture, syntax.InstEmptyWidth, syntax.InstNop, syntax.InstMatch, syntax.InstFail:
213 prog.Inst[ix].Next = nil
214 case syntax.InstRune1, syntax.InstRuneAny, syntax.InstRuneAnyNotNL:
215 prog.Inst[ix].Next = nil
216 prog.Inst[ix] = onePassInst{Inst: instOriginal}
217 }
218 }
219 }
220
221
222 func onePassCopy(prog *syntax.Prog) *onePassProg {
223 p := &onePassProg{
224 Start: prog.Start,
225 NumCap: prog.NumCap,
226 Inst: make([]onePassInst, len(prog.Inst)),
227 }
228 for i, inst := range prog.Inst {
229 p.Inst[i] = onePassInst{Inst: inst}
230 }
231
232
233
234
235
236
237 for pc := range p.Inst {
238 switch p.Inst[pc].Op {
239 default:
240 continue
241 case syntax.InstAlt, syntax.InstAltMatch:
242
243 p_A_Other := &p.Inst[pc].Out
244 p_A_Alt := &p.Inst[pc].Arg
245
246 instAlt := p.Inst[*p_A_Alt]
247 if !(instAlt.Op == syntax.InstAlt || instAlt.Op == syntax.InstAltMatch) {
248 p_A_Alt, p_A_Other = p_A_Other, p_A_Alt
249 instAlt = p.Inst[*p_A_Alt]
250 if !(instAlt.Op == syntax.InstAlt || instAlt.Op == syntax.InstAltMatch) {
251 continue
252 }
253 }
254 instOther := p.Inst[*p_A_Other]
255
256 if instOther.Op == syntax.InstAlt || instOther.Op == syntax.InstAltMatch {
257
258 continue
259 }
260
261
262 p_B_Alt := &p.Inst[*p_A_Alt].Out
263 p_B_Other := &p.Inst[*p_A_Alt].Arg
264 patch := false
265 if instAlt.Out == uint32(pc) {
266 patch = true
267 } else if instAlt.Arg == uint32(pc) {
268 patch = true
269 p_B_Alt, p_B_Other = p_B_Other, p_B_Alt
270 }
271 if patch {
272 *p_B_Alt = *p_A_Other
273 }
274
275
276
277 if *p_A_Other == *p_B_Alt {
278 *p_A_Alt = *p_B_Other
279 }
280 }
281 }
282 return p
283 }
284
285 var anyRuneNotNL = []rune{0, '\n' - 1, '\n' + 1, unicode.MaxRune}
286 var anyRune = []rune{0, unicode.MaxRune}
287
288
289
290
291
292
293 func makeOnePass(p *onePassProg) *onePassProg {
294
295 if len(p.Inst) >= 1000 {
296 return nil
297 }
298
299 var (
300 instQueue = newQueue(len(p.Inst))
301 visitQueue = newQueue(len(p.Inst))
302 check func(uint32, []bool) bool
303 onePassRunes = make([][]rune, len(p.Inst))
304 )
305
306
307
308 check = func(pc uint32, m []bool) (ok bool) {
309 ok = true
310 inst := &p.Inst[pc]
311 if visitQueue.contains(pc) {
312 return
313 }
314 visitQueue.insert(pc)
315 switch inst.Op {
316 case syntax.InstAlt, syntax.InstAltMatch:
317 ok = check(inst.Out, m) && check(inst.Arg, m)
318
319 matchOut := m[inst.Out]
320 matchArg := m[inst.Arg]
321 if matchOut && matchArg {
322 ok = false
323 break
324 }
325
326 if matchArg {
327 inst.Out, inst.Arg = inst.Arg, inst.Out
328 matchOut, matchArg = matchArg, matchOut
329 }
330 if matchOut {
331 m[pc] = true
332 inst.Op = syntax.InstAltMatch
333 }
334
335
336 onePassRunes[pc], inst.Next = mergeRuneSets(
337 &onePassRunes[inst.Out], &onePassRunes[inst.Arg], inst.Out, inst.Arg)
338 if len(inst.Next) > 0 && inst.Next[0] == mergeFailed {
339 ok = false
340 break
341 }
342 case syntax.InstCapture, syntax.InstNop:
343 ok = check(inst.Out, m)
344 m[pc] = m[inst.Out]
345
346 onePassRunes[pc] = append([]rune{}, onePassRunes[inst.Out]...)
347 inst.Next = make([]uint32, len(onePassRunes[pc])/2+1)
348 for i := range inst.Next {
349 inst.Next[i] = inst.Out
350 }
351 case syntax.InstEmptyWidth:
352 ok = check(inst.Out, m)
353 m[pc] = m[inst.Out]
354 onePassRunes[pc] = append([]rune{}, onePassRunes[inst.Out]...)
355 inst.Next = make([]uint32, len(onePassRunes[pc])/2+1)
356 for i := range inst.Next {
357 inst.Next[i] = inst.Out
358 }
359 case syntax.InstMatch, syntax.InstFail:
360 m[pc] = inst.Op == syntax.InstMatch
361 case syntax.InstRune:
362 m[pc] = false
363 if len(inst.Next) > 0 {
364 break
365 }
366 instQueue.insert(inst.Out)
367 if len(inst.Rune) == 0 {
368 onePassRunes[pc] = []rune{}
369 inst.Next = []uint32{inst.Out}
370 break
371 }
372 runes := make([]rune, 0)
373 if len(inst.Rune) == 1 && syntax.Flags(inst.Arg)&syntax.FoldCase != 0 {
374 r0 := inst.Rune[0]
375 runes = append(runes, r0, r0)
376 for r1 := unicode.SimpleFold(r0); r1 != r0; r1 = unicode.SimpleFold(r1) {
377 runes = append(runes, r1, r1)
378 }
379 slices.Sort(runes)
380 } else {
381 runes = append(runes, inst.Rune...)
382 }
383 onePassRunes[pc] = runes
384 inst.Next = make([]uint32, len(onePassRunes[pc])/2+1)
385 for i := range inst.Next {
386 inst.Next[i] = inst.Out
387 }
388 inst.Op = syntax.InstRune
389 case syntax.InstRune1:
390 m[pc] = false
391 if len(inst.Next) > 0 {
392 break
393 }
394 instQueue.insert(inst.Out)
395 runes := []rune{}
396
397 if syntax.Flags(inst.Arg)&syntax.FoldCase != 0 {
398 r0 := inst.Rune[0]
399 runes = append(runes, r0, r0)
400 for r1 := unicode.SimpleFold(r0); r1 != r0; r1 = unicode.SimpleFold(r1) {
401 runes = append(runes, r1, r1)
402 }
403 slices.Sort(runes)
404 } else {
405 runes = append(runes, inst.Rune[0], inst.Rune[0])
406 }
407 onePassRunes[pc] = runes
408 inst.Next = make([]uint32, len(onePassRunes[pc])/2+1)
409 for i := range inst.Next {
410 inst.Next[i] = inst.Out
411 }
412 inst.Op = syntax.InstRune
413 case syntax.InstRuneAny:
414 m[pc] = false
415 if len(inst.Next) > 0 {
416 break
417 }
418 instQueue.insert(inst.Out)
419 onePassRunes[pc] = append([]rune{}, anyRune...)
420 inst.Next = []uint32{inst.Out}
421 case syntax.InstRuneAnyNotNL:
422 m[pc] = false
423 if len(inst.Next) > 0 {
424 break
425 }
426 instQueue.insert(inst.Out)
427 onePassRunes[pc] = append([]rune{}, anyRuneNotNL...)
428 inst.Next = make([]uint32, len(onePassRunes[pc])/2+1)
429 for i := range inst.Next {
430 inst.Next[i] = inst.Out
431 }
432 }
433 return
434 }
435
436 instQueue.clear()
437 instQueue.insert(uint32(p.Start))
438 m := make([]bool, len(p.Inst))
439 for !instQueue.empty() {
440 visitQueue.clear()
441 pc := instQueue.next()
442 if !check(pc, m) {
443 p = nil
444 break
445 }
446 }
447 if p != nil {
448 for i := range p.Inst {
449 p.Inst[i].Rune = onePassRunes[i]
450 }
451 }
452 return p
453 }
454
455
456
457
458
459 func compileOnePass(prog *syntax.Prog) (p *onePassProg) {
460 if prog.Start == 0 {
461 return nil
462 }
463
464 if prog.Inst[prog.Start].Op != syntax.InstEmptyWidth ||
465 syntax.EmptyOp(prog.Inst[prog.Start].Arg)&syntax.EmptyBeginText != syntax.EmptyBeginText {
466 return nil
467 }
468 hasAlt := false
469 for _, inst := range prog.Inst {
470 if inst.Op == syntax.InstAlt || inst.Op == syntax.InstAltMatch {
471 hasAlt = true
472 break
473 }
474 }
475
476
477 for _, inst := range prog.Inst {
478 opOut := prog.Inst[inst.Out].Op
479 switch inst.Op {
480 default:
481 if opOut == syntax.InstMatch && hasAlt {
482 return nil
483 }
484 case syntax.InstAlt, syntax.InstAltMatch:
485 if opOut == syntax.InstMatch || prog.Inst[inst.Arg].Op == syntax.InstMatch {
486 return nil
487 }
488 case syntax.InstEmptyWidth:
489 if opOut == syntax.InstMatch {
490 if syntax.EmptyOp(inst.Arg)&syntax.EmptyEndText == syntax.EmptyEndText {
491 continue
492 }
493 return nil
494 }
495 }
496 }
497
498
499 p = onePassCopy(prog)
500
501
502 p = makeOnePass(p)
503
504 if p != nil {
505 cleanupOnePass(p, prog)
506 }
507 return p
508 }
509
View as plain text