1
2
3
4
5 package parse
6
7 import (
8 "fmt"
9 "strings"
10 "unicode"
11 "unicode/utf8"
12 )
13
14
15 type item struct {
16 typ itemType
17 pos Pos
18 val string
19 line int
20 }
21
22 func (i item) String() string {
23 switch {
24 case i.typ == itemEOF:
25 return "EOF"
26 case i.typ == itemError:
27 return i.val
28 case i.typ > itemKeyword:
29 return fmt.Sprintf("<%s>", i.val)
30 case len(i.val) > 10:
31 return fmt.Sprintf("%.10q...", i.val)
32 }
33 return fmt.Sprintf("%q", i.val)
34 }
35
36
// itemType identifies the kind of a lexed item.
type itemType int

// The item kinds. Values are assigned by iota, so the declaration order is
// significant: every constant declared after itemKeyword is recognised as a
// keyword by comparisons of the form "typ > itemKeyword".
const (
	itemError        itemType = iota // lexing error; val holds the message
	itemBool                         // "true" or "false"
	itemChar                         // any other printable ASCII character inside an action
	itemCharConstant                 // single-quoted character constant
	itemComment                      // comment text
	itemComplex                      // complex constant such as 1+2i
	itemAssign                       // '='
	itemDeclare                      // ":="
	itemEOF                          // end of input
	itemField                        // '.' followed by an alphanumeric name
	itemIdentifier                   // alphanumeric name not lexed as a keyword
	itemLeftDelim                    // left action delimiter
	itemLeftParen                    // '(' inside an action
	itemNumber                       // number, possibly imaginary
	itemPipe                         // '|'
	itemRawString                    // back-quoted string, quotes included
	itemRightDelim                   // right action delimiter
	itemRightParen                   // ')' inside an action
	itemSpace                        // run of space characters inside an action
	itemString                       // double-quoted string, quotes included
	itemText                         // text outside of actions
	itemVariable                     // '$', optionally followed by a name

	itemKeyword  // marker only; separates the keywords from everything above
	itemBlock    // "block"
	itemBreak    // "break"
	itemContinue // "continue"
	itemDot      // "."
	itemDefine   // "define"
	itemElse     // "else"
	itemEnd      // "end"
	itemIf       // "if"
	itemNil      // "nil"
	itemRange    // "range"
	itemTemplate // "template"
	itemWith     // "with"
)
77
78 var key = map[string]itemType{
79 ".": itemDot,
80 "block": itemBlock,
81 "break": itemBreak,
82 "continue": itemContinue,
83 "define": itemDefine,
84 "else": itemElse,
85 "end": itemEnd,
86 "if": itemIf,
87 "range": itemRange,
88 "nil": itemNil,
89 "template": itemTemplate,
90 "with": itemWith,
91 }
92
// eof is the sentinel returned by (*lexer).next when the input is exhausted.
const eof = -1
94
95
96
97
98
99
100
101
102
103 const (
104 spaceChars = " \t\r\n"
105 trimMarker = '-'
106 trimMarkerLen = Pos(1 + 1)
107 )
108
109
110 type stateFn func(*lexer) stateFn
111
112
113 type lexer struct {
114 name string
115 input string
116 leftDelim string
117 rightDelim string
118 pos Pos
119 start Pos
120 atEOF bool
121 parenDepth int
122 line int
123 startLine int
124 item item
125 insideAction bool
126 options lexOptions
127 }
128
129
// lexOptions switches optional lexer behaviour on or off.
type lexOptions struct {
	emitComment bool // emit itemComment items instead of silently skipping comments
	breakOK     bool // lex "break" as a keyword; otherwise it is a plain identifier
	continueOK  bool // lex "continue" as a keyword; otherwise it is a plain identifier
}
135
136
137 func (l *lexer) next() rune {
138 if int(l.pos) >= len(l.input) {
139 l.atEOF = true
140 return eof
141 }
142 r, w := utf8.DecodeRuneInString(l.input[l.pos:])
143 l.pos += Pos(w)
144 if r == '\n' {
145 l.line++
146 }
147 return r
148 }
149
150
151 func (l *lexer) peek() rune {
152 r := l.next()
153 l.backup()
154 return r
155 }
156
157
158 func (l *lexer) backup() {
159 if !l.atEOF && l.pos > 0 {
160 r, w := utf8.DecodeLastRuneInString(l.input[:l.pos])
161 l.pos -= Pos(w)
162
163 if r == '\n' {
164 l.line--
165 }
166 }
167 }
168
169
170
171 func (l *lexer) thisItem(t itemType) item {
172 i := item{t, l.start, l.input[l.start:l.pos], l.startLine}
173 l.start = l.pos
174 l.startLine = l.line
175 return i
176 }
177
178
179 func (l *lexer) emit(t itemType) stateFn {
180 return l.emitItem(l.thisItem(t))
181 }
182
183
184 func (l *lexer) emitItem(i item) stateFn {
185 l.item = i
186 return nil
187 }
188
189
190
191
192 func (l *lexer) ignore() {
193 l.line += strings.Count(l.input[l.start:l.pos], "\n")
194 l.start = l.pos
195 l.startLine = l.line
196 }
197
198
199 func (l *lexer) accept(valid string) bool {
200 if strings.ContainsRune(valid, l.next()) {
201 return true
202 }
203 l.backup()
204 return false
205 }
206
207
208 func (l *lexer) acceptRun(valid string) {
209 for strings.ContainsRune(valid, l.next()) {
210 }
211 l.backup()
212 }
213
214
215
216 func (l *lexer) errorf(format string, args ...any) stateFn {
217 l.item = item{itemError, l.start, fmt.Sprintf(format, args...), l.startLine}
218 l.start = 0
219 l.pos = 0
220 l.input = l.input[:0]
221 return nil
222 }
223
224
225
226 func (l *lexer) nextItem() item {
227 l.item = item{itemEOF, l.pos, "EOF", l.startLine}
228 state := lexText
229 if l.insideAction {
230 state = lexInsideAction
231 }
232 for {
233 state = state(l)
234 if state == nil {
235 return l.item
236 }
237 }
238 }
239
240
241 func lex(name, input, left, right string) *lexer {
242 if left == "" {
243 left = leftDelim
244 }
245 if right == "" {
246 right = rightDelim
247 }
248 l := &lexer{
249 name: name,
250 input: input,
251 leftDelim: left,
252 rightDelim: right,
253 line: 1,
254 startLine: 1,
255 insideAction: false,
256 }
257 return l
258 }
259
260
261
// Default action delimiters and the fixed comment markers.
const (
	leftDelim    = "{{" // default opening delimiter, used when lex is given ""
	rightDelim   = "}}" // default closing delimiter, used when lex is given ""
	leftComment  = "/*" // opens a comment right after the left delimiter
	rightComment = "*/" // closes a comment right before the right delimiter
)
268
269
270 func lexText(l *lexer) stateFn {
271 if x := strings.Index(l.input[l.pos:], l.leftDelim); x >= 0 {
272 if x > 0 {
273 l.pos += Pos(x)
274
275 trimLength := Pos(0)
276 delimEnd := l.pos + Pos(len(l.leftDelim))
277 if hasLeftTrimMarker(l.input[delimEnd:]) {
278 trimLength = rightTrimLength(l.input[l.start:l.pos])
279 }
280 l.pos -= trimLength
281 l.line += strings.Count(l.input[l.start:l.pos], "\n")
282 i := l.thisItem(itemText)
283 l.pos += trimLength
284 l.ignore()
285 if len(i.val) > 0 {
286 return l.emitItem(i)
287 }
288 }
289 return lexLeftDelim
290 }
291 l.pos = Pos(len(l.input))
292
293 if l.pos > l.start {
294 l.line += strings.Count(l.input[l.start:l.pos], "\n")
295 return l.emit(itemText)
296 }
297 return l.emit(itemEOF)
298 }
299
300
301 func rightTrimLength(s string) Pos {
302 return Pos(len(s) - len(strings.TrimRight(s, spaceChars)))
303 }
304
305
306 func (l *lexer) atRightDelim() (delim, trimSpaces bool) {
307 if hasRightTrimMarker(l.input[l.pos:]) && strings.HasPrefix(l.input[l.pos+trimMarkerLen:], l.rightDelim) {
308 return true, true
309 }
310 if strings.HasPrefix(l.input[l.pos:], l.rightDelim) {
311 return true, false
312 }
313 return false, false
314 }
315
316
317 func leftTrimLength(s string) Pos {
318 return Pos(len(s) - len(strings.TrimLeft(s, spaceChars)))
319 }
320
321
322
323 func lexLeftDelim(l *lexer) stateFn {
324 l.pos += Pos(len(l.leftDelim))
325 trimSpace := hasLeftTrimMarker(l.input[l.pos:])
326 afterMarker := Pos(0)
327 if trimSpace {
328 afterMarker = trimMarkerLen
329 }
330 if strings.HasPrefix(l.input[l.pos+afterMarker:], leftComment) {
331 l.pos += afterMarker
332 l.ignore()
333 return lexComment
334 }
335 i := l.thisItem(itemLeftDelim)
336 l.insideAction = true
337 l.pos += afterMarker
338 l.ignore()
339 l.parenDepth = 0
340 return l.emitItem(i)
341 }
342
343
344 func lexComment(l *lexer) stateFn {
345 l.pos += Pos(len(leftComment))
346 x := strings.Index(l.input[l.pos:], rightComment)
347 if x < 0 {
348 return l.errorf("unclosed comment")
349 }
350 l.pos += Pos(x + len(rightComment))
351 delim, trimSpace := l.atRightDelim()
352 if !delim {
353 return l.errorf("comment ends before closing delimiter")
354 }
355 l.line += strings.Count(l.input[l.start:l.pos], "\n")
356 i := l.thisItem(itemComment)
357 if trimSpace {
358 l.pos += trimMarkerLen
359 }
360 l.pos += Pos(len(l.rightDelim))
361 if trimSpace {
362 l.pos += leftTrimLength(l.input[l.pos:])
363 }
364 l.ignore()
365 if l.options.emitComment {
366 return l.emitItem(i)
367 }
368 return lexText
369 }
370
371
372 func lexRightDelim(l *lexer) stateFn {
373 _, trimSpace := l.atRightDelim()
374 if trimSpace {
375 l.pos += trimMarkerLen
376 l.ignore()
377 }
378 l.pos += Pos(len(l.rightDelim))
379 i := l.thisItem(itemRightDelim)
380 if trimSpace {
381 l.pos += leftTrimLength(l.input[l.pos:])
382 l.ignore()
383 }
384 l.insideAction = false
385 return l.emitItem(i)
386 }
387
388
389 func lexInsideAction(l *lexer) stateFn {
390
391
392
393 delim, _ := l.atRightDelim()
394 if delim {
395 if l.parenDepth == 0 {
396 return lexRightDelim
397 }
398 return l.errorf("unclosed left paren")
399 }
400 switch r := l.next(); {
401 case r == eof:
402 return l.errorf("unclosed action")
403 case isSpace(r):
404 l.backup()
405 return lexSpace
406 case r == '=':
407 return l.emit(itemAssign)
408 case r == ':':
409 if l.next() != '=' {
410 return l.errorf("expected :=")
411 }
412 return l.emit(itemDeclare)
413 case r == '|':
414 return l.emit(itemPipe)
415 case r == '"':
416 return lexQuote
417 case r == '`':
418 return lexRawQuote
419 case r == '$':
420 return lexVariable
421 case r == '\'':
422 return lexChar
423 case r == '.':
424
425 if l.pos < Pos(len(l.input)) {
426 r := l.input[l.pos]
427 if r < '0' || '9' < r {
428 return lexField
429 }
430 }
431 fallthrough
432 case r == '+' || r == '-' || ('0' <= r && r <= '9'):
433 l.backup()
434 return lexNumber
435 case isAlphaNumeric(r):
436 l.backup()
437 return lexIdentifier
438 case r == '(':
439 l.parenDepth++
440 return l.emit(itemLeftParen)
441 case r == ')':
442 l.parenDepth--
443 if l.parenDepth < 0 {
444 return l.errorf("unexpected right paren")
445 }
446 return l.emit(itemRightParen)
447 case r <= unicode.MaxASCII && unicode.IsPrint(r):
448 return l.emit(itemChar)
449 default:
450 return l.errorf("unrecognized character in action: %#U", r)
451 }
452 }
453
454
455
456
457 func lexSpace(l *lexer) stateFn {
458 var r rune
459 var numSpaces int
460 for {
461 r = l.peek()
462 if !isSpace(r) {
463 break
464 }
465 l.next()
466 numSpaces++
467 }
468
469
470 if hasRightTrimMarker(l.input[l.pos-1:]) && strings.HasPrefix(l.input[l.pos-1+trimMarkerLen:], l.rightDelim) {
471 l.backup()
472 if numSpaces == 1 {
473 return lexRightDelim
474 }
475 }
476 return l.emit(itemSpace)
477 }
478
479
480 func lexIdentifier(l *lexer) stateFn {
481 for {
482 switch r := l.next(); {
483 case isAlphaNumeric(r):
484
485 default:
486 l.backup()
487 word := l.input[l.start:l.pos]
488 if !l.atTerminator() {
489 return l.errorf("bad character %#U", r)
490 }
491 switch {
492 case key[word] > itemKeyword:
493 item := key[word]
494 if item == itemBreak && !l.options.breakOK || item == itemContinue && !l.options.continueOK {
495 return l.emit(itemIdentifier)
496 }
497 return l.emit(item)
498 case word[0] == '.':
499 return l.emit(itemField)
500 case word == "true", word == "false":
501 return l.emit(itemBool)
502 default:
503 return l.emit(itemIdentifier)
504 }
505 }
506 }
507 }
508
509
510
511 func lexField(l *lexer) stateFn {
512 return lexFieldOrVariable(l, itemField)
513 }
514
515
516
517 func lexVariable(l *lexer) stateFn {
518 if l.atTerminator() {
519 return l.emit(itemVariable)
520 }
521 return lexFieldOrVariable(l, itemVariable)
522 }
523
524
525
526 func lexFieldOrVariable(l *lexer, typ itemType) stateFn {
527 if l.atTerminator() {
528 if typ == itemVariable {
529 return l.emit(itemVariable)
530 }
531 return l.emit(itemDot)
532 }
533 var r rune
534 for {
535 r = l.next()
536 if !isAlphaNumeric(r) {
537 l.backup()
538 break
539 }
540 }
541 if !l.atTerminator() {
542 return l.errorf("bad character %#U", r)
543 }
544 return l.emit(typ)
545 }
546
547
548
549
550
551 func (l *lexer) atTerminator() bool {
552 r := l.peek()
553 if isSpace(r) {
554 return true
555 }
556 switch r {
557 case eof, '.', ',', '|', ':', ')', '(':
558 return true
559 }
560 return strings.HasPrefix(l.input[l.pos:], l.rightDelim)
561 }
562
563
564
565 func lexChar(l *lexer) stateFn {
566 Loop:
567 for {
568 switch l.next() {
569 case '\\':
570 if r := l.next(); r != eof && r != '\n' {
571 break
572 }
573 fallthrough
574 case eof, '\n':
575 return l.errorf("unterminated character constant")
576 case '\'':
577 break Loop
578 }
579 }
580 return l.emit(itemCharConstant)
581 }
582
583
584
585
586
587 func lexNumber(l *lexer) stateFn {
588 if !l.scanNumber() {
589 return l.errorf("bad number syntax: %q", l.input[l.start:l.pos])
590 }
591 if sign := l.peek(); sign == '+' || sign == '-' {
592
593 if !l.scanNumber() || l.input[l.pos-1] != 'i' {
594 return l.errorf("bad number syntax: %q", l.input[l.start:l.pos])
595 }
596 return l.emit(itemComplex)
597 }
598 return l.emit(itemNumber)
599 }
600
601 func (l *lexer) scanNumber() bool {
602
603 l.accept("+-")
604
605 digits := "0123456789_"
606 if l.accept("0") {
607
608 if l.accept("xX") {
609 digits = "0123456789abcdefABCDEF_"
610 } else if l.accept("oO") {
611 digits = "01234567_"
612 } else if l.accept("bB") {
613 digits = "01_"
614 }
615 }
616 l.acceptRun(digits)
617 if l.accept(".") {
618 l.acceptRun(digits)
619 }
620 if len(digits) == 10+1 && l.accept("eE") {
621 l.accept("+-")
622 l.acceptRun("0123456789_")
623 }
624 if len(digits) == 16+6+1 && l.accept("pP") {
625 l.accept("+-")
626 l.acceptRun("0123456789_")
627 }
628
629 l.accept("i")
630
631 if isAlphaNumeric(l.peek()) {
632 l.next()
633 return false
634 }
635 return true
636 }
637
638
639 func lexQuote(l *lexer) stateFn {
640 Loop:
641 for {
642 switch l.next() {
643 case '\\':
644 if r := l.next(); r != eof && r != '\n' {
645 break
646 }
647 fallthrough
648 case eof, '\n':
649 return l.errorf("unterminated quoted string")
650 case '"':
651 break Loop
652 }
653 }
654 return l.emit(itemString)
655 }
656
657
658 func lexRawQuote(l *lexer) stateFn {
659 Loop:
660 for {
661 switch l.next() {
662 case eof:
663 return l.errorf("unterminated raw quoted string")
664 case '`':
665 break Loop
666 }
667 }
668 return l.emit(itemRawString)
669 }
670
671
// isSpace reports whether r is a space, tab, carriage return or newline.
func isSpace(r rune) bool {
	switch r {
	case ' ', '\t', '\r', '\n':
		return true
	}
	return false
}
675
676
// isAlphaNumeric reports whether r is an underscore, a Unicode letter or a
// Unicode decimal digit.
func isAlphaNumeric(r rune) bool {
	if r == '_' {
		return true
	}
	return unicode.IsLetter(r) || unicode.IsDigit(r)
}
680
681 func hasLeftTrimMarker(s string) bool {
682 return len(s) >= 2 && s[0] == trimMarker && isSpace(rune(s[1]))
683 }
684
685 func hasRightTrimMarker(s string) bool {
686 return len(s) >= 2 && isSpace(rune(s[0])) && s[1] == trimMarker
687 }
688
View as plain text