1
2
3
4
5 package modfile
6
7 import (
8 "bytes"
9 "errors"
10 "fmt"
11 "os"
12 "strconv"
13 "strings"
14 "unicode"
15 "unicode/utf8"
16 )
17
18
19
// A Position identifies a location in the input by line, rune within the
// line, and byte offset.
type Position struct {
	Line     int // line number; the lexer starts counting at 1
	LineRune int // rune position within the line; the lexer starts counting at 1
	Byte     int // byte offset from the start of the input; starts at 0
}

// add returns the position reached after advancing p over the text s.
// Each newline in s increments the line; the rune position restarts at 1
// after the last newline and then advances by the runes that follow it.
func (p Position) add(s string) Position {
	next := Position{Line: p.Line, LineRune: p.LineRune, Byte: p.Byte + len(s)}
	tail := s
	if nl := strings.LastIndex(s, "\n"); nl >= 0 {
		next.Line += strings.Count(s, "\n")
		next.LineRune = 1
		tail = s[nl+1:]
	}
	next.LineRune += utf8.RuneCountInString(tail)
	return next
}
37
38
// An Expr is a node of the parsed syntax tree.
type Expr interface {
	// Span returns the start and end positions of the node.
	Span() (start, end Position)

	// Comment returns the comments attached to the node.
	// NOTE(review): the singular name presumably avoids a clash with the
	// embedded Comments type that supplies this method — confirm.
	Comment() *Comments
}
49
50
// A Comment is a single "//" comment. The parser also uses a zero Comment
// (empty Token) inside a block to record a blank line.
type Comment struct {
	Start  Position // position where the comment begins
	Token  string   // comment text as lexed, without its trailing line ending
	Suffix bool     // true for an end-of-line comment, i.e. non-space text precedes it on its line
}
56
57
// Comments collects the comments attached to a syntax node.
// Node types embed it, which also gives them the Comment method.
type Comments struct {
	Before []Comment // whole-line comments placed before the node
	Suffix []Comment // end-of-line comments placed after the node

	// After holds comments that follow the node. Within this file the
	// parser only fills it for the FileSyntax, with whole-line comments
	// left over once every node has been visited.
	After []Comment
}

// Comment returns the receiver. Because node types embed Comments, this
// method is what lets them satisfy the Comment method of Expr.
func (c *Comments) Comment() *Comments {
	return c
}
74
75
76 type FileSyntax struct {
77 Name string
78 Comments
79 Stmt []Expr
80 }
81
82 func (x *FileSyntax) Span() (start, end Position) {
83 if len(x.Stmt) == 0 {
84 return
85 }
86 start, _ = x.Stmt[0].Span()
87 _, end = x.Stmt[len(x.Stmt)-1].Span()
88 return start, end
89 }
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104 func (x *FileSyntax) addLine(hint Expr, tokens ...string) *Line {
105 if hint == nil {
106
107 Loop:
108 for i := len(x.Stmt) - 1; i >= 0; i-- {
109 stmt := x.Stmt[i]
110 switch stmt := stmt.(type) {
111 case *Line:
112 if stmt.Token != nil && stmt.Token[0] == tokens[0] {
113 hint = stmt
114 break Loop
115 }
116 case *LineBlock:
117 if stmt.Token[0] == tokens[0] {
118 hint = stmt
119 break Loop
120 }
121 }
122 }
123 }
124
125 newLineAfter := func(i int) *Line {
126 new := &Line{Token: tokens}
127 if i == len(x.Stmt) {
128 x.Stmt = append(x.Stmt, new)
129 } else {
130 x.Stmt = append(x.Stmt, nil)
131 copy(x.Stmt[i+2:], x.Stmt[i+1:])
132 x.Stmt[i+1] = new
133 }
134 return new
135 }
136
137 if hint != nil {
138 for i, stmt := range x.Stmt {
139 switch stmt := stmt.(type) {
140 case *Line:
141 if stmt == hint {
142 if stmt.Token == nil || stmt.Token[0] != tokens[0] {
143 return newLineAfter(i)
144 }
145
146
147 stmt.InBlock = true
148 block := &LineBlock{Token: stmt.Token[:1], Line: []*Line{stmt}}
149 stmt.Token = stmt.Token[1:]
150 x.Stmt[i] = block
151 new := &Line{Token: tokens[1:], InBlock: true}
152 block.Line = append(block.Line, new)
153 return new
154 }
155
156 case *LineBlock:
157 if stmt == hint {
158 if stmt.Token[0] != tokens[0] {
159 return newLineAfter(i)
160 }
161
162 new := &Line{Token: tokens[1:], InBlock: true}
163 stmt.Line = append(stmt.Line, new)
164 return new
165 }
166
167 for j, line := range stmt.Line {
168 if line == hint {
169 if stmt.Token[0] != tokens[0] {
170 return newLineAfter(i)
171 }
172
173
174 stmt.Line = append(stmt.Line, nil)
175 copy(stmt.Line[j+2:], stmt.Line[j+1:])
176 new := &Line{Token: tokens[1:], InBlock: true}
177 stmt.Line[j+1] = new
178 return new
179 }
180 }
181 }
182 }
183 }
184
185 new := &Line{Token: tokens}
186 x.Stmt = append(x.Stmt, new)
187 return new
188 }
189
190 func (x *FileSyntax) updateLine(line *Line, tokens ...string) {
191 if line.InBlock {
192 tokens = tokens[1:]
193 }
194 line.Token = tokens
195 }
196
197
198
199 func (line *Line) markRemoved() {
200 line.Token = nil
201 line.Comments.Suffix = nil
202 }
203
204
205
206
207
208
209 func (x *FileSyntax) Cleanup() {
210 w := 0
211 for _, stmt := range x.Stmt {
212 switch stmt := stmt.(type) {
213 case *Line:
214 if stmt.Token == nil {
215 continue
216 }
217 case *LineBlock:
218 ww := 0
219 for _, line := range stmt.Line {
220 if line.Token != nil {
221 stmt.Line[ww] = line
222 ww++
223 }
224 }
225 if ww == 0 {
226 continue
227 }
228 if ww == 1 && len(stmt.RParen.Comments.Before) == 0 {
229
230 line := &Line{
231 Comments: Comments{
232 Before: commentsAdd(stmt.Before, stmt.Line[0].Before),
233 Suffix: commentsAdd(stmt.Line[0].Suffix, stmt.Suffix),
234 After: commentsAdd(stmt.Line[0].After, stmt.After),
235 },
236 Token: stringsAdd(stmt.Token, stmt.Line[0].Token),
237 }
238 x.Stmt[w] = line
239 w++
240 continue
241 }
242 stmt.Line = stmt.Line[:ww]
243 }
244 x.Stmt[w] = stmt
245 w++
246 }
247 x.Stmt = x.Stmt[:w]
248 }
249
250 func commentsAdd(x, y []Comment) []Comment {
251 return append(x[:len(x):len(x)], y...)
252 }
253
// stringsAdd returns x followed by y. The capacity of x is capped first,
// so a non-empty y is appended into fresh storage and never overwrites
// elements beyond len(x) in x's backing array.
func stringsAdd(x, y []string) []string {
	head := x[:len(x):len(x)]
	return append(head, y...)
}
257
258
259
260 type CommentBlock struct {
261 Comments
262 Start Position
263 }
264
265 func (x *CommentBlock) Span() (start, end Position) {
266 return x.Start, x.Start
267 }
268
269
270 type Line struct {
271 Comments
272 Start Position
273 Token []string
274 InBlock bool
275 End Position
276 }
277
278 func (x *Line) Span() (start, end Position) {
279 return x.Start, x.End
280 }
281
282
283
284
285
286
287
288 type LineBlock struct {
289 Comments
290 Start Position
291 LParen LParen
292 Token []string
293 Line []*Line
294 RParen RParen
295 }
296
297 func (x *LineBlock) Span() (start, end Position) {
298 return x.Start, x.RParen.Pos.add(")")
299 }
300
301
302
303 type LParen struct {
304 Comments
305 Pos Position
306 }
307
308 func (x *LParen) Span() (start, end Position) {
309 return x.Pos, x.Pos.add(")")
310 }
311
312
313
314 type RParen struct {
315 Comments
316 Pos Position
317 }
318
319 func (x *RParen) Span() (start, end Position) {
320 return x.Pos, x.Pos.add(")")
321 }
322
323
324 type input struct {
325
326 filename string
327 complete []byte
328 remaining []byte
329 tokenStart []byte
330 token token
331 pos Position
332 comments []Comment
333
334
335 file *FileSyntax
336 parseErrors ErrorList
337
338
339 pre []Expr
340 post []Expr
341 }
342
343 func newInput(filename string, data []byte) *input {
344 return &input{
345 filename: filename,
346 complete: data,
347 remaining: data,
348 pos: Position{Line: 1, LineRune: 1, Byte: 0},
349 }
350 }
351
352
// parse parses data, which was read from the named file, into a syntax
// tree. On failure it returns a nil tree and the accumulated ErrorList.
func parse(file string, data []byte) (f *FileSyntax, err error) {
	// The lexer and parser report syntax errors by panicking (see
	// input.Error). The deferred function turns those panics, and any
	// other panic raised while parsing, into an error return.
	in := newInput(file, data)
	defer func() {
		// input.Error panics with &in.parseErrors after recording the
		// error itself; any other panic value is recorded here as an
		// internal error at the current position.
		if e := recover(); e != nil && e != &in.parseErrors {
			in.parseErrors = append(in.parseErrors, Error{
				Filename: in.filename,
				Pos:      in.pos,
				Err:      fmt.Errorf("internal error: %v", e),
			})
		}
		if err == nil && len(in.parseErrors) > 0 {
			err = in.parseErrors
		}
	}()

	// Prime the lexer by reading one token of lookahead;
	// peek and lex operate on in.token.
	in.readToken()

	// Invoke the parser.
	in.parseFile()
	if len(in.parseErrors) > 0 {
		return nil, in.parseErrors
	}
	in.file.Name = in.filename

	// Attach the collected end-of-line comments to syntax nodes.
	in.assignComments()

	return in.file, nil
}
388
389
390
391 func (in *input) Error(s string) {
392 in.parseErrors = append(in.parseErrors, Error{
393 Filename: in.filename,
394 Pos: in.pos,
395 Err: errors.New(s),
396 })
397 panic(&in.parseErrors)
398 }
399
400
// eof reports whether the input has been fully consumed.
func (in *input) eof() bool {
	return len(in.remaining) == 0
}
404
405
406 func (in *input) peekRune() int {
407 if len(in.remaining) == 0 {
408 return 0
409 }
410 r, _ := utf8.DecodeRune(in.remaining)
411 return int(r)
412 }
413
414
415 func (in *input) peekPrefix(prefix string) bool {
416
417
418 for i := 0; i < len(prefix); i++ {
419 if i >= len(in.remaining) || in.remaining[i] != prefix[i] {
420 return false
421 }
422 }
423 return true
424 }
425
426
427 func (in *input) readRune() int {
428 if len(in.remaining) == 0 {
429 in.Error("internal lexer error: readRune at EOF")
430 }
431 r, size := utf8.DecodeRune(in.remaining)
432 in.remaining = in.remaining[size:]
433 if r == '\n' {
434 in.pos.Line++
435 in.pos.LineRune = 1
436 } else {
437 in.pos.LineRune++
438 }
439 in.pos.Byte += size
440 return int(r)
441 }
442
// A token is one lexed unit of the input.
type token struct {
	kind   tokenKind // token class, or the rune itself for newline and punctuation
	pos    Position  // position where the token starts
	endPos Position  // position just past the token
	text   string    // token text; comments have their trailing line ending removed
}
449
// A tokenKind classifies a token. Named kinds are negative; newline and
// punctuation tokens use the (positive) value of their rune as the kind.
type tokenKind int

const (
	// Negative values, so they cannot collide with rune-valued kinds.
	_EOF tokenKind = -(iota + 1)
	_EOLCOMMENT
	_IDENT
	_STRING
	_COMMENT

	// Newline and punctuation tokens are represented by their rune value.
)

// isComment reports whether k is one of the two comment kinds.
func (k tokenKind) isComment() bool {
	switch k {
	case _COMMENT, _EOLCOMMENT:
		return true
	}
	return false
}

// isEOL reports whether k ends a line: end of input, an end-of-line
// comment, or a newline.
func (k tokenKind) isEOL() bool {
	switch k {
	case _EOF, _EOLCOMMENT, '\n':
		return true
	}
	return false
}
470
471
472
473
474 func (in *input) startToken() {
475 in.tokenStart = in.remaining
476 in.token.text = ""
477 in.token.pos = in.pos
478 }
479
480
481
482
483 func (in *input) endToken(kind tokenKind) {
484 in.token.kind = kind
485 text := string(in.tokenStart[:len(in.tokenStart)-len(in.remaining)])
486 if kind.isComment() {
487 if strings.HasSuffix(text, "\r\n") {
488 text = text[:len(text)-2]
489 } else {
490 text = strings.TrimSuffix(text, "\n")
491 }
492 }
493 in.token.text = text
494 in.token.endPos = in.pos
495 }
496
497
// peek returns the kind of the next token — the one the next call to lex
// will return — without consuming it.
func (in *input) peek() tokenKind {
	return in.token.kind
}
501
502
// lex returns the next token and reads the one after it into in.token, so
// that peek keeps one token of lookahead.
func (in *input) lex() token {
	tok := in.token
	in.readToken()
	return tok
}
508
509
// readToken lexes the next token from the input and stores it in in.token.
// It skips spaces, tabs and carriage returns, then produces one of:
// a whole-line comment (_COMMENT), an end-of-line comment (_EOLCOMMENT,
// also recorded in in.comments), _EOF, a single-rune newline/punctuation
// token, a quoted _STRING, or an _IDENT. Lexical errors are reported
// through in.Error, which panics.
func (in *input) readToken() {
	// Skip past spaces, stopping at non-space or EOF.
	for !in.eof() {
		c := in.peekRune()
		if c == ' ' || c == '\t' || c == '\r' {
			in.readRune()
			continue
		}

		// Comment runs to end of line.
		if in.peekPrefix("//") {
			in.startToken()

			// Is this comment the only thing on its line?
			// Find the last \n before this // and see if everything
			// between it and here is white space. If not, the comment
			// is a suffix (end-of-line) comment.
			i := bytes.LastIndex(in.complete[:in.pos.Byte], []byte("\n"))
			suffix := len(bytes.TrimSpace(in.complete[i+1:in.pos.Byte])) > 0
			in.readRune()
			in.readRune()

			// Consume the comment, including its terminating newline if any.
			for len(in.remaining) > 0 && in.readRune() != '\n' {
			}

			// A comment alone on its line is handed to the parser as a
			// _COMMENT token; the parser attaches it itself.
			if !suffix {
				in.endToken(_COMMENT)
				return
			}

			// Otherwise it ends the line, and is also saved for later
			// attachment to the syntax tree by assignComments.
			in.endToken(_EOLCOMMENT)
			in.comments = append(in.comments, Comment{in.token.pos, in.token.text, suffix})
			return
		}

		if in.peekPrefix("/*") {
			in.Error("mod files must use // comments (not /* */ comments)")
		}

		// Found non-space non-comment.
		break
	}

	// Found the beginning of the next token.
	in.startToken()

	// End of file.
	if in.eof() {
		in.endToken(_EOF)
		return
	}

	// Punctuation tokens.
	switch c := in.peekRune(); c {
	case '\n', '(', ')', '[', ']', '{', '}', ',':
		// The token kind is the rune itself.
		in.readRune()
		in.endToken(tokenKind(c))
		return

	case '"', '`': // quoted string
		quote := c
		in.readRune()
		for {
			if in.eof() {
				// Report the error at the start of the string.
				in.pos = in.token.pos
				in.Error("unexpected EOF in string")
			}
			if in.peekRune() == '\n' {
				in.Error("unexpected newline in string")
			}
			c := in.readRune()
			if c == quote {
				break
			}
			if c == '\\' && quote != '`' {
				// Inside a double-quoted string a backslash takes the
				// next rune with it, so an escaped quote does not end
				// the string.
				if in.eof() {
					in.pos = in.token.pos
					in.Error("unexpected EOF in string")
				}
				in.readRune()
			}
		}
		in.endToken(_STRING)
		return
	}

	// Checked all punctuation. Must be identifier token.
	if c := in.peekRune(); !isIdent(c) {
		in.Error(fmt.Sprintf("unexpected input character %#q", c))
	}

	// Scan over identifier, stopping at the start of a comment.
	for isIdent(in.peekRune()) {
		if in.peekPrefix("//") {
			break
		}
		if in.peekPrefix("/*") {
			in.Error("mod files must use // comments (not /* */ comments)")
		}
		in.readRune()
	}
	in.endToken(_IDENT)
}
617
618
619
620
// isIdent reports whether c can be part of an identifier token: any
// printable, non-space rune other than the punctuation runes
// ( ) [ ] { } and comma.
func isIdent(c int) bool {
	r := rune(c)
	switch r {
	case ' ', '(', ')', '[', ']', '{', '}', ',':
		return false
	}
	return unicode.IsPrint(r) && !unicode.IsSpace(r)
}
629
630
631
632
633
634
635
636
637
638
639
640 func (in *input) order(x Expr) {
641 if x != nil {
642 in.pre = append(in.pre, x)
643 }
644 switch x := x.(type) {
645 default:
646 panic(fmt.Errorf("order: unexpected type %T", x))
647 case nil:
648
649 case *LParen, *RParen:
650
651 case *CommentBlock:
652
653 case *Line:
654
655 case *FileSyntax:
656 for _, stmt := range x.Stmt {
657 in.order(stmt)
658 }
659 case *LineBlock:
660 in.order(&x.LParen)
661 for _, l := range x.Line {
662 in.order(l)
663 }
664 in.order(&x.RParen)
665 }
666 if x != nil {
667 in.post = append(in.post, x)
668 }
669 }
670
671
// assignComments attaches the comments collected in in.comments to the
// nodes of in.file: whole-line comments go to the first node that starts
// at or after them, end-of-line comments to the last single-line node that
// ends at or before them.
//
// NOTE(review): readToken in this file only appends end-of-line comments
// to in.comments, so the whole-line list appears to be always empty here —
// confirm before relying on the whole-line path.
func (in *input) assignComments() {
	const debug = false

	// Generate preorder and postorder lists.
	in.order(in.file)

	// Split into whole-line comments and suffix comments.
	var line, suffix []Comment
	for _, com := range in.comments {
		if com.Suffix {
			suffix = append(suffix, com)
		} else {
			line = append(line, com)
		}
	}

	if debug {
		for _, c := range line {
			fmt.Fprintf(os.Stderr, "LINE %q :%d:%d #%d\n", c.Token, c.Start.Line, c.Start.LineRune, c.Start.Byte)
		}
	}

	// Assign line comments to the syntax node immediately following them:
	// walk nodes in preorder and hand each one every remaining comment
	// that starts at or before the node's own start.
	for _, x := range in.pre {
		start, _ := x.Span()
		if debug {
			fmt.Fprintf(os.Stderr, "pre %T :%d:%d #%d\n", x, start.Line, start.LineRune, start.Byte)
		}
		xcom := x.Comment()
		for len(line) > 0 && start.Byte >= line[0].Start.Byte {
			if debug {
				fmt.Fprintf(os.Stderr, "ASSIGN LINE %q #%d\n", line[0].Token, line[0].Start.Byte)
			}
			xcom.Before = append(xcom.Before, line[0])
			line = line[1:]
		}
	}

	// Remaining line comments go at the end of the file.
	in.file.After = append(in.file.After, line...)

	if debug {
		for _, c := range suffix {
			fmt.Fprintf(os.Stderr, "SUFFIX %q :%d:%d #%d\n", c.Token, c.Start.Line, c.Start.LineRune, c.Start.Byte)
		}
	}

	// Assign suffix comments to the syntax node immediately before them:
	// walk nodes in reverse postorder, consuming comments from the end of
	// the list.
	for i := len(in.post) - 1; i >= 0; i-- {
		x := in.post[i]

		start, end := x.Span()
		if debug {
			fmt.Fprintf(os.Stderr, "post %T :%d:%d #%d :%d:%d #%d\n", x, start.Line, start.LineRune, start.Byte, end.Line, end.LineRune, end.Byte)
		}

		// Do not assign suffix comments to the file itself;
		// leftovers are handled after the loop.
		switch x.(type) {
		case *FileSyntax:
			continue
		}

		// Do not assign suffix comments to a node that spans several
		// lines (such as a whole block): a comment after its end belongs
		// to the single-line node that ends there, for example the
		// closing paren.
		if start.Line != end.Line {
			continue
		}
		xcom := x.Comment()
		for len(suffix) > 0 && end.Byte <= suffix[len(suffix)-1].Start.Byte {
			if debug {
				fmt.Fprintf(os.Stderr, "ASSIGN SUFFIX %q #%d\n", suffix[len(suffix)-1].Token, suffix[len(suffix)-1].Start.Byte)
			}
			xcom.Suffix = append(xcom.Suffix, suffix[len(suffix)-1])
			suffix = suffix[:len(suffix)-1]
		}
	}

	// Suffix comments were appended in reverse source order (the loop
	// above consumed them from the end), so restore source order.
	for _, x := range in.post {
		reverseComments(x.Comment().Suffix)
	}

	// Remaining suffix comments go at the beginning of the file.
	in.file.Before = append(in.file.Before, suffix...)
}
765
766
767 func reverseComments(list []Comment) {
768 for i, j := 0, len(list)-1; i < j; i, j = i+1, j-1 {
769 list[i], list[j] = list[j], list[i]
770 }
771 }
772
773 func (in *input) parseFile() {
774 in.file = new(FileSyntax)
775 var cb *CommentBlock
776 for {
777 switch in.peek() {
778 case '\n':
779 in.lex()
780 if cb != nil {
781 in.file.Stmt = append(in.file.Stmt, cb)
782 cb = nil
783 }
784 case _COMMENT:
785 tok := in.lex()
786 if cb == nil {
787 cb = &CommentBlock{Start: tok.pos}
788 }
789 com := cb.Comment()
790 com.Before = append(com.Before, Comment{Start: tok.pos, Token: tok.text})
791 case _EOF:
792 if cb != nil {
793 in.file.Stmt = append(in.file.Stmt, cb)
794 }
795 return
796 default:
797 in.parseStmt()
798 if cb != nil {
799 in.file.Stmt[len(in.file.Stmt)-1].Comment().Before = cb.Before
800 cb = nil
801 }
802 }
803 }
804 }
805
806 func (in *input) parseStmt() {
807 tok := in.lex()
808 start := tok.pos
809 end := tok.endPos
810 tokens := []string{tok.text}
811 for {
812 tok := in.lex()
813 switch {
814 case tok.kind.isEOL():
815 in.file.Stmt = append(in.file.Stmt, &Line{
816 Start: start,
817 Token: tokens,
818 End: end,
819 })
820 return
821
822 case tok.kind == '(':
823 if next := in.peek(); next.isEOL() {
824
825 in.file.Stmt = append(in.file.Stmt, in.parseLineBlock(start, tokens, tok))
826 return
827 } else if next == ')' {
828 rparen := in.lex()
829 if in.peek().isEOL() {
830
831 in.lex()
832 in.file.Stmt = append(in.file.Stmt, &LineBlock{
833 Start: start,
834 Token: tokens,
835 LParen: LParen{Pos: tok.pos},
836 RParen: RParen{Pos: rparen.pos},
837 })
838 return
839 }
840
841 tokens = append(tokens, tok.text, rparen.text)
842 } else {
843
844 tokens = append(tokens, tok.text)
845 }
846
847 default:
848 tokens = append(tokens, tok.text)
849 end = tok.endPos
850 }
851 }
852 }
853
854 func (in *input) parseLineBlock(start Position, token []string, lparen token) *LineBlock {
855 x := &LineBlock{
856 Start: start,
857 Token: token,
858 LParen: LParen{Pos: lparen.pos},
859 }
860 var comments []Comment
861 for {
862 switch in.peek() {
863 case _EOLCOMMENT:
864
865 in.lex()
866 case '\n':
867
868 in.lex()
869 if len(comments) == 0 && len(x.Line) > 0 || len(comments) > 0 && comments[len(comments)-1].Token != "" {
870 comments = append(comments, Comment{})
871 }
872 case _COMMENT:
873 tok := in.lex()
874 comments = append(comments, Comment{Start: tok.pos, Token: tok.text})
875 case _EOF:
876 in.Error(fmt.Sprintf("syntax error (unterminated block started at %s:%d:%d)", in.filename, x.Start.Line, x.Start.LineRune))
877 case ')':
878 rparen := in.lex()
879 x.RParen.Before = comments
880 x.RParen.Pos = rparen.pos
881 if !in.peek().isEOL() {
882 in.Error("syntax error (expected newline after closing paren)")
883 }
884 in.lex()
885 return x
886 default:
887 l := in.parseLine()
888 x.Line = append(x.Line, l)
889 l.Comment().Before = comments
890 comments = nil
891 }
892 }
893 }
894
895 func (in *input) parseLine() *Line {
896 tok := in.lex()
897 if tok.kind.isEOL() {
898 in.Error("internal parse error: parseLine at end of line")
899 }
900 start := tok.pos
901 end := tok.endPos
902 tokens := []string{tok.text}
903 for {
904 tok := in.lex()
905 if tok.kind.isEOL() {
906 return &Line{
907 Start: start,
908 Token: tokens,
909 End: end,
910 InBlock: true,
911 }
912 }
913 tokens = append(tokens, tok.text)
914 end = tok.endPos
915 }
916 }
917
var (
	slashSlash = []byte("//")
	moduleStr  = []byte("module")
)

// ModulePath returns the module path from the first "module" line found in
// mod, the contents of a mod file, without running the full parser.
// A line counts only when the word "module" is followed by white space and
// then a non-empty path; anything from "//" onward is ignored. A path that
// starts with a double quote or backquote is unquoted.
// ModulePath returns the empty string if no such line exists or if the
// quoted path is malformed.
func ModulePath(mod []byte) string {
	rest := mod
	for len(rest) > 0 {
		// Peel off the next line.
		var line []byte
		if nl := bytes.IndexByte(rest, '\n'); nl < 0 {
			line, rest = rest, nil
		} else {
			line, rest = rest[:nl], rest[nl+1:]
		}

		// Drop a trailing comment and surrounding white space.
		if c := bytes.Index(line, slashSlash); c >= 0 {
			line = line[:c]
		}
		line = bytes.TrimSpace(line)
		if !bytes.HasPrefix(line, moduleStr) {
			continue
		}

		// The keyword must be followed by white space (so trimming changes
		// the length) and then by something.
		afterKeyword := line[len(moduleStr):]
		path := bytes.TrimSpace(afterKeyword)
		if len(path) == len(afterKeyword) || len(path) == 0 {
			continue
		}

		switch path[0] {
		case '"', '`':
			unquoted, err := strconv.Unquote(string(path))
			if err != nil {
				return "" // malformed quoted string
			}
			return unquoted
		}
		return string(path)
	}
	return "" // no module line
}
959
View as plain text