Source file src/go/build/read.go

     1  // Copyright 2012 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  package build
     6  
     7  import (
     8  	"bufio"
     9  	"bytes"
    10  	"errors"
    11  	"fmt"
    12  	"go/ast"
    13  	"go/parser"
    14  	"go/scanner"
    15  	"go/token"
    16  	"io"
    17  	"strconv"
    18  	"strings"
    19  	"unicode"
    20  	"unicode/utf8"
    21  	_ "unsafe" // for linkname
    22  )
    23  
// importReader is a byte-at-a-time scanner over a Go source file.
// It is used to read the package clause and import declarations
// (remembering every byte it consumed in buf) and, afterwards, to scan
// the rest of the file for //go:embed comments while tracking pos.
type importReader struct {
	b    *bufio.Reader  // buffered view of the input
	buf  []byte         // bytes consumed by readByte so far; readByteNoBuf drains it from the front
	peek byte           // byte saved by peekByte and not yet consumed; 0 means none
	err  error          // first error recorded (read error, NUL byte, or errSyntax); never io.EOF
	eof  bool           // set once the underlying reader has returned io.EOF
	nerr int            // number of peekByte calls made after err was set (loop guard)
	pos  token.Position // position of the next byte readByteNoBuf will return
}
    33  
    34  var bom = []byte{0xef, 0xbb, 0xbf}
    35  
    36  func newImportReader(name string, r io.Reader) *importReader {
    37  	b := bufio.NewReader(r)
    38  	// Remove leading UTF-8 BOM.
    39  	// Per https://golang.org/ref/spec#Source_code_representation:
    40  	// a compiler may ignore a UTF-8-encoded byte order mark (U+FEFF)
    41  	// if it is the first Unicode code point in the source text.
    42  	if leadingBytes, err := b.Peek(3); err == nil && bytes.Equal(leadingBytes, bom) {
    43  		b.Discard(3)
    44  	}
    45  	return &importReader{
    46  		b: b,
    47  		pos: token.Position{
    48  			Filename: name,
    49  			Line:     1,
    50  			Column:   1,
    51  		},
    52  	}
    53  }
    54  
    55  func isIdent(c byte) bool {
    56  	return 'A' <= c && c <= 'Z' || 'a' <= c && c <= 'z' || '0' <= c && c <= '9' || c == '_' || c >= utf8.RuneSelf
    57  }
    58  
    59  var (
    60  	errSyntax = errors.New("syntax error")
    61  	errNUL    = errors.New("unexpected NUL in input")
    62  )
    63  
    64  // syntaxError records a syntax error, but only if an I/O error has not already been recorded.
    65  func (r *importReader) syntaxError() {
    66  	if r.err == nil {
    67  		r.err = errSyntax
    68  	}
    69  }
    70  
    71  // readByte reads the next byte from the input, saves it in buf, and returns it.
    72  // If an error occurs, readByte records the error in r.err and returns 0.
    73  func (r *importReader) readByte() byte {
    74  	c, err := r.b.ReadByte()
    75  	if err == nil {
    76  		r.buf = append(r.buf, c)
    77  		if c == 0 {
    78  			err = errNUL
    79  		}
    80  	}
    81  	if err != nil {
    82  		if err == io.EOF {
    83  			r.eof = true
    84  		} else if r.err == nil {
    85  			r.err = err
    86  		}
    87  		c = 0
    88  	}
    89  	return c
    90  }
    91  
    92  // readByteNoBuf is like readByte but doesn't buffer the byte.
    93  // It exhausts r.buf before reading from r.b.
    94  func (r *importReader) readByteNoBuf() byte {
    95  	var c byte
    96  	var err error
    97  	if len(r.buf) > 0 {
    98  		c = r.buf[0]
    99  		r.buf = r.buf[1:]
   100  	} else {
   101  		c, err = r.b.ReadByte()
   102  		if err == nil && c == 0 {
   103  			err = errNUL
   104  		}
   105  	}
   106  
   107  	if err != nil {
   108  		if err == io.EOF {
   109  			r.eof = true
   110  		} else if r.err == nil {
   111  			r.err = err
   112  		}
   113  		return 0
   114  	}
   115  	r.pos.Offset++
   116  	if c == '\n' {
   117  		r.pos.Line++
   118  		r.pos.Column = 1
   119  	} else {
   120  		r.pos.Column++
   121  	}
   122  	return c
   123  }
   124  
// peekByte returns the next byte from the input reader but does not advance beyond it.
// If skipSpace is set, peekByte skips leading spaces and comments.
//
// The byte is remembered in r.peek so that a following peekByte or
// nextByte sees it again. peekByte returns 0 at EOF and whenever an
// error has been recorded in r.err.
func (r *importReader) peekByte(skipSpace bool) byte {
	if r.err != nil {
		// After an error every call yields 0. Count such calls and panic
		// if there are too many, so that a caller that keeps peeking
		// without checking r.err cannot spin forever.
		if r.nerr++; r.nerr > 10000 {
			panic("go/build: import reader looping")
		}
		return 0
	}

	// Use r.peek as first input byte.
	// Don't just return r.peek here: it might have been left by peekByte(false)
	// and this might be peekByte(true).
	c := r.peek
	if c == 0 {
		c = r.readByte()
	}
	for r.err == nil && !r.eof {
		if skipSpace {
			// For the purposes of this reader, semicolons are never necessary to
			// understand the input and are treated as spaces.
			switch c {
			case ' ', '\f', '\t', '\r', '\n', ';':
				c = r.readByte()
				continue

			case '/':
				// Possible start of a comment; the next byte decides.
				c = r.readByte()
				if c == '/' {
					// Line comment: read up to and including the newline.
					for c != '\n' && r.err == nil && !r.eof {
						c = r.readByte()
					}
				} else if c == '*' {
					// Block comment: (c, c1) is a sliding two-byte window
					// that starts as ('*', 0); the loop ends once the
					// window holds "*/". Hitting EOF first is a syntax error.
					var c1 byte
					for (c != '*' || c1 != '/') && r.err == nil {
						if r.eof {
							r.syntaxError()
						}
						c, c1 = c1, r.readByte()
					}
				} else {
					// A '/' that does not begin a comment.
					r.syntaxError()
				}
				// Read the byte that follows the comment and rescan.
				c = r.readByte()
				continue
			}
		}
		break
	}
	r.peek = c
	return r.peek
}
   177  
   178  // nextByte is like peekByte but advances beyond the returned byte.
   179  func (r *importReader) nextByte(skipSpace bool) byte {
   180  	c := r.peekByte(skipSpace)
   181  	r.peek = 0
   182  	return c
   183  }
   184  
// goEmbed is the text that must directly follow "//" for a comment
// to be considered a //go:embed directive by findEmbed.
var goEmbed = []byte("go:embed")

// findEmbed advances the input reader to the next //go:embed comment.
// It reports whether it found a comment.
// (Otherwise it found an error or EOF.)
//
// first must be true on the first call and false on later calls.
// When findEmbed returns true, the reader has consumed "//go:embed" and
// the single space or tab after it, so the next byte read is the start
// of the directive's arguments.
//
// String, raw string and rune literals and /* */ comments are skipped
// as units, so text inside them is never taken for a directive.
// An unterminated literal or block comment records a syntax error.
func (r *importReader) findEmbed(first bool) bool {
	// The import block scan stopped after a non-space character,
	// so the reader is not at the start of a line on the first call.
	// After that, each //go:embed extraction leaves the reader
	// at the end of a line.
	//
	// startLine is true while only spaces and tabs have been seen
	// since the last newline.
	startLine := !first
	var c byte
	for r.err == nil && !r.eof {
		c = r.readByteNoBuf()
		// Reswitch re-dispatches on a byte that has already been read
		// (the byte following a literal, or the byte after a lone '/').
	Reswitch:
		switch c {
		default:
			startLine = false

		case '\n':
			startLine = true

		case ' ', '\t':
			// leave startLine alone

		case '"':
			// Interpreted string literal: skip to the closing quote,
			// stepping over backslash escapes.
			startLine = false
			for r.err == nil {
				if r.eof {
					r.syntaxError()
				}
				c = r.readByteNoBuf()
				if c == '\\' {
					// Skip the escaped byte, whatever it is.
					r.readByteNoBuf()
					if r.err != nil {
						// r.err is already set, so this call changes nothing.
						r.syntaxError()
						return false
					}
					continue
				}
				if c == '"' {
					c = r.readByteNoBuf()
					goto Reswitch
				}
			}
			goto Reswitch

		case '`':
			// Raw string literal: no escapes, ends at the next backquote.
			startLine = false
			for r.err == nil {
				if r.eof {
					r.syntaxError()
				}
				c = r.readByteNoBuf()
				if c == '`' {
					c = r.readByteNoBuf()
					goto Reswitch
				}
			}

		case '\'':
			// Rune literal: handled like an interpreted string,
			// with ' as the delimiter.
			startLine = false
			for r.err == nil {
				if r.eof {
					r.syntaxError()
				}
				c = r.readByteNoBuf()
				if c == '\\' {
					// Skip the escaped byte, whatever it is.
					r.readByteNoBuf()
					if r.err != nil {
						// r.err is already set, so this call changes nothing.
						r.syntaxError()
						return false
					}
					continue
				}
				if c == '\'' {
					c = r.readByteNoBuf()
					goto Reswitch
				}
			}

		case '/':
			c = r.readByteNoBuf()
			switch c {
			default:
				// A lone '/' (not a comment); reprocess the byte after it.
				startLine = false
				goto Reswitch

			case '*':
				// Block comment: (c, c1) is a sliding two-byte window
				// that starts as ('*', 0); the loop ends once it holds "*/".
				var c1 byte
				for (c != '*' || c1 != '/') && r.err == nil {
					if r.eof {
						r.syntaxError()
					}
					c, c1 = c1, r.readByteNoBuf()
				}
				startLine = false

			case '/':
				// Line comment. Only one that begins its line
				// (after optional spaces/tabs) can be a directive.
				if startLine {
					// Try to read this as a //go:embed comment.
					for i := range goEmbed {
						c = r.readByteNoBuf()
						if c != goEmbed[i] {
							goto SkipSlashSlash
						}
					}
					// "go:embed" must be followed by a space or tab.
					c = r.readByteNoBuf()
					if c == ' ' || c == '\t' {
						// Found one!
						return true
					}
				}
			SkipSlashSlash:
				// Discard the rest of the comment, including its newline.
				for c != '\n' && r.err == nil && !r.eof {
					c = r.readByteNoBuf()
				}
				startLine = true
			}
		}
	}
	return false
}
   308  
   309  // readKeyword reads the given keyword from the input.
   310  // If the keyword is not present, readKeyword records a syntax error.
   311  func (r *importReader) readKeyword(kw string) {
   312  	r.peekByte(true)
   313  	for i := 0; i < len(kw); i++ {
   314  		if r.nextByte(false) != kw[i] {
   315  			r.syntaxError()
   316  			return
   317  		}
   318  	}
   319  	if isIdent(r.peekByte(false)) {
   320  		r.syntaxError()
   321  	}
   322  }
   323  
   324  // readIdent reads an identifier from the input.
   325  // If an identifier is not present, readIdent records a syntax error.
   326  func (r *importReader) readIdent() {
   327  	c := r.peekByte(true)
   328  	if !isIdent(c) {
   329  		r.syntaxError()
   330  		return
   331  	}
   332  	for isIdent(r.peekByte(false)) {
   333  		r.peek = 0
   334  	}
   335  }
   336  
   337  // readString reads a quoted string literal from the input.
   338  // If an identifier is not present, readString records a syntax error.
   339  func (r *importReader) readString() {
   340  	switch r.nextByte(true) {
   341  	case '`':
   342  		for r.err == nil {
   343  			if r.nextByte(false) == '`' {
   344  				break
   345  			}
   346  			if r.eof {
   347  				r.syntaxError()
   348  			}
   349  		}
   350  	case '"':
   351  		for r.err == nil {
   352  			c := r.nextByte(false)
   353  			if c == '"' {
   354  				break
   355  			}
   356  			if r.eof || c == '\n' {
   357  				r.syntaxError()
   358  			}
   359  			if c == '\\' {
   360  				r.nextByte(false)
   361  			}
   362  		}
   363  	default:
   364  		r.syntaxError()
   365  	}
   366  }
   367  
   368  // readImport reads an import clause - optional identifier followed by quoted string -
   369  // from the input.
   370  func (r *importReader) readImport() {
   371  	c := r.peekByte(true)
   372  	if c == '.' {
   373  		r.peek = 0
   374  	} else if isIdent(c) {
   375  		r.readIdent()
   376  	}
   377  	r.readString()
   378  }
   379  
   380  // readComments is like io.ReadAll, except that it only reads the leading
   381  // block of comments in the file.
   382  //
   383  // readComments should be an internal detail,
   384  // but widely used packages access it using linkname.
   385  // Notable members of the hall of shame include:
   386  //   - github.com/bazelbuild/bazel-gazelle
   387  //
   388  // Do not remove or change the type signature.
   389  // See go.dev/issue/67401.
   390  //
   391  //go:linkname readComments
   392  func readComments(f io.Reader) ([]byte, error) {
   393  	r := newImportReader("", f)
   394  	r.peekByte(true)
   395  	if r.err == nil && !r.eof {
   396  		// Didn't reach EOF, so must have found a non-space byte. Remove it.
   397  		r.buf = r.buf[:len(r.buf)-1]
   398  	}
   399  	return r.buf, r.err
   400  }
   401  
// readGoInfo expects a Go file as input and reads the file up to and including the import section.
// It records what it learned in *info.
// If info.fset is non-nil, readGoInfo parses the file and sets info.parsed, info.parseErr,
// info.imports and info.embeds.
//
// It always sets info.header to the bytes it consumed: the leading part of
// the file through the imports, or the whole file if the quick scan hit a
// syntax error. When it parses, it also appends to info.directives the
// //go: comments that appear before the package clause.
//
// It only returns an error if there are problems reading the file,
// not for syntax errors in the file itself.
// NOTE(review): the one exception visible here is an import path that
// strconv.Unquote rejects after a successful parse, which is returned as an error.
func readGoInfo(f io.Reader, info *fileInfo) error {
	r := newImportReader(info.name, f)

	// Quick scan: package clause, then every import declaration.
	// Any token starting with 'i' at this point is treated as "import";
	// readKeyword records a syntax error if it is something else.
	r.readKeyword("package")
	r.readIdent()
	for r.peekByte(true) == 'i' {
		r.readKeyword("import")
		if r.peekByte(true) == '(' {
			// Parenthesized group: consume '(', the specs, then ')'.
			r.nextByte(false)
			for r.peekByte(true) != ')' && r.err == nil {
				r.readImport()
			}
			r.nextByte(false)
		} else {
			r.readImport()
		}
	}

	info.header = r.buf

	// If we stopped successfully before EOF, we read a byte that told us we were done.
	// Return all but that last byte, which would cause a syntax error if we let it through.
	if r.err == nil && !r.eof {
		info.header = r.buf[:len(r.buf)-1]
	}

	// If we stopped for a syntax error, consume the whole file so that
	// we are sure we don't change the errors that go/parser returns.
	if r.err == errSyntax {
		r.err = nil
		for r.err == nil && !r.eof {
			r.readByte()
		}
		info.header = r.buf
	}
	// Anything still in r.err is a read error (or a NUL byte), not a syntax error.
	if r.err != nil {
		return r.err
	}

	// Without a FileSet the caller only wants info.header.
	if info.fset == nil {
		return nil
	}

	// Parse file header & record imports.
	info.parsed, info.parseErr = parser.ParseFile(info.fset, info.name, info.header, parser.ImportsOnly|parser.ParseComments)
	if info.parseErr != nil {
		// Parse errors are reported through info.parseErr, not the return value.
		return nil
	}

	hasEmbed := false
	for _, decl := range info.parsed.Decls {
		d, ok := decl.(*ast.GenDecl)
		if !ok {
			continue
		}
		for _, dspec := range d.Specs {
			spec, ok := dspec.(*ast.ImportSpec)
			if !ok {
				continue
			}
			quoted := spec.Path.Value
			path, err := strconv.Unquote(quoted)
			if err != nil {
				return fmt.Errorf("parser returned invalid quoted string: <%s>", quoted)
			}
			if !isValidImport(path) {
				// The parser used to return a parse error for invalid import paths, but
				// no longer does, so check for and create the error here instead.
				info.parseErr = scanner.Error{Pos: info.fset.Position(spec.Pos()), Msg: "invalid import path: " + path}
				info.imports = nil
				return nil
			}
			if path == "embed" {
				hasEmbed = true
			}

			// Prefer the spec's own doc comment; for a declaration with a
			// single spec, fall back to the declaration's doc comment.
			doc := spec.Doc
			if doc == nil && len(d.Specs) == 1 {
				doc = d.Doc
			}
			info.imports = append(info.imports, fileImport{path, spec.Pos(), doc})
		}
	}

	// Extract directives.
	// Only comment groups that start before the package keyword are
	// considered, and within them only comments beginning with "//go:".
	for _, group := range info.parsed.Comments {
		if group.Pos() >= info.parsed.Package {
			break
		}
		for _, c := range group.List {
			if strings.HasPrefix(c.Text, "//go:") {
				info.directives = append(info.directives, Directive{c.Text, info.fset.Position(c.Slash)})
			}
		}
	}

	// If the file imports "embed",
	// we have to look for //go:embed comments
	// in the remainder of the file.
	// The compiler will enforce the mapping of comments to
	// declared variables. We just need to know the patterns.
	// If there were //go:embed comments earlier in the file
	// (near the package statement or imports), the compiler
	// will reject them. They can be (and have already been) ignored.
	if hasEmbed {
		var line []byte
		for first := true; r.findEmbed(first); first = false {
			line = line[:0]
			// r.pos is now the position of the first byte after
			// "//go:embed" and the space or tab that follows it.
			pos := r.pos
			// Collect the rest of the line, without its newline.
			for {
				c := r.readByteNoBuf()
				if c == '\n' || r.err != nil || r.eof {
					break
				}
				line = append(line, c)
			}
			// Add args if line is well-formed.
			// Ignore badly-formed lines - the compiler will report them when it finds them,
			// and we can pretend they are not there to help go list succeed with what it knows.
			embs, err := parseGoEmbed(string(line), pos)
			if err == nil {
				info.embeds = append(info.embeds, embs...)
			}
		}
	}

	return nil
}
   537  
   538  // isValidImport checks if the import is a valid import using the more strict
   539  // checks allowed by the implementation restriction in https://go.dev/ref/spec#Import_declarations.
   540  // It was ported from the function of the same name that was removed from the
   541  // parser in CL 424855, when the parser stopped doing these checks.
   542  func isValidImport(s string) bool {
   543  	const illegalChars = `!"#$%&'()*,:;<=>?[\]^{|}` + "`\uFFFD"
   544  	for _, r := range s {
   545  		if !unicode.IsGraphic(r) || unicode.IsSpace(r) || strings.ContainsRune(illegalChars, r) {
   546  			return false
   547  		}
   548  	}
   549  	return s != ""
   550  }
   551  
   552  // parseGoEmbed parses the text following "//go:embed" to extract the glob patterns.
   553  // It accepts unquoted space-separated patterns as well as double-quoted and back-quoted Go strings.
   554  // This is based on a similar function in cmd/compile/internal/gc/noder.go;
   555  // this version calculates position information as well.
   556  func parseGoEmbed(args string, pos token.Position) ([]fileEmbed, error) {
   557  	trimBytes := func(n int) {
   558  		pos.Offset += n
   559  		pos.Column += utf8.RuneCountInString(args[:n])
   560  		args = args[n:]
   561  	}
   562  	trimSpace := func() {
   563  		trim := strings.TrimLeftFunc(args, unicode.IsSpace)
   564  		trimBytes(len(args) - len(trim))
   565  	}
   566  
   567  	var list []fileEmbed
   568  	for trimSpace(); args != ""; trimSpace() {
   569  		var path string
   570  		pathPos := pos
   571  	Switch:
   572  		switch args[0] {
   573  		default:
   574  			i := len(args)
   575  			for j, c := range args {
   576  				if unicode.IsSpace(c) {
   577  					i = j
   578  					break
   579  				}
   580  			}
   581  			path = args[:i]
   582  			trimBytes(i)
   583  
   584  		case '`':
   585  			var ok bool
   586  			path, _, ok = strings.Cut(args[1:], "`")
   587  			if !ok {
   588  				return nil, fmt.Errorf("invalid quoted string in //go:embed: %s", args)
   589  			}
   590  			trimBytes(1 + len(path) + 1)
   591  
   592  		case '"':
   593  			i := 1
   594  			for ; i < len(args); i++ {
   595  				if args[i] == '\\' {
   596  					i++
   597  					continue
   598  				}
   599  				if args[i] == '"' {
   600  					q, err := strconv.Unquote(args[:i+1])
   601  					if err != nil {
   602  						return nil, fmt.Errorf("invalid quoted string in //go:embed: %s", args[:i+1])
   603  					}
   604  					path = q
   605  					trimBytes(i + 1)
   606  					break Switch
   607  				}
   608  			}
   609  			if i >= len(args) {
   610  				return nil, fmt.Errorf("invalid quoted string in //go:embed: %s", args)
   611  			}
   612  		}
   613  
   614  		if args != "" {
   615  			r, _ := utf8.DecodeRuneInString(args)
   616  			if !unicode.IsSpace(r) {
   617  				return nil, fmt.Errorf("invalid quoted string in //go:embed: %s", args)
   618  			}
   619  		}
   620  		list = append(list, fileEmbed{path, pathPos})
   621  	}
   622  	return list, nil
   623  }
   624  

View as plain text