read.go

     1  // Copyright 2012 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  package build
     6  
     7  import (
     8  	"bufio"
     9  	"bytes"
    10  	"errors"
    11  	"fmt"
    12  	"go/ast"
    13  	"go/parser"
    14  	"go/scanner"
    15  	"go/token"
    16  	"io"
    17  	"strconv"
    18  	"strings"
    19  	"unicode"
    20  	"unicode/utf8"
    21  	_ "unsafe" // for linkname
    22  )
    23  
// importReader reads Go source one byte at a time, keeping a copy of
// everything it has consumed in buf so that callers can later hand the
// consumed prefix (or the whole file) to a parser or scanner.
//
// Errors are sticky: the first error is stored in err and later ones are
// dropped. End of input is tracked separately in eof and is not an error.
type importReader struct {
	b    *bufio.Reader  // buffered input being read
	buf  []byte         // every byte read so far, in input order
	peek byte           // pending byte left by peekByte; 0 means none
	err  error          // first error recorded, if any
	eof  bool           // set once the input has returned io.EOF
	nerr int            // peekByte calls made after err was set (loop guard)
	pos  token.Position // set at construction; only Filename is read in this file
}
    33  
// bom is the UTF-8 encoding of the byte order mark (U+FEFF).
// newImportReader discards it when it appears at the very start of the input.
var bom = []byte{0xef, 0xbb, 0xbf}
    35  
    36  func newImportReader(name string, r io.Reader) *importReader {
    37  	b := bufio.NewReader(r)
    38  	// Remove leading UTF-8 BOM.
    39  	// Per https://golang.org/ref/spec#Source_code_representation:
    40  	// a compiler may ignore a UTF-8-encoded byte order mark (U+FEFF)
    41  	// if it is the first Unicode code point in the source text.
    42  	if leadingBytes, err := b.Peek(3); err == nil && bytes.Equal(leadingBytes, bom) {
    43  		b.Discard(3)
    44  	}
    45  	return &importReader{
    46  		b: b,
    47  		pos: token.Position{
    48  			Filename: name,
    49  			Line:     1,
    50  			Column:   1,
    51  		},
    52  	}
    53  }
    54  
    55  func isIdent(c byte) bool {
    56  	return 'A' <= c && c <= 'Z' || 'a' <= c && c <= 'z' || '0' <= c && c <= '9' || c == '_' || c >= utf8.RuneSelf
    57  }
    58  
// Sentinel errors stored in importReader.err.
// errSyntax is recorded by syntaxError when the input does not have the
// expected shape; readGoInfo clears it rather than returning it.
// errNUL is recorded by readByte when the input contains a NUL byte.
var (
	errSyntax = errors.New("syntax error")
	errNUL    = errors.New("unexpected NUL in input")
)
    63  
    64  // syntaxError records a syntax error, but only if an I/O error has not already been recorded.
    65  func (r *importReader) syntaxError() {
    66  	if r.err == nil {
    67  		r.err = errSyntax
    68  	}
    69  }
    70  
    71  // readByte reads the next byte from the input, saves it in buf, and returns it.
    72  // If an error occurs, readByte records the error in r.err and returns 0.
    73  func (r *importReader) readByte() byte {
    74  	c, err := r.b.ReadByte()
    75  	if err == nil {
    76  		r.buf = append(r.buf, c)
    77  		if c == 0 {
    78  			err = errNUL
    79  		}
    80  	}
    81  	if err != nil {
    82  		if err == io.EOF {
    83  			r.eof = true
    84  		} else if r.err == nil {
    85  			r.err = err
    86  		}
    87  		c = 0
    88  	}
    89  	return c
    90  }
    91  
    92  // readRest reads the entire rest of the file into r.buf.
    93  func (r *importReader) readRest() {
    94  	for {
    95  		if len(r.buf) == cap(r.buf) {
    96  			// Grow the buffer
    97  			r.buf = append(r.buf, 0)[:len(r.buf)]
    98  		}
    99  		n, err := r.b.Read(r.buf[len(r.buf):cap(r.buf)])
   100  		r.buf = r.buf[:len(r.buf)+n]
   101  		if err != nil {
   102  			if err == io.EOF {
   103  				r.eof = true
   104  			} else if r.err == nil {
   105  				r.err = err
   106  			}
   107  			break
   108  		}
   109  	}
   110  }
   111  
// peekByte returns the next byte from the input reader but does not advance beyond it.
// If skipSpace is set, peekByte skips leading spaces and comments.
//
// The returned byte is remembered in r.peek so that the next call sees it again.
// At end of input the result is 0, because readByte returns 0 on EOF.
//
// Once an error has been recorded in r.err, peekByte returns 0 without reading.
// It counts those calls in r.nerr and panics after more than 10000 of them,
// which catches a caller that keeps polling without checking r.err.
func (r *importReader) peekByte(skipSpace bool) byte {
	if r.err != nil {
		if r.nerr++; r.nerr > 10000 {
			panic("go/build: import reader looping")
		}
		return 0
	}

	// Use r.peek as first input byte.
	// Don't just return r.peek here: it might have been left by peekByte(false)
	// and this might be peekByte(true).
	c := r.peek
	if c == 0 {
		c = r.readByte()
	}
	for r.err == nil && !r.eof {
		if skipSpace {
			// For the purposes of this reader, semicolons are never necessary to
			// understand the input and are treated as spaces.
			switch c {
			case ' ', '\f', '\t', '\r', '\n', ';':
				c = r.readByte()
				continue

			case '/':
				c = r.readByte()
				if c == '/' {
					// Line comment: read up to and including the newline,
					// stopping early on EOF or error.
					for c != '\n' && r.err == nil && !r.eof {
						c = r.readByte()
					}
				} else if c == '*' {
					// Block comment: slide a two-byte window (c, c1) over the
					// input until it holds "*/". c1 starts at 0, so the '*'
					// that opened the comment cannot also close it.
					// Reaching EOF first is a syntax error, which ends the loop.
					var c1 byte
					for (c != '*' || c1 != '/') && r.err == nil {
						if r.eof {
							r.syntaxError()
						}
						c, c1 = c1, r.readByte()
					}
				} else {
					// A '/' that does not start a comment is not valid here.
					r.syntaxError()
				}
				// Fetch the first byte after the comment and rescan.
				c = r.readByte()
				continue
			}
		}
		break
	}
	r.peek = c
	return r.peek
}
   164  
   165  // nextByte is like peekByte but advances beyond the returned byte.
   166  func (r *importReader) nextByte(skipSpace bool) byte {
   167  	c := r.peekByte(skipSpace)
   168  	r.peek = 0
   169  	return c
   170  }
   171  
   172  // readKeyword reads the given keyword from the input.
   173  // If the keyword is not present, readKeyword records a syntax error.
   174  func (r *importReader) readKeyword(kw string) {
   175  	r.peekByte(true)
   176  	for i := 0; i < len(kw); i++ {
   177  		if r.nextByte(false) != kw[i] {
   178  			r.syntaxError()
   179  			return
   180  		}
   181  	}
   182  	if isIdent(r.peekByte(false)) {
   183  		r.syntaxError()
   184  	}
   185  }
   186  
   187  // readIdent reads an identifier from the input.
   188  // If an identifier is not present, readIdent records a syntax error.
   189  func (r *importReader) readIdent() {
   190  	c := r.peekByte(true)
   191  	if !isIdent(c) {
   192  		r.syntaxError()
   193  		return
   194  	}
   195  	for isIdent(r.peekByte(false)) {
   196  		r.peek = 0
   197  	}
   198  }
   199  
   200  // readString reads a quoted string literal from the input.
   201  // If an identifier is not present, readString records a syntax error.
   202  func (r *importReader) readString() {
   203  	switch r.nextByte(true) {
   204  	case '`':
   205  		for r.err == nil {
   206  			if r.nextByte(false) == '`' {
   207  				break
   208  			}
   209  			if r.eof {
   210  				r.syntaxError()
   211  			}
   212  		}
   213  	case '"':
   214  		for r.err == nil {
   215  			c := r.nextByte(false)
   216  			if c == '"' {
   217  				break
   218  			}
   219  			if r.eof || c == '\n' {
   220  				r.syntaxError()
   221  			}
   222  			if c == '\\' {
   223  				r.nextByte(false)
   224  			}
   225  		}
   226  	default:
   227  		r.syntaxError()
   228  	}
   229  }
   230  
   231  // readImport reads an import clause - optional identifier followed by quoted string -
   232  // from the input.
   233  func (r *importReader) readImport() {
   234  	c := r.peekByte(true)
   235  	if c == '.' {
   236  		r.peek = 0
   237  	} else if isIdent(c) {
   238  		r.readIdent()
   239  	}
   240  	r.readString()
   241  }
   242  
   243  // readComments is like io.ReadAll, except that it only reads the leading
   244  // block of comments in the file.
   245  //
   246  // readComments should be an internal detail,
   247  // but widely used packages access it using linkname.
   248  // Notable members of the hall of shame include:
   249  //   - github.com/bazelbuild/bazel-gazelle
   250  //
   251  // Do not remove or change the type signature.
   252  // See go.dev/issue/67401.
   253  //
   254  //go:linkname readComments
   255  func readComments(f io.Reader) ([]byte, error) {
   256  	r := newImportReader("", f)
   257  	r.peekByte(true)
   258  	if r.err == nil && !r.eof {
   259  		// Didn't reach EOF, so must have found a non-space byte. Remove it.
   260  		r.buf = r.buf[:len(r.buf)-1]
   261  	}
   262  	return r.buf, r.err
   263  }
   264  
// readGoInfo expects a Go file as input and reads the file up to and including the import section.
// It records what it learned in *info.
// info.header is set to the bytes consumed (the whole file if a syntax error was seen).
// If info.fset is non-nil, readGoInfo parses the file and sets info.parsed, info.parseErr,
// info.imports, info.directives and info.embeds.
//
// It only returns an error if there are problems reading the file,
// not for syntax errors in the file itself.
// (The one other error return is an internal consistency check: an import
// path from the parser that strconv.Unquote rejects.)
func readGoInfo(f io.Reader, info *fileInfo) error {
	r := newImportReader(info.name, f)

	r.readKeyword("package")
	r.readIdent()
	// Every import declaration starts with 'i'; any other first byte ends the header.
	for r.peekByte(true) == 'i' {
		r.readKeyword("import")
		if r.peekByte(true) == '(' {
			r.nextByte(false)
			for r.peekByte(true) != ')' && r.err == nil {
				r.readImport()
			}
			// Consume the closing ')'.
			r.nextByte(false)
		} else {
			r.readImport()
		}
	}

	info.header = r.buf

	// If we stopped successfully before EOF, we read a byte that told us we were done.
	// Return all but that last byte, which would cause a syntax error if we let it through.
	if r.err == nil && !r.eof {
		info.header = r.buf[:len(r.buf)-1]
	}

	// If we stopped for a syntax error, consume the whole file so that
	// we are sure we don't change the errors that go/parser returns.
	if r.err == errSyntax {
		r.err = nil
		r.readRest()
		info.header = r.buf
	}
	// Anything still recorded here is a read error (or errNUL), not a syntax error.
	if r.err != nil {
		return r.err
	}

	if info.fset == nil {
		return nil
	}

	// Parse file header & record imports.
	info.parsed, info.parseErr = parser.ParseFile(info.fset, info.name, info.header, parser.ImportsOnly|parser.ParseComments)
	if info.parseErr != nil {
		// Parse errors are reported through info.parseErr, not the return value.
		return nil
	}

	hasEmbed := false
	for _, decl := range info.parsed.Decls {
		d, ok := decl.(*ast.GenDecl)
		if !ok {
			continue
		}
		for _, dspec := range d.Specs {
			spec, ok := dspec.(*ast.ImportSpec)
			if !ok {
				continue
			}
			quoted := spec.Path.Value
			path, err := strconv.Unquote(quoted)
			if err != nil {
				return fmt.Errorf("parser returned invalid quoted string: <%s>", quoted)
			}
			if !isValidImport(path) {
				// The parser used to return a parse error for invalid import paths, but
				// no longer does, so check for and create the error here instead.
				info.parseErr = scanner.Error{Pos: info.fset.Position(spec.Pos()), Msg: "invalid import path: " + path}
				info.imports = nil
				return nil
			}
			if path == "embed" {
				hasEmbed = true
			}

			// A spec without its own doc comment takes the declaration's doc
			// when it is the declaration's only spec.
			doc := spec.Doc
			if doc == nil && len(d.Specs) == 1 {
				doc = d.Doc
			}
			info.imports = append(info.imports, fileImport{path, spec.Pos(), doc})
		}
	}

	// Extract directives.
	// Only comment groups positioned before the package clause are considered.
	for _, group := range info.parsed.Comments {
		if group.Pos() >= info.parsed.Package {
			break
		}
		for _, c := range group.List {
			if strings.HasPrefix(c.Text, "//go:") {
				info.directives = append(info.directives, Directive{c.Text, info.fset.Position(c.Slash)})
			}
		}
	}

	// If the file imports "embed",
	// we have to look for //go:embed comments
	// in the remainder of the file.
	// The compiler will enforce the mapping of comments to
	// declared variables. We just need to know the patterns.
	// If there were //go:embed comments earlier in the file
	// (near the package statement or imports), the compiler
	// will reject them. They can be (and have already been) ignored.
	if hasEmbed {
		r.readRest()
		// Scan the complete file contents using a FileSet private to this scan,
		// separate from info.fset; embed positions are resolved against it.
		fset := token.NewFileSet()
		file := fset.AddFile(r.pos.Filename, -1, len(r.buf))
		var sc scanner.Scanner
		sc.Init(file, r.buf, nil, scanner.ScanComments)
		for {
			pos, tok, lit := sc.Scan()
			if tok == token.EOF {
				break
			}
			if tok == token.COMMENT && strings.HasPrefix(lit, "//go:embed") {
				// Ignore badly-formed lines - the compiler will report them when it finds them,
				// and we can pretend they are not there to help go list succeed with what it knows.
				embs, err := parseGoEmbed(fset, pos, lit)
				if err == nil {
					info.embeds = append(info.embeds, embs...)
				}
			}
		}
	}

	return nil
}
   398  
   399  // isValidImport checks if the import is a valid import using the more strict
   400  // checks allowed by the implementation restriction in https://go.dev/ref/spec#Import_declarations.
   401  // It was ported from the function of the same name that was removed from the
   402  // parser in CL 424855, when the parser stopped doing these checks.
   403  func isValidImport(s string) bool {
   404  	const illegalChars = `!"#$%&'()*,:;<=>?[\]^{|}` + "`\uFFFD"
   405  	for _, r := range s {
   406  		if !unicode.IsGraphic(r) || unicode.IsSpace(r) || strings.ContainsRune(illegalChars, r) {
   407  			return false
   408  		}
   409  	}
   410  	return s != ""
   411  }
   412  
   413  // parseGoEmbed parses a "//go:embed" to extract the glob patterns.
   414  // It accepts unquoted space-separated patterns as well as double-quoted and back-quoted Go strings.
   415  // This must match the behavior of cmd/compile/internal/noder.go.
   416  func parseGoEmbed(fset *token.FileSet, pos token.Pos, comment string) ([]fileEmbed, error) {
   417  	dir, ok := ast.ParseDirective(pos, comment)
   418  	if !ok || dir.Tool != "go" || dir.Name != "embed" {
   419  		return nil, nil
   420  	}
   421  	args, err := dir.ParseArgs()
   422  	if err != nil {
   423  		return nil, err
   424  	}
   425  	var list []fileEmbed
   426  	for _, arg := range args {
   427  		list = append(list, fileEmbed{arg.Arg, fset.Position(arg.Pos)})
   428  	}
   429  	return list, nil
   430  }
   431
View as plain text