Source file src/cmd/go/internal/modindex/scan.go

     1  // Copyright 2022 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  package modindex
     6  
     7  import (
     8  	"cmd/go/internal/base"
     9  	"cmd/go/internal/fsys"
    10  	"cmd/go/internal/str"
    11  	"encoding/json"
    12  	"errors"
    13  	"fmt"
    14  	"go/build"
    15  	"go/doc"
    16  	"go/scanner"
    17  	"go/token"
    18  	"io/fs"
    19  	"path/filepath"
    20  	"strings"
    21  )
    22  
    23  // moduleWalkErr returns filepath.SkipDir if the directory isn't relevant
    24  // when indexing a module or generating a filehash, ErrNotIndexed,
    25  // if the module shouldn't be indexed, and nil otherwise.
    26  func moduleWalkErr(root string, path string, info fs.FileInfo, err error) error {
    27  	if err != nil {
    28  		return ErrNotIndexed
    29  	}
    30  	// stop at module boundaries
    31  	if info.IsDir() && path != root {
    32  		if fi, err := fsys.Stat(filepath.Join(path, "go.mod")); err == nil && !fi.IsDir() {
    33  			return filepath.SkipDir
    34  		}
    35  	}
    36  	if info.Mode()&fs.ModeSymlink != 0 {
    37  		if target, err := fsys.Stat(path); err == nil && target.IsDir() {
    38  			// return an error to make the module hash invalid.
    39  			// Symlink directories in modules are tricky, so we won't index
    40  			// modules that contain them.
    41  			// TODO(matloob): perhaps don't return this error if the symlink leads to
    42  			// a directory with a go.mod file.
    43  			return ErrNotIndexed
    44  		}
    45  	}
    46  	return nil
    47  }
    48  
    49  // indexModule indexes the module at the given directory and returns its
    50  // encoded representation. It returns ErrNotIndexed if the module can't
    51  // be indexed because it contains symlinks.
    52  func indexModule(modroot string) ([]byte, error) {
    53  	fsys.Trace("indexModule", modroot)
    54  	var packages []*rawPackage
    55  
    56  	// If the root itself is a symlink to a directory,
    57  	// we want to follow it (see https://go.dev/issue/50807).
    58  	// Add a trailing separator to force that to happen.
    59  	root := str.WithFilePathSeparator(modroot)
    60  	err := fsys.Walk(root, func(path string, info fs.FileInfo, err error) error {
    61  		if err := moduleWalkErr(root, path, info, err); err != nil {
    62  			return err
    63  		}
    64  
    65  		if !info.IsDir() {
    66  			return nil
    67  		}
    68  		if !strings.HasPrefix(path, root) {
    69  			panic(fmt.Errorf("path %v in walk doesn't have modroot %v as prefix", path, modroot))
    70  		}
    71  		rel := path[len(root):]
    72  		packages = append(packages, importRaw(modroot, rel))
    73  		return nil
    74  	})
    75  	if err != nil {
    76  		return nil, err
    77  	}
    78  	return encodeModuleBytes(packages), nil
    79  }
    80  
    81  // indexPackage indexes the package at the given directory and returns its
    82  // encoded representation. It returns ErrNotIndexed if the package can't
    83  // be indexed.
    84  func indexPackage(modroot, pkgdir string) []byte {
    85  	fsys.Trace("indexPackage", pkgdir)
    86  	p := importRaw(modroot, relPath(pkgdir, modroot))
    87  	return encodePackageBytes(p)
    88  }
    89  
// rawPackage holds the information from each package that's needed to
// fill a build.Package once the context is available.
//
// error is empty when the package directory was scanned; otherwise it holds
// the message importRaw recorded because the directory was not found or
// could not be read. sourceFiles has one entry per file that importRaw
// chose to record.
type rawPackage struct {
	error string
	dir   string // directory containing package sources, relative to the module root

	// Source files
	sourceFiles []*rawFile
}
    99  
   100  type parseError struct {
   101  	ErrorList   *scanner.ErrorList
   102  	ErrorString string
   103  }
   104  
   105  // parseErrorToString converts the error from parsing the file into a string
   106  // representation. A nil error is converted to an empty string, and all other
   107  // errors are converted to a JSON-marshaled parseError struct, with ErrorList
   108  // set for errors of type scanner.ErrorList, and ErrorString set to the error's
   109  // string representation for all other errors.
   110  func parseErrorToString(err error) string {
   111  	if err == nil {
   112  		return ""
   113  	}
   114  	var p parseError
   115  	if e, ok := err.(scanner.ErrorList); ok {
   116  		p.ErrorList = &e
   117  	} else {
   118  		p.ErrorString = e.Error()
   119  	}
   120  	s, err := json.Marshal(p)
   121  	if err != nil {
   122  		panic(err) // This should be impossible because scanner.Error contains only strings and ints.
   123  	}
   124  	return string(s)
   125  }
   126  
   127  // parseErrorFromString converts a string produced by parseErrorToString back
   128  // to an error.  An empty string is converted to a nil error, and all
   129  // other strings are expected to be JSON-marshaled parseError structs.
   130  // The two functions are meant to preserve the structure of an
   131  // error of type scanner.ErrorList in a round trip, but may not preserve the
   132  // structure of other errors.
   133  func parseErrorFromString(s string) error {
   134  	if s == "" {
   135  		return nil
   136  	}
   137  	var p parseError
   138  	if err := json.Unmarshal([]byte(s), &p); err != nil {
   139  		base.Fatalf(`go: invalid parse error value in index: %q. This indicates a corrupted index. Run "go clean -cache" to reset the module cache.`, s)
   140  	}
   141  	if p.ErrorList != nil {
   142  		return *p.ErrorList
   143  	}
   144  	return errors.New(p.ErrorString)
   145  }
   146  
// rawFile is the struct representation of the file holding all
// information in its fields.
//
// error holds the message of the error getFileInfo returned for the file,
// and parseError holds the parseErrorToString encoding of the file's parse
// error; each is empty when there was no such error.
//
// NOTE(review): importRaw skips names starting with _ or . before any
// rawFile is created, and sets ignoreFile only when getFileInfo returns a
// nil info without an error. Confirm the ignoreFile comment below against
// getFileInfo.
type rawFile struct {
	error      string
	parseError string

	name                 string
	synopsis             string // doc.Synopsis of package comment... Compute synopsis on all of these?
	pkgName              string
	ignoreFile           bool   // starts with _ or . or should otherwise always be ignored
	binaryOnly           bool   // cannot be rebuilt from source (has //go:binary-only-package comment)
	cgoDirectives        string // the #cgo directive lines in the comment on import "C"
	goBuildConstraint    string
	plusBuildConstraints []string
	imports              []rawImport
	embeds               []embed
	directives           []build.Directive
}
   165  
// rawImport records a single import of a source file: the imported path and
// the position at which the import appears.
type rawImport struct {
	path     string
	position token.Position
}
   170  
// embed records a single embed pattern reported for a source file together
// with the position at which it appears.
type embed struct {
	pattern  string
	position token.Position
}
   175  
   176  // importRaw fills the rawPackage from the package files in srcDir.
   177  // dir is the package's path relative to the modroot.
   178  func importRaw(modroot, reldir string) *rawPackage {
   179  	p := &rawPackage{
   180  		dir: reldir,
   181  	}
   182  
   183  	absdir := filepath.Join(modroot, reldir)
   184  
   185  	// We still haven't checked
   186  	// that p.dir directory exists. This is the right time to do that check.
   187  	// We can't do it earlier, because we want to gather partial information for the
   188  	// non-nil *build.Package returned when an error occurs.
   189  	// We need to do this before we return early on FindOnly flag.
   190  	if !isDir(absdir) {
   191  		// package was not found
   192  		p.error = fmt.Errorf("cannot find package in:\n\t%s", absdir).Error()
   193  		return p
   194  	}
   195  
   196  	entries, err := fsys.ReadDir(absdir)
   197  	if err != nil {
   198  		p.error = err.Error()
   199  		return p
   200  	}
   201  
   202  	fset := token.NewFileSet()
   203  	for _, d := range entries {
   204  		if d.IsDir() {
   205  			continue
   206  		}
   207  		if d.Mode()&fs.ModeSymlink != 0 {
   208  			if isDir(filepath.Join(absdir, d.Name())) {
   209  				// Symlinks to directories are not source files.
   210  				continue
   211  			}
   212  		}
   213  
   214  		name := d.Name()
   215  		ext := nameExt(name)
   216  
   217  		if strings.HasPrefix(name, "_") || strings.HasPrefix(name, ".") {
   218  			continue
   219  		}
   220  		info, err := getFileInfo(absdir, name, fset)
   221  		if err == errNonSource {
   222  			// not a source or object file. completely ignore in the index
   223  			continue
   224  		} else if err != nil {
   225  			p.sourceFiles = append(p.sourceFiles, &rawFile{name: name, error: err.Error()})
   226  			continue
   227  		} else if info == nil {
   228  			p.sourceFiles = append(p.sourceFiles, &rawFile{name: name, ignoreFile: true})
   229  			continue
   230  		}
   231  		rf := &rawFile{
   232  			name:                 name,
   233  			goBuildConstraint:    info.goBuildConstraint,
   234  			plusBuildConstraints: info.plusBuildConstraints,
   235  			binaryOnly:           info.binaryOnly,
   236  			directives:           info.directives,
   237  		}
   238  		if info.parsed != nil {
   239  			rf.pkgName = info.parsed.Name.Name
   240  		}
   241  
   242  		// Going to save the file. For non-Go files, can stop here.
   243  		p.sourceFiles = append(p.sourceFiles, rf)
   244  		if ext != ".go" {
   245  			continue
   246  		}
   247  
   248  		if info.parseErr != nil {
   249  			rf.parseError = parseErrorToString(info.parseErr)
   250  			// Fall through: we might still have a partial AST in info.Parsed,
   251  			// and we want to list files with parse errors anyway.
   252  		}
   253  
   254  		if info.parsed != nil && info.parsed.Doc != nil {
   255  			rf.synopsis = doc.Synopsis(info.parsed.Doc.Text())
   256  		}
   257  
   258  		var cgoDirectives []string
   259  		for _, imp := range info.imports {
   260  			if imp.path == "C" {
   261  				cgoDirectives = append(cgoDirectives, extractCgoDirectives(imp.doc.Text())...)
   262  			}
   263  			rf.imports = append(rf.imports, rawImport{path: imp.path, position: fset.Position(imp.pos)})
   264  		}
   265  		rf.cgoDirectives = strings.Join(cgoDirectives, "\n")
   266  		for _, emb := range info.embeds {
   267  			rf.embeds = append(rf.embeds, embed{emb.pattern, emb.pos})
   268  		}
   269  
   270  	}
   271  	return p
   272  }
   273  
   274  // extractCgoDirectives filters only the lines containing #cgo directives from the input,
   275  // which is the comment on import "C".
   276  func extractCgoDirectives(doc string) []string {
   277  	var out []string
   278  	for _, line := range strings.Split(doc, "\n") {
   279  		// Line is
   280  		//	#cgo [GOOS/GOARCH...] LDFLAGS: stuff
   281  		//
   282  		line = strings.TrimSpace(line)
   283  		if len(line) < 5 || line[:4] != "#cgo" || (line[4] != ' ' && line[4] != '\t') {
   284  			continue
   285  		}
   286  
   287  		out = append(out, line)
   288  	}
   289  	return out
   290  }
   291  

View as plain text