Source file src/cmd/go/internal/modfetch/codehost/git.go

     1  // Copyright 2018 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  package codehost
     6  
     7  import (
     8  	"bytes"
     9  	"context"
    10  	"crypto/sha256"
    11  	"encoding/base64"
    12  	"errors"
    13  	"fmt"
    14  	"io"
    15  	"io/fs"
    16  	"net/url"
    17  	"os"
    18  	"os/exec"
    19  	"path/filepath"
    20  	"runtime"
    21  	"slices"
    22  	"sort"
    23  	"strconv"
    24  	"strings"
    25  	"sync"
    26  	"time"
    27  
    28  	"cmd/go/internal/base"
    29  	"cmd/go/internal/lockedfile"
    30  	"cmd/go/internal/web"
    31  	"cmd/internal/par"
    32  
    33  	"golang.org/x/mod/semver"
    34  )
    35  
    36  // A notExistError wraps another error to retain its original text
    37  // but makes it opaquely equivalent to fs.ErrNotExist.
    38  type notExistError struct {
    39  	err error
    40  }
    41  
    42  func (e notExistError) Error() string   { return e.err.Error() }
    43  func (notExistError) Is(err error) bool { return err == fs.ErrNotExist }
    44  
// gitWorkDirType is the work-directory type string that newGitRepo passes
// to WorkDir when setting up the local directory for a remote repository.
const gitWorkDirType = "git3"
    46  
    47  func newGitRepo(ctx context.Context, remote string, local bool) (Repo, error) {
    48  	r := &gitRepo{remote: remote, local: local}
    49  	if local {
    50  		if strings.Contains(remote, "://") { // Local flag, but URL provided
    51  			return nil, fmt.Errorf("git remote (%s) lookup disabled", remote)
    52  		}
    53  		info, err := os.Stat(remote)
    54  		if err != nil {
    55  			return nil, err
    56  		}
    57  		if !info.IsDir() {
    58  			return nil, fmt.Errorf("%s exists but is not a directory", remote)
    59  		}
    60  		r.dir = remote
    61  		r.mu.Path = r.dir + ".lock"
    62  		return r, nil
    63  	}
    64  	// This is a remote path lookup.
    65  	if !strings.Contains(remote, "://") { // No URL scheme, could be host:path
    66  		if strings.Contains(remote, ":") {
    67  			return nil, fmt.Errorf("git remote (%s) must not be local directory (use URL syntax not host:path syntax)", remote)
    68  		}
    69  		return nil, fmt.Errorf("git remote (%s) must not be local directory", remote)
    70  	}
    71  	var err error
    72  	r.dir, r.mu.Path, err = WorkDir(ctx, gitWorkDirType, r.remote)
    73  	if err != nil {
    74  		return nil, err
    75  	}
    76  
    77  	unlock, err := r.mu.Lock()
    78  	if err != nil {
    79  		return nil, err
    80  	}
    81  	defer unlock()
    82  
    83  	if _, err := os.Stat(filepath.Join(r.dir, "objects")); err != nil {
    84  		if _, err := Run(ctx, r.dir, "git", "init", "--bare"); err != nil {
    85  			os.RemoveAll(r.dir)
    86  			return nil, err
    87  		}
    88  		// We could just say git fetch https://whatever later,
    89  		// but this lets us say git fetch origin instead, which
    90  		// is a little nicer. More importantly, using a named remote
    91  		// avoids a problem with Git LFS. See golang.org/issue/25605.
    92  		if _, err := r.runGit(ctx, "git", "remote", "add", "origin", "--", r.remote); err != nil {
    93  			os.RemoveAll(r.dir)
    94  			return nil, err
    95  		}
    96  		if runtime.GOOS == "windows" {
    97  			// Git for Windows by default does not support paths longer than
    98  			// MAX_PATH (260 characters) because that may interfere with navigation
    99  			// in some Windows programs. However, cmd/go should be able to handle
   100  			// long paths just fine, and we expect people to use 'go clean' to
   101  			// manipulate the module cache, so it should be harmless to set here,
   102  			// and in some cases may be necessary in order to download modules with
   103  			// long branch names.
   104  			//
   105  			// See https://github.com/git-for-windows/git/wiki/Git-cannot-create-a-file-or-directory-with-a-long-path.
   106  			if _, err := r.runGit(ctx, "git", "config", "core.longpaths", "true"); err != nil {
   107  				os.RemoveAll(r.dir)
   108  				return nil, err
   109  			}
   110  		}
   111  	}
   112  	r.remoteURL = r.remote
   113  	r.remote = "origin"
   114  	return r, nil
   115  }
   116  
// A gitRepo holds the state for one git repository, as constructed by
// newGitRepo: either a work directory for a remote URL, or (in local mode)
// an existing directory used in place.
type gitRepo struct {
	// NOTE(review): ctx is not referenced by any method visible in this part
	// of the file; those methods take a context parameter instead. Confirm
	// whether it is still needed.
	ctx context.Context

	// For a remote repository, newGitRepo sets remoteURL to the URL it was
	// given and remote to "origin". In local mode remote is the directory
	// path and remoteURL is left empty.
	remote, remoteURL string
	local             bool // local only lookups; no remote fetches
	// dir is the git directory: the work directory returned by WorkDir, or
	// the caller-supplied directory in local mode.
	dir string

	mu lockedfile.Mutex // protects fetchLevel and git repo state

	// fetchLevel is one of fetchNone, fetchSome, or fetchAll.
	fetchLevel int

	// statCache caches the results of Stat, keyed by the requested rev.
	statCache par.ErrCache[string, *RevInfo]

	// refsOnce guards the single load of refs and refsErr in loadRefs.
	refsOnce sync.Once
	// refs maps branch and tag refs (e.g., "HEAD", "refs/heads/master")
	// to commits (e.g., "37ffd2e798afde829a34e8955b716ab730b2a6d6")
	refs    map[string]string
	refsErr error

	// localTagsOnce guards the initial load of localTags (see loadLocalTags);
	// stat also adds entries when it creates a tag locally.
	localTagsOnce sync.Once
	localTags     sync.Map // map[string]bool
}
   139  
// Values for gitRepo.fetchLevel, in increasing order of completeness.
const (
	// How much have we fetched into the git repo (in this process)?
	fetchNone = iota // nothing yet
	fetchSome        // shallow fetches of individual hashes
	fetchAll         // all remote branches and tags (see fetchRefsLocked)
)
   146  
   147  // loadLocalTags loads tag references from the local git cache
   148  // into the map r.localTags.
   149  func (r *gitRepo) loadLocalTags(ctx context.Context) {
   150  	// The git protocol sends all known refs and ls-remote filters them on the client side,
   151  	// so we might as well record both heads and tags in one shot.
   152  	// Most of the time we only care about tags but sometimes we care about heads too.
   153  	out, err := r.runGit(ctx, "git", "tag", "-l")
   154  	if err != nil {
   155  		return
   156  	}
   157  
   158  	for _, line := range strings.Split(string(out), "\n") {
   159  		if line != "" {
   160  			r.localTags.Store(line, true)
   161  		}
   162  	}
   163  }
   164  
   165  func (r *gitRepo) CheckReuse(ctx context.Context, old *Origin, subdir string) error {
   166  	if old == nil {
   167  		return fmt.Errorf("missing origin")
   168  	}
   169  	if old.VCS != "git" || old.URL != r.remoteURL {
   170  		return fmt.Errorf("origin moved from %v %q to %v %q", old.VCS, old.URL, "git", r.remoteURL)
   171  	}
   172  	if old.Subdir != subdir {
   173  		return fmt.Errorf("origin moved from %v %q %q to %v %q %q", old.VCS, old.URL, old.Subdir, "git", r.remoteURL, subdir)
   174  	}
   175  
   176  	// Note: Can have Hash with no Ref and no TagSum and no RepoSum,
   177  	// meaning the Hash simply has to remain in the repo.
   178  	// In that case we assume it does in the absence of any real way to check.
   179  	// But if neither Hash nor TagSum is present, we have nothing to check,
   180  	// which we take to mean we didn't record enough information to be sure.
   181  	if old.Hash == "" && old.TagSum == "" && old.RepoSum == "" {
   182  		return fmt.Errorf("non-specific origin")
   183  	}
   184  
   185  	r.loadRefs(ctx)
   186  	if r.refsErr != nil {
   187  		return r.refsErr
   188  	}
   189  
   190  	if old.Ref != "" {
   191  		hash, ok := r.refs[old.Ref]
   192  		if !ok {
   193  			return fmt.Errorf("ref %q deleted", old.Ref)
   194  		}
   195  		if hash != old.Hash {
   196  			return fmt.Errorf("ref %q moved from %s to %s", old.Ref, old.Hash, hash)
   197  		}
   198  	}
   199  	if old.TagSum != "" {
   200  		tags, err := r.Tags(ctx, old.TagPrefix)
   201  		if err != nil {
   202  			return err
   203  		}
   204  		if tags.Origin.TagSum != old.TagSum {
   205  			return fmt.Errorf("tags changed")
   206  		}
   207  	}
   208  	if old.RepoSum != "" {
   209  		if r.repoSum(r.refs) != old.RepoSum {
   210  			return fmt.Errorf("refs changed")
   211  		}
   212  	}
   213  	return nil
   214  }
   215  
   216  // loadRefs loads heads and tags references from the remote into the map r.refs.
   217  // The result is cached in memory.
   218  func (r *gitRepo) loadRefs(ctx context.Context) (map[string]string, error) {
   219  	if r.local { // Return results from the cache if local only.
   220  		// In the future, we could consider loading r.refs using local git commands
   221  		// if desired.
   222  		return nil, nil
   223  	}
   224  	r.refsOnce.Do(func() {
   225  		// The git protocol sends all known refs and ls-remote filters them on the client side,
   226  		// so we might as well record both heads and tags in one shot.
   227  		// Most of the time we only care about tags but sometimes we care about heads too.
   228  		release, err := base.AcquireNet()
   229  		if err != nil {
   230  			r.refsErr = err
   231  			return
   232  		}
   233  		out, gitErr := r.runGit(ctx, "git", "ls-remote", "-q", r.remote)
   234  		release()
   235  
   236  		if gitErr != nil {
   237  			if rerr, ok := gitErr.(*RunError); ok {
   238  				if bytes.Contains(rerr.Stderr, []byte("fatal: could not read Username")) {
   239  					rerr.HelpText = "Confirm the import path was entered correctly.\nIf this is a private repository, see https://golang.org/doc/faq#git_https for additional information."
   240  				}
   241  			}
   242  
   243  			// If the remote URL doesn't exist at all, ideally we should treat the whole
   244  			// repository as nonexistent by wrapping the error in a notExistError.
   245  			// For HTTP and HTTPS, that's easy to detect: we'll try to fetch the URL
   246  			// ourselves and see what code it serves.
   247  			if u, err := url.Parse(r.remoteURL); err == nil && (u.Scheme == "http" || u.Scheme == "https") {
   248  				if _, err := web.GetBytes(u); errors.Is(err, fs.ErrNotExist) {
   249  					gitErr = notExistError{gitErr}
   250  				}
   251  			}
   252  
   253  			r.refsErr = gitErr
   254  			return
   255  		}
   256  
   257  		refs := make(map[string]string)
   258  		for _, line := range strings.Split(string(out), "\n") {
   259  			f := strings.Fields(line)
   260  			if len(f) != 2 {
   261  				continue
   262  			}
   263  			if f[1] == "HEAD" || strings.HasPrefix(f[1], "refs/heads/") || strings.HasPrefix(f[1], "refs/tags/") {
   264  				refs[f[1]] = f[0]
   265  			}
   266  		}
   267  		for ref, hash := range refs {
   268  			if k, found := strings.CutSuffix(ref, "^{}"); found { // record unwrapped annotated tag as value of tag
   269  				refs[k] = hash
   270  				delete(refs, ref)
   271  			}
   272  		}
   273  		r.refs = refs
   274  	})
   275  	return r.refs, r.refsErr
   276  }
   277  
   278  func (r *gitRepo) Tags(ctx context.Context, prefix string) (*Tags, error) {
   279  	refs, err := r.loadRefs(ctx)
   280  	if err != nil {
   281  		return nil, err
   282  	}
   283  
   284  	tags := &Tags{
   285  		Origin: &Origin{
   286  			VCS:       "git",
   287  			URL:       r.remoteURL,
   288  			TagPrefix: prefix,
   289  		},
   290  		List: []Tag{},
   291  	}
   292  	for ref, hash := range refs {
   293  		if !strings.HasPrefix(ref, "refs/tags/") {
   294  			continue
   295  		}
   296  		tag := ref[len("refs/tags/"):]
   297  		if !strings.HasPrefix(tag, prefix) {
   298  			continue
   299  		}
   300  		tags.List = append(tags.List, Tag{tag, hash})
   301  	}
   302  	sort.Slice(tags.List, func(i, j int) bool {
   303  		return tags.List[i].Name < tags.List[j].Name
   304  	})
   305  
   306  	dir := prefix[:strings.LastIndex(prefix, "/")+1]
   307  	h := sha256.New()
   308  	for _, tag := range tags.List {
   309  		if isOriginTag(strings.TrimPrefix(tag.Name, dir)) {
   310  			fmt.Fprintf(h, "%q %s\n", tag.Name, tag.Hash)
   311  		}
   312  	}
   313  	tags.Origin.TagSum = "t1:" + base64.StdEncoding.EncodeToString(h.Sum(nil))
   314  	return tags, nil
   315  }
   316  
   317  // repoSum returns a checksum of the entire repo state,
   318  // which can be checked (as Origin.RepoSum) to cache
   319  // the absence of a specific module version.
   320  // The caller must supply refs, the result of a successful r.loadRefs.
   321  func (r *gitRepo) repoSum(refs map[string]string) string {
   322  	list := make([]string, 0, len(refs))
   323  	for ref := range refs {
   324  		list = append(list, ref)
   325  	}
   326  	sort.Strings(list)
   327  	h := sha256.New()
   328  	for _, ref := range list {
   329  		fmt.Fprintf(h, "%q %s\n", ref, refs[ref])
   330  	}
   331  	return "r1:" + base64.StdEncoding.EncodeToString(h.Sum(nil))
   332  }
   333  
   334  // unknownRevisionInfo returns a RevInfo containing an Origin containing a RepoSum of refs,
   335  // for use when returning an UnknownRevisionError.
   336  func (r *gitRepo) unknownRevisionInfo(refs map[string]string) *RevInfo {
   337  	return &RevInfo{
   338  		Origin: &Origin{
   339  			VCS:     "git",
   340  			URL:     r.remoteURL,
   341  			RepoSum: r.repoSum(refs),
   342  		},
   343  	}
   344  }
   345  
   346  func (r *gitRepo) Latest(ctx context.Context) (*RevInfo, error) {
   347  	refs, err := r.loadRefs(ctx)
   348  	if err != nil {
   349  		return nil, err
   350  	}
   351  	if refs["HEAD"] == "" {
   352  		return nil, ErrNoCommits
   353  	}
   354  	statInfo, err := r.Stat(ctx, refs["HEAD"])
   355  	if err != nil {
   356  		return nil, err
   357  	}
   358  
   359  	// Stat may return cached info, so make a copy to modify here.
   360  	info := new(RevInfo)
   361  	*info = *statInfo
   362  	info.Origin = new(Origin)
   363  	if statInfo.Origin != nil {
   364  		*info.Origin = *statInfo.Origin
   365  	}
   366  	info.Origin.Ref = "HEAD"
   367  	info.Origin.Hash = refs["HEAD"]
   368  
   369  	return info, nil
   370  }
   371  
   372  // findRef finds some ref name for the given hash,
   373  // for use when the server requires giving a ref instead of a hash.
   374  // There may be multiple ref names for a given hash,
   375  // in which case this returns some name - it doesn't matter which.
   376  func (r *gitRepo) findRef(ctx context.Context, hash string) (ref string, ok bool) {
   377  	refs, err := r.loadRefs(ctx)
   378  	if err != nil {
   379  		return "", false
   380  	}
   381  	for ref, h := range refs {
   382  		if h == hash {
   383  			return ref, true
   384  		}
   385  	}
   386  	return "", false
   387  }
   388  
// minHashDigits is the minimum number of digits to require
// before accepting a hex digit sequence as potentially identifying
// a specific commit in a git repo. (Of course, users can always
// specify more digits, and many will paste in all 40 digits,
// but many of git's commands default to printing short hashes
// as 7 digits.) The stat method applies this as the lower bound,
// with 40 as the upper bound, when deciding whether a rev may be a hash.
const minHashDigits = 7
   396  
// stat stats the given rev in the local repository,
// or else it fetches more info from the remote repository and tries again.
//
// The lookup proceeds in stages, stopping at the first that succeeds:
//  1. If rev looks like a hash (minHashDigits to 40 hex digits), try it locally.
//  2. If rev is a tag already known locally, stat that tag.
//  3. Resolve rev against the remote refs to a ref name and/or full hash;
//     a rev that is neither a known ref nor hash-like is reported as an
//     UnknownRevisionError.
//  4. Under r.mu, retry locally, then (unless in local mode) do a shallow
//     fetch of the specific ref, and finally fall back to fetching all
//     heads and tags.
//
// On return with a non-nil info, the deferred function sets info.Origin.Hash
// to info.Name and records the resolved ref in info.Origin.Ref unless ref
// equals that hash.
func (r *gitRepo) stat(ctx context.Context, rev string) (info *RevInfo, err error) {
	// Fast path: maybe rev is a hash we already have locally.
	didStatLocal := false
	if len(rev) >= minHashDigits && len(rev) <= 40 && AllHex(rev) {
		if info, err := r.statLocal(ctx, rev, rev); err == nil {
			return info, nil
		}
		didStatLocal = true
	}

	// Maybe rev is a tag we already have locally.
	// (Note that we're excluding branches, which can be stale.)
	r.localTagsOnce.Do(func() { r.loadLocalTags(ctx) })
	if _, ok := r.localTags.Load(rev); ok {
		return r.statLocal(ctx, rev, "refs/tags/"+rev)
	}

	// Maybe rev is the name of a tag or branch on the remote server.
	// Or maybe it's the prefix of a hash of a named ref.
	// Try to resolve to both a ref (git name) and full (40-hex-digit) commit hash.
	refs, err := r.loadRefs(ctx)
	if err != nil {
		return nil, err
	}
	// loadRefs may return an error if git fails, for example segfaults, or
	// could not load a private repo.
	// NOTE(review): an earlier wording of this comment said the error check
	// was deferred to the else block below, but the error is returned
	// immediately above, before the local retry further down is attempted.
	var ref, hash string
	if refs["refs/tags/"+rev] != "" {
		ref = "refs/tags/" + rev
		hash = refs[ref]
		// Keep rev as is: tags are assumed not to change meaning.
	} else if refs["refs/heads/"+rev] != "" {
		ref = "refs/heads/" + rev
		hash = refs[ref]
		rev = hash // Replace rev, because meaning of refs/heads/foo can change.
	} else if rev == "HEAD" && refs["HEAD"] != "" {
		ref = "HEAD"
		hash = refs[ref]
		rev = hash // Replace rev, because meaning of HEAD can change.
	} else if len(rev) >= minHashDigits && len(rev) <= 40 && AllHex(rev) {
		// At the least, we have a hash prefix we can look up after the fetch below.
		// Maybe we can map it to a full hash using the known refs.
		prefix := rev
		// Check whether rev is prefix of known ref hash.
		for k, h := range refs {
			if strings.HasPrefix(h, prefix) {
				if hash != "" && hash != h {
					// Hash is an ambiguous hash prefix.
					// More information will not change that.
					return nil, fmt.Errorf("ambiguous revision %s", rev)
				}
				if ref == "" || ref > k { // Break ties deterministically when multiple refs point at same hash.
					ref = k
				}
				rev = h
				hash = h
			}
		}
		if hash == "" && len(rev) == 40 { // Didn't find a ref, but rev is a full hash.
			hash = rev
		}
	} else {
		return r.unknownRevisionInfo(refs), &UnknownRevisionError{Rev: rev}
	}

	// Fix up the Origin of whatever info is eventually returned. This runs
	// after every return below, using the final values of info and ref.
	defer func() {
		if info != nil {
			info.Origin.Hash = info.Name
			// There's a ref = hash below; don't write that hash down as Origin.Ref.
			if ref != info.Origin.Hash {
				info.Origin.Ref = ref
			}
		}
	}()

	// Protect r.fetchLevel and the "fetch more and more" sequence.
	unlock, err := r.mu.Lock()
	if err != nil {
		return nil, err
	}
	defer unlock()

	// Perhaps r.localTags did not have the ref when we loaded local tags,
	// but we've since done fetches that pulled down the hash we need
	// (or already have the hash we need, just without its tag).
	// Either way, try a local stat before falling back to network I/O.
	if !didStatLocal {
		if info, err := r.statLocal(ctx, rev, hash); err == nil {
			tag, fromTag := strings.CutPrefix(ref, "refs/tags/")
			if fromTag && !slices.Contains(info.Tags, tag) {
				// The local repo includes the commit hash we want, but it is missing
				// the corresponding tag. Add that tag and try again.
				_, err := r.runGit(ctx, "git", "tag", tag, hash)
				if err != nil {
					return nil, err
				}
				r.localTags.Store(tag, true)
				return r.statLocal(ctx, rev, ref)
			}
			return info, err
		}
	}

	if r.local { // at this point, we have determined that we need to fetch rev, fail early if local only mode.
		return nil, fmt.Errorf("revision does not exist locally: %s", rev)
	}

	// If we know a specific commit we need and its ref, fetch it.
	// We do NOT fetch arbitrary hashes (when we don't know the ref)
	// because we want to avoid ever importing a commit that isn't
	// reachable from refs/tags/* or refs/heads/* or HEAD.
	// Both Gerrit and GitHub expose every CL/PR as a named ref,
	// and we don't want those commits masquerading as being real
	// pseudo-versions in the main repo.
	if r.fetchLevel <= fetchSome && ref != "" && hash != "" {
		r.fetchLevel = fetchSome
		var refspec string
		if ref == "HEAD" {
			// Fetch the hash but give it a local name (refs/dummy),
			// because that triggers the fetch behavior of creating any
			// other known remote tags for the hash. We never use
			// refs/dummy (it's not refs/tags/dummy) and it will be
			// overwritten in the next command, and that's fine.
			ref = hash
			refspec = hash + ":refs/dummy"
		} else {
			// If we do know the ref name, save the mapping locally
			// so that (if it is a tag) it can show up in localTags
			// on a future call. Also, some servers refuse to allow
			// full hashes in ref specs, so prefer a ref name if known.
			refspec = ref + ":" + ref
		}

		release, err := base.AcquireNet()
		if err != nil {
			return nil, err
		}
		// We explicitly set protocol.version=2 for this command to work around
		// an apparent Git bug introduced in Git 2.21 (commit 61c771),
		// which causes the handler for protocol version 1 to sometimes miss
		// tags that point to the requested commit (see https://go.dev/issue/56881).
		_, err = r.runGit(ctx, "git", "-c", "protocol.version=2", "fetch", "-f", "--depth=1", r.remote, refspec)
		release()

		if err == nil {
			return r.statLocal(ctx, rev, ref)
		}
		// Don't try to be smart about parsing the error.
		// It's too complex and varies too much by git version.
		// No matter what went wrong, fall back to a complete fetch.
	}

	// Last resort.
	// Fetch all heads and tags and hope the hash we want is in the history.
	if err := r.fetchRefsLocked(ctx); err != nil {
		return nil, err
	}

	return r.statLocal(ctx, rev, rev)
}
   560  
   561  // fetchRefsLocked fetches all heads and tags from the origin, along with the
   562  // ancestors of those commits.
   563  //
   564  // We only fetch heads and tags, not arbitrary other commits: we don't want to
   565  // pull in off-branch commits (such as rejected GitHub pull requests) that the
   566  // server may be willing to provide. (See the comments within the stat method
   567  // for more detail.)
   568  //
   569  // fetchRefsLocked requires that r.mu remain locked for the duration of the call.
   570  func (r *gitRepo) fetchRefsLocked(ctx context.Context) error {
   571  	if r.local {
   572  		panic("go: fetchRefsLocked called in local only mode.")
   573  	}
   574  	if r.fetchLevel < fetchAll {
   575  		// NOTE: To work around a bug affecting Git clients up to at least 2.23.0
   576  		// (2019-08-16), we must first expand the set of local refs, and only then
   577  		// unshallow the repository as a separate fetch operation. (See
   578  		// golang.org/issue/34266 and
   579  		// https://github.com/git/git/blob/4c86140027f4a0d2caaa3ab4bd8bfc5ce3c11c8a/transport.c#L1303-L1309.)
   580  
   581  		release, err := base.AcquireNet()
   582  		if err != nil {
   583  			return err
   584  		}
   585  		defer release()
   586  
   587  		if _, err := r.runGit(ctx, "git", "fetch", "-f", r.remote, "refs/heads/*:refs/heads/*", "refs/tags/*:refs/tags/*"); err != nil {
   588  			return err
   589  		}
   590  
   591  		if _, err := os.Stat(filepath.Join(r.dir, "shallow")); err == nil {
   592  			if _, err := r.runGit(ctx, "git", "fetch", "--unshallow", "-f", r.remote); err != nil {
   593  				return err
   594  			}
   595  		}
   596  
   597  		r.fetchLevel = fetchAll
   598  	}
   599  	return nil
   600  }
   601  
   602  // statLocal returns a new RevInfo describing rev in the local git repository.
   603  // It uses version as info.Version.
   604  func (r *gitRepo) statLocal(ctx context.Context, version, rev string) (*RevInfo, error) {
   605  	out, err := r.runGit(ctx, "git", "-c", "log.showsignature=false", "log", "--no-decorate", "-n1", "--format=format:%H %ct %D", rev, "--")
   606  	if err != nil {
   607  		// Return info with Origin.RepoSum if possible to allow caching of negative lookup.
   608  		var info *RevInfo
   609  		if refs, err := r.loadRefs(ctx); err == nil {
   610  			info = r.unknownRevisionInfo(refs)
   611  		}
   612  		return info, &UnknownRevisionError{Rev: rev}
   613  	}
   614  	f := strings.Fields(string(out))
   615  	if len(f) < 2 {
   616  		return nil, fmt.Errorf("unexpected response from git log: %q", out)
   617  	}
   618  	hash := f[0]
   619  	if strings.HasPrefix(hash, version) {
   620  		version = hash // extend to full hash
   621  	}
   622  	t, err := strconv.ParseInt(f[1], 10, 64)
   623  	if err != nil {
   624  		return nil, fmt.Errorf("invalid time from git log: %q", out)
   625  	}
   626  
   627  	info := &RevInfo{
   628  		Origin: &Origin{
   629  			VCS:  "git",
   630  			URL:  r.remoteURL,
   631  			Hash: hash,
   632  		},
   633  		Name:    hash,
   634  		Short:   ShortenSHA1(hash),
   635  		Time:    time.Unix(t, 0).UTC(),
   636  		Version: hash,
   637  	}
   638  	if !strings.HasPrefix(hash, rev) {
   639  		info.Origin.Ref = rev
   640  	}
   641  
   642  	// Add tags. Output looks like:
   643  	//	ede458df7cd0fdca520df19a33158086a8a68e81 1523994202 HEAD -> master, tag: v1.2.4-annotated, tag: v1.2.3, origin/master, origin/HEAD
   644  	for i := 2; i < len(f); i++ {
   645  		if f[i] == "tag:" {
   646  			i++
   647  			if i < len(f) {
   648  				info.Tags = append(info.Tags, strings.TrimSuffix(f[i], ","))
   649  			}
   650  		}
   651  	}
   652  	sort.Strings(info.Tags)
   653  
   654  	// Used hash as info.Version above.
   655  	// Use caller's suggested version if it appears in the tag list
   656  	// (filters out branch names, HEAD).
   657  	for _, tag := range info.Tags {
   658  		if version == tag {
   659  			info.Version = version
   660  		}
   661  	}
   662  
   663  	return info, nil
   664  }
   665  
   666  func (r *gitRepo) Stat(ctx context.Context, rev string) (*RevInfo, error) {
   667  	if rev == "latest" {
   668  		return r.Latest(ctx)
   669  	}
   670  	return r.statCache.Do(rev, func() (*RevInfo, error) {
   671  		return r.stat(ctx, rev)
   672  	})
   673  }
   674  
   675  func (r *gitRepo) ReadFile(ctx context.Context, rev, file string, maxSize int64) ([]byte, error) {
   676  	// TODO: Could use git cat-file --batch.
   677  	info, err := r.Stat(ctx, rev) // download rev into local git repo
   678  	if err != nil {
   679  		return nil, err
   680  	}
   681  	out, err := r.runGit(ctx, "git", "cat-file", "blob", info.Name+":"+file)
   682  	if err != nil {
   683  		return nil, fs.ErrNotExist
   684  	}
   685  	return out, nil
   686  }
   687  
// RecentTag returns the highest tag, by semver comparison of the part after
// prefix, among the tags that begin with prefix, are accepted by allowed,
// and are listed by "git for-each-ref refs/tags --merged rev". It returns
// "" if there is no such tag.
//
// If the local repository yields no definitive answer but the remote has
// tags beginning with prefix+"v", RecentTag fetches all heads and tags
// (failing instead in local only mode) and then looks once more.
func (r *gitRepo) RecentTag(ctx context.Context, rev, prefix string, allowed func(tag string) bool) (tag string, err error) {
	info, err := r.Stat(ctx, rev)
	if err != nil {
		return "", err
	}
	rev = info.Name // expand hash prefixes

	// describe sets tag and err using 'git for-each-ref' and reports whether the
	// result is definitive.
	// It assigns to the named results tag and err of RecentTag directly.
	describe := func() (definitive bool) {
		var out []byte
		out, err = r.runGit(ctx, "git", "for-each-ref", "--format", "%(refname)", "refs/tags", "--merged", rev)
		if err != nil {
			return true
		}

		// prefixed tags aren't valid semver tags so compare without prefix, but only tags with correct prefix
		var highest string
		for _, line := range strings.Split(string(out), "\n") {
			line = strings.TrimSpace(line)
			// git does support lstrip in for-each-ref format, but it was added in v2.13.0. Stripping here
			// instead gives support for git v2.7.0.
			if !strings.HasPrefix(line, "refs/tags/") {
				continue
			}
			line = line[len("refs/tags/"):]

			if !strings.HasPrefix(line, prefix) {
				continue
			}
			if !allowed(line) {
				continue
			}

			semtag := line[len(prefix):]
			if semver.Compare(semtag, highest) > 0 {
				highest = semtag
			}
		}

		if highest != "" {
			tag = prefix + highest
		}

		// A non-empty tag that is not purely hex digits is taken as definitive.
		return tag != "" && !AllHex(tag)
	}

	if describe() {
		return tag, err
	}

	// Git didn't find a version tag preceding the requested rev.
	// See whether any plausible tag exists.
	tags, err := r.Tags(ctx, prefix+"v")
	if err != nil {
		return "", err
	}
	if len(tags.List) == 0 {
		return "", nil
	}

	if r.local { // at this point, we have determined that we need to fetch rev, fail early if local only mode.
		return "", fmt.Errorf("revision does not exist locally: %s", rev)
	}
	// There are plausible tags, but we don't know if rev is a descendent of any of them.
	// Fetch the history to find out.

	unlock, err := r.mu.Lock()
	if err != nil {
		return "", err
	}
	defer unlock()

	if err := r.fetchRefsLocked(ctx); err != nil {
		return "", err
	}

	// If we've reached this point, we have all of the commits that are reachable
	// from all heads and tags.
	//
	// The only refs we should be missing are those that are no longer reachable
	// (or never were reachable) from any branch or tag, including the master
	// branch, and we don't want to resolve them anyway (they're probably
	// unreachable for a reason).
	//
	// Try one last time in case some other goroutine fetched rev while we were
	// waiting on the lock.
	describe()
	return tag, err
}
   778  
   779  func (r *gitRepo) DescendsFrom(ctx context.Context, rev, tag string) (bool, error) {
   780  	// The "--is-ancestor" flag was added to "git merge-base" in version 1.8.0, so
   781  	// this won't work with Git 1.7.1. According to golang.org/issue/28550, cmd/go
   782  	// already doesn't work with Git 1.7.1, so at least it's not a regression.
   783  	//
   784  	// git merge-base --is-ancestor exits with status 0 if rev is an ancestor, or
   785  	// 1 if not.
   786  	_, err := r.runGit(ctx, "git", "merge-base", "--is-ancestor", "--", tag, rev)
   787  
   788  	// Git reports "is an ancestor" with exit code 0 and "not an ancestor" with
   789  	// exit code 1.
   790  	// Unfortunately, if we've already fetched rev with a shallow history, git
   791  	// merge-base has been observed to report a false-negative, so don't stop yet
   792  	// even if the exit code is 1!
   793  	if err == nil {
   794  		return true, nil
   795  	}
   796  
   797  	// See whether the tag and rev even exist.
   798  	tags, err := r.Tags(ctx, tag)
   799  	if err != nil {
   800  		return false, err
   801  	}
   802  	if len(tags.List) == 0 {
   803  		return false, nil
   804  	}
   805  
   806  	// NOTE: r.stat is very careful not to fetch commits that we shouldn't know
   807  	// about, like rejected GitHub pull requests, so don't try to short-circuit
   808  	// that here.
   809  	if _, err = r.stat(ctx, rev); err != nil {
   810  		return false, err
   811  	}
   812  
   813  	if r.local { // at this point, we have determined that we need to fetch rev, fail early if local only mode.
   814  		return false, fmt.Errorf("revision does not exist locally: %s", rev)
   815  	}
   816  
   817  	// Now fetch history so that git can search for a path.
   818  	unlock, err := r.mu.Lock()
   819  	if err != nil {
   820  		return false, err
   821  	}
   822  	defer unlock()
   823  
   824  	if r.fetchLevel < fetchAll {
   825  		// Fetch the complete history for all refs and heads. It would be more
   826  		// efficient to only fetch the history from rev to tag, but that's much more
   827  		// complicated, and any kind of shallow fetch is fairly likely to trigger
   828  		// bugs in JGit servers and/or the go command anyway.
   829  		if err := r.fetchRefsLocked(ctx); err != nil {
   830  			return false, err
   831  		}
   832  	}
   833  
   834  	_, err = r.runGit(ctx, "git", "merge-base", "--is-ancestor", "--", tag, rev)
   835  	if err == nil {
   836  		return true, nil
   837  	}
   838  	if ee, ok := err.(*RunError).Err.(*exec.ExitError); ok && ee.ExitCode() == 1 {
   839  		return false, nil
   840  	}
   841  	return false, err
   842  }
   843  
   844  func (r *gitRepo) ReadZip(ctx context.Context, rev, subdir string, maxSize int64) (zip io.ReadCloser, err error) {
   845  	// TODO: Use maxSize or drop it.
   846  	args := []string{}
   847  	if subdir != "" {
   848  		args = append(args, "--", subdir)
   849  	}
   850  	info, err := r.Stat(ctx, rev) // download rev into local git repo
   851  	if err != nil {
   852  		return nil, err
   853  	}
   854  
   855  	unlock, err := r.mu.Lock()
   856  	if err != nil {
   857  		return nil, err
   858  	}
   859  	defer unlock()
   860  
   861  	if err := ensureGitAttributes(r.dir); err != nil {
   862  		return nil, err
   863  	}
   864  
   865  	// Incredibly, git produces different archives depending on whether
   866  	// it is running on a Windows system or not, in an attempt to normalize
   867  	// text file line endings. Setting -c core.autocrlf=input means only
   868  	// translate files on the way into the repo, not on the way out (archive).
   869  	// The -c core.eol=lf should be unnecessary but set it anyway.
   870  	archive, err := r.runGit(ctx, "git", "-c", "core.autocrlf=input", "-c", "core.eol=lf", "archive", "--format=zip", "--prefix=prefix/", info.Name, args)
   871  	if err != nil {
   872  		if bytes.Contains(err.(*RunError).Stderr, []byte("did not match any files")) {
   873  			return nil, fs.ErrNotExist
   874  		}
   875  		return nil, err
   876  	}
   877  
   878  	return io.NopCloser(bytes.NewReader(archive)), nil
   879  }
   880  
   881  // ensureGitAttributes makes sure export-subst and export-ignore features are
   882  // disabled for this repo. This is intended to be run prior to running git
   883  // archive so that zip files are generated that produce consistent ziphashes
   884  // for a given revision, independent of variables such as git version and the
   885  // size of the repo.
   886  //
   887  // See: https://github.com/golang/go/issues/27153
   888  func ensureGitAttributes(repoDir string) (err error) {
   889  	const attr = "\n* -export-subst -export-ignore\n"
   890  
   891  	d := repoDir + "/info"
   892  	p := d + "/attributes"
   893  
   894  	if err := os.MkdirAll(d, 0755); err != nil {
   895  		return err
   896  	}
   897  
   898  	f, err := os.OpenFile(p, os.O_CREATE|os.O_APPEND|os.O_RDWR, 0666)
   899  	if err != nil {
   900  		return err
   901  	}
   902  	defer func() {
   903  		closeErr := f.Close()
   904  		if closeErr != nil {
   905  			err = closeErr
   906  		}
   907  	}()
   908  
   909  	b, err := io.ReadAll(f)
   910  	if err != nil {
   911  		return err
   912  	}
   913  	if !bytes.HasSuffix(b, []byte(attr)) {
   914  		_, err := f.WriteString(attr)
   915  		return err
   916  	}
   917  
   918  	return nil
   919  }
   920  
   921  func (r *gitRepo) runGit(ctx context.Context, cmdline ...any) ([]byte, error) {
   922  	args := RunArgs{cmdline: cmdline, dir: r.dir, local: r.local}
   923  	if !r.local {
   924  		// Manually supply GIT_DIR so Git works with safe.bareRepository=explicit set.
   925  		// This is necessary only for remote repositories as they are initialized with git init --bare.
   926  		args.env = []string{"GIT_DIR=" + r.dir}
   927  	}
   928  	return RunWithArgs(ctx, args)
   929  }
   930  

View as plain text