Source file src/cmd/go/internal/modfetch/codehost/git.go

     1  // Copyright 2018 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  package codehost
     6  
     7  import (
     8  	"bytes"
     9  	"context"
    10  	"crypto/sha256"
    11  	"encoding/base64"
    12  	"errors"
    13  	"fmt"
    14  	"io"
    15  	"io/fs"
    16  	"net/url"
    17  	"os"
    18  	"os/exec"
    19  	"path/filepath"
    20  	"regexp"
    21  	"runtime"
    22  	"slices"
    23  	"sort"
    24  	"strconv"
    25  	"strings"
    26  	"sync"
    27  	"time"
    28  
    29  	"cmd/go/internal/base"
    30  	"cmd/go/internal/lockedfile"
    31  	"cmd/go/internal/web"
    32  	"cmd/internal/par"
    33  
    34  	"golang.org/x/mod/semver"
    35  )
    36  
    37  // A notExistError wraps another error to retain its original text
    38  // but makes it opaquely equivalent to fs.ErrNotExist.
    39  type notExistError struct {
    40  	err error
    41  }
    42  
    43  func (e notExistError) Error() string   { return e.err.Error() }
    44  func (notExistError) Is(err error) bool { return err == fs.ErrNotExist }
    45  
// gitWorkDirType is the type argument newGitRepo passes to WorkDir when it
// asks for the cache directory and lock file of a remote repository.
const gitWorkDirType = "git3"
    47  
    48  func newGitRepo(ctx context.Context, remote string, local bool) (Repo, error) {
    49  	r := &gitRepo{remote: remote, local: local}
    50  	if local {
    51  		if strings.Contains(remote, "://") { // Local flag, but URL provided
    52  			return nil, fmt.Errorf("git remote (%s) lookup disabled", remote)
    53  		}
    54  		info, err := os.Stat(remote)
    55  		if err != nil {
    56  			return nil, err
    57  		}
    58  		if !info.IsDir() {
    59  			return nil, fmt.Errorf("%s exists but is not a directory", remote)
    60  		}
    61  		r.dir = remote
    62  		r.mu.Path = r.dir + ".lock"
    63  		r.sha256Hashes = r.checkConfigSHA256(ctx)
    64  		return r, nil
    65  	}
    66  	// This is a remote path lookup.
    67  	if !strings.Contains(remote, "://") { // No URL scheme, could be host:path
    68  		if strings.Contains(remote, ":") {
    69  			return nil, fmt.Errorf("git remote (%s) must not be local directory (use URL syntax not host:path syntax)", remote)
    70  		}
    71  		return nil, fmt.Errorf("git remote (%s) must not be local directory", remote)
    72  	}
    73  	var err error
    74  	r.dir, r.mu.Path, err = WorkDir(ctx, gitWorkDirType, r.remote)
    75  	if err != nil {
    76  		return nil, err
    77  	}
    78  
    79  	unlock, err := r.mu.Lock()
    80  	if err != nil {
    81  		return nil, err
    82  	}
    83  	defer unlock()
    84  
    85  	if _, err := os.Stat(filepath.Join(r.dir, "objects")); err != nil {
    86  		repoSha256Hash := false
    87  		if refs, lrErr := r.loadRefs(ctx); lrErr == nil {
    88  			// Check any ref's hash, it doesn't matter which; they won't be mixed
    89  			// between sha1 and sha256 for the moment.
    90  			for _, refHash := range refs {
    91  				repoSha256Hash = len(refHash) == (256 / 4)
    92  				break
    93  			}
    94  		}
    95  		gitSupportsSHA256, gitVersErr := gitSupportsSHA256()
    96  		if gitVersErr != nil {
    97  			return nil, fmt.Errorf("unable to resolve git version: %w", gitVersErr)
    98  		}
    99  		objFormatFlag := []string{}
   100  		// If git is sufficiently recent to support sha256,
   101  		// always initialize with an explicit object-format.
   102  		if repoSha256Hash {
   103  			// We always set --object-format=sha256 if the repo
   104  			// we're cloning uses sha256 hashes because if the git
   105  			// version is too old, it'll fail either way, so we
   106  			// might as well give it one last chance.
   107  			objFormatFlag = []string{"--object-format=sha256"}
   108  		} else if gitSupportsSHA256 {
   109  			objFormatFlag = []string{"--object-format=sha1"}
   110  		}
   111  		if _, err := Run(ctx, r.dir, "git", "init", "--bare", objFormatFlag); err != nil {
   112  			os.RemoveAll(r.dir)
   113  			return nil, err
   114  		}
   115  		// We could just say git fetch https://whatever later,
   116  		// but this lets us say git fetch origin instead, which
   117  		// is a little nicer. More importantly, using a named remote
   118  		// avoids a problem with Git LFS. See golang.org/issue/25605.
   119  		if _, err := r.runGit(ctx, "git", "remote", "add", "origin", "--", r.remote); err != nil {
   120  			os.RemoveAll(r.dir)
   121  			return nil, err
   122  		}
   123  		if runtime.GOOS == "windows" {
   124  			// Git for Windows by default does not support paths longer than
   125  			// MAX_PATH (260 characters) because that may interfere with navigation
   126  			// in some Windows programs. However, cmd/go should be able to handle
   127  			// long paths just fine, and we expect people to use 'go clean' to
   128  			// manipulate the module cache, so it should be harmless to set here,
   129  			// and in some cases may be necessary in order to download modules with
   130  			// long branch names.
   131  			//
   132  			// See https://github.com/git-for-windows/git/wiki/Git-cannot-create-a-file-or-directory-with-a-long-path.
   133  			if _, err := r.runGit(ctx, "git", "config", "core.longpaths", "true"); err != nil {
   134  				os.RemoveAll(r.dir)
   135  				return nil, err
   136  			}
   137  		}
   138  	}
   139  	r.sha256Hashes = r.checkConfigSHA256(ctx)
   140  	r.remoteURL = r.remote
   141  	r.remote = "origin"
   142  	return r, nil
   143  }
   144  
// gitRepo is the Repo implementation constructed by newGitRepo.
type gitRepo struct {
	// NOTE(review): ctx is not read by any code visible in this part of the
	// file; confirm whether it is still needed.
	ctx context.Context

	// remote is the name or path handed to git commands that talk to the
	// remote, and remoteURL is the URL recorded in Origin values.
	// For a non-local repo newGitRepo leaves remote as "origin" and
	// remoteURL as the original URL; for a local repo remote is the
	// directory path and newGitRepo does not set remoteURL.
	// dir is the directory of the git repository on disk.
	remote, remoteURL string
	local             bool // local only lookups; no remote fetches
	dir               string

	// Repo uses SHA256 for hashes, so expect hashes to be 256/4 == 64 hex digits long.
	sha256Hashes bool

	mu lockedfile.Mutex // protects fetchLevel and git repo state

	// fetchLevel is one of fetchNone, fetchSome, or fetchAll.
	fetchLevel int

	// statCache caches the results of Stat, keyed by the requested rev.
	statCache par.ErrCache[string, *RevInfo]

	// refsOnce guards the single load of refs and refsErr in loadRefs.
	refsOnce sync.Once
	// refs maps branch and tag refs (e.g., "HEAD", "refs/heads/master")
	// to commits (e.g., "37ffd2e798afde829a34e8955b716ab730b2a6d6")
	refs    map[string]string
	refsErr error

	// localTagsOnce guards the single call to loadLocalTags made by stat;
	// localTags holds the tag names known to the local repository.
	localTagsOnce sync.Once
	localTags     sync.Map // map[string]bool
}
   170  
const (
	// How much have we fetched into the git repo (in this process)?
	// These are the values of gitRepo.fetchLevel.
	fetchNone = iota // nothing yet
	fetchSome        // shallow fetches of individual hashes
	fetchAll         // all remote branches and tags (see fetchRefsLocked)
)
   177  
   178  // loadLocalTags loads tag references from the local git cache
   179  // into the map r.localTags.
   180  func (r *gitRepo) loadLocalTags(ctx context.Context) {
   181  	// The git protocol sends all known refs and ls-remote filters them on the client side,
   182  	// so we might as well record both heads and tags in one shot.
   183  	// Most of the time we only care about tags but sometimes we care about heads too.
   184  	out, err := r.runGit(ctx, "git", "tag", "-l")
   185  	if err != nil {
   186  		return
   187  	}
   188  
   189  	for line := range strings.SplitSeq(string(out), "\n") {
   190  		if line != "" {
   191  			r.localTags.Store(line, true)
   192  		}
   193  	}
   194  }
   195  
   196  func (r *gitRepo) CheckReuse(ctx context.Context, old *Origin, subdir string) error {
   197  	if old == nil {
   198  		return fmt.Errorf("missing origin")
   199  	}
   200  	if old.VCS != "git" || old.URL != r.remoteURL {
   201  		return fmt.Errorf("origin moved from %v %q to %v %q", old.VCS, old.URL, "git", r.remoteURL)
   202  	}
   203  	if old.Subdir != subdir {
   204  		return fmt.Errorf("origin moved from %v %q %q to %v %q %q", old.VCS, old.URL, old.Subdir, "git", r.remoteURL, subdir)
   205  	}
   206  
   207  	// Note: Can have Hash with no Ref and no TagSum and no RepoSum,
   208  	// meaning the Hash simply has to remain in the repo.
   209  	// In that case we assume it does in the absence of any real way to check.
   210  	// But if neither Hash nor TagSum is present, we have nothing to check,
   211  	// which we take to mean we didn't record enough information to be sure.
   212  	if old.Hash == "" && old.TagSum == "" && old.RepoSum == "" {
   213  		return fmt.Errorf("non-specific origin")
   214  	}
   215  
   216  	r.loadRefs(ctx)
   217  	if r.refsErr != nil {
   218  		return r.refsErr
   219  	}
   220  
   221  	if old.Ref != "" {
   222  		hash, ok := r.refs[old.Ref]
   223  		if !ok {
   224  			return fmt.Errorf("ref %q deleted", old.Ref)
   225  		}
   226  		if hash != old.Hash {
   227  			return fmt.Errorf("ref %q moved from %s to %s", old.Ref, old.Hash, hash)
   228  		}
   229  	}
   230  	if old.TagSum != "" {
   231  		tags, err := r.Tags(ctx, old.TagPrefix)
   232  		if err != nil {
   233  			return err
   234  		}
   235  		if tags.Origin.TagSum != old.TagSum {
   236  			return fmt.Errorf("tags changed")
   237  		}
   238  	}
   239  	if old.RepoSum != "" {
   240  		if r.repoSum(r.refs) != old.RepoSum {
   241  			return fmt.Errorf("refs changed")
   242  		}
   243  	}
   244  	return nil
   245  }
   246  
   247  // loadRefs loads heads and tags references from the remote into the map r.refs.
   248  // The result is cached in memory.
   249  func (r *gitRepo) loadRefs(ctx context.Context) (map[string]string, error) {
   250  	if r.local { // Return results from the cache if local only.
   251  		// In the future, we could consider loading r.refs using local git commands
   252  		// if desired.
   253  		return nil, nil
   254  	}
   255  	r.refsOnce.Do(func() {
   256  		// The git protocol sends all known refs and ls-remote filters them on the client side,
   257  		// so we might as well record both heads and tags in one shot.
   258  		// Most of the time we only care about tags but sometimes we care about heads too.
   259  		release, err := base.AcquireNet()
   260  		if err != nil {
   261  			r.refsErr = err
   262  			return
   263  		}
   264  		out, gitErr := r.runGit(ctx, "git", "ls-remote", "-q", r.remote)
   265  		release()
   266  
   267  		if gitErr != nil {
   268  			if rerr, ok := gitErr.(*RunError); ok {
   269  				if bytes.Contains(rerr.Stderr, []byte("fatal: could not read Username")) {
   270  					rerr.HelpText = "Confirm the import path was entered correctly.\nIf this is a private repository, see https://golang.org/doc/faq#git_https for additional information."
   271  				}
   272  			}
   273  
   274  			// If the remote URL doesn't exist at all, ideally we should treat the whole
   275  			// repository as nonexistent by wrapping the error in a notExistError.
   276  			// For HTTP and HTTPS, that's easy to detect: we'll try to fetch the URL
   277  			// ourselves and see what code it serves.
   278  			if u, err := url.Parse(r.remoteURL); err == nil && (u.Scheme == "http" || u.Scheme == "https") {
   279  				if _, err := web.GetBytes(u); errors.Is(err, fs.ErrNotExist) {
   280  					gitErr = notExistError{gitErr}
   281  				}
   282  			}
   283  
   284  			r.refsErr = gitErr
   285  			return
   286  		}
   287  
   288  		refs := make(map[string]string)
   289  		for line := range strings.SplitSeq(string(out), "\n") {
   290  			f := strings.Fields(line)
   291  			if len(f) != 2 {
   292  				continue
   293  			}
   294  			if f[1] == "HEAD" || strings.HasPrefix(f[1], "refs/heads/") || strings.HasPrefix(f[1], "refs/tags/") {
   295  				refs[f[1]] = f[0]
   296  			}
   297  		}
   298  		for ref, hash := range refs {
   299  			if k, found := strings.CutSuffix(ref, "^{}"); found { // record unwrapped annotated tag as value of tag
   300  				refs[k] = hash
   301  				delete(refs, ref)
   302  			}
   303  		}
   304  		r.refs = refs
   305  	})
   306  	return r.refs, r.refsErr
   307  }
   308  
   309  func (r *gitRepo) Tags(ctx context.Context, prefix string) (*Tags, error) {
   310  	refs, err := r.loadRefs(ctx)
   311  	if err != nil {
   312  		return nil, err
   313  	}
   314  
   315  	tags := &Tags{
   316  		Origin: &Origin{
   317  			VCS:       "git",
   318  			URL:       r.remoteURL,
   319  			TagPrefix: prefix,
   320  		},
   321  		List: []Tag{},
   322  	}
   323  	for ref, hash := range refs {
   324  		if !strings.HasPrefix(ref, "refs/tags/") {
   325  			continue
   326  		}
   327  		tag := ref[len("refs/tags/"):]
   328  		if !strings.HasPrefix(tag, prefix) {
   329  			continue
   330  		}
   331  		tags.List = append(tags.List, Tag{tag, hash})
   332  	}
   333  	sort.Slice(tags.List, func(i, j int) bool {
   334  		return tags.List[i].Name < tags.List[j].Name
   335  	})
   336  
   337  	dir := prefix[:strings.LastIndex(prefix, "/")+1]
   338  	h := sha256.New()
   339  	for _, tag := range tags.List {
   340  		if isOriginTag(strings.TrimPrefix(tag.Name, dir)) {
   341  			fmt.Fprintf(h, "%q %s\n", tag.Name, tag.Hash)
   342  		}
   343  	}
   344  	tags.Origin.TagSum = "t1:" + base64.StdEncoding.EncodeToString(h.Sum(nil))
   345  	return tags, nil
   346  }
   347  
   348  // repoSum returns a checksum of the entire repo state,
   349  // which can be checked (as Origin.RepoSum) to cache
   350  // the absence of a specific module version.
   351  // The caller must supply refs, the result of a successful r.loadRefs.
   352  func (r *gitRepo) repoSum(refs map[string]string) string {
   353  	list := make([]string, 0, len(refs))
   354  	for ref := range refs {
   355  		list = append(list, ref)
   356  	}
   357  	sort.Strings(list)
   358  	h := sha256.New()
   359  	for _, ref := range list {
   360  		fmt.Fprintf(h, "%q %s\n", ref, refs[ref])
   361  	}
   362  	return "r1:" + base64.StdEncoding.EncodeToString(h.Sum(nil))
   363  }
   364  
   365  // unknownRevisionInfo returns a RevInfo containing an Origin containing a RepoSum of refs,
   366  // for use when returning an UnknownRevisionError.
   367  func (r *gitRepo) unknownRevisionInfo(refs map[string]string) *RevInfo {
   368  	return &RevInfo{
   369  		Origin: &Origin{
   370  			VCS:     "git",
   371  			URL:     r.remoteURL,
   372  			RepoSum: r.repoSum(refs),
   373  		},
   374  	}
   375  }
   376  
   377  func (r *gitRepo) Latest(ctx context.Context) (*RevInfo, error) {
   378  	refs, err := r.loadRefs(ctx)
   379  	if err != nil {
   380  		return nil, err
   381  	}
   382  	if refs["HEAD"] == "" {
   383  		return nil, ErrNoCommits
   384  	}
   385  	statInfo, err := r.Stat(ctx, refs["HEAD"])
   386  	if err != nil {
   387  		return nil, err
   388  	}
   389  
   390  	// Stat may return cached info, so make a copy to modify here.
   391  	info := new(RevInfo)
   392  	*info = *statInfo
   393  	info.Origin = new(Origin)
   394  	if statInfo.Origin != nil {
   395  		*info.Origin = *statInfo.Origin
   396  	}
   397  	info.Origin.Ref = "HEAD"
   398  	info.Origin.Hash = refs["HEAD"]
   399  
   400  	return info, nil
   401  }
   402  
   403  func (r *gitRepo) checkConfigSHA256(ctx context.Context) bool {
   404  	if hashType, sha256CfgErr := r.runGit(ctx, "git", "config", "extensions.objectformat"); sha256CfgErr == nil {
   405  		return strings.TrimSpace(string(hashType)) == "sha256"
   406  	}
   407  	return false
   408  }
   409  
   410  func (r *gitRepo) hexHashLen() int {
   411  	if !r.sha256Hashes {
   412  		return 160 / 4
   413  	}
   414  	return 256 / 4
   415  }
   416  
   417  // shortenObjectHash shortens a SHA1 or SHA256 hash (40 or 64 hex digits) to
   418  // the canonical length used in pseudo-versions (12 hex digits).
   419  func (r *gitRepo) shortenObjectHash(rev string) string {
   420  	if !r.sha256Hashes {
   421  		return ShortenSHA1(rev)
   422  	}
   423  	if AllHex(rev) && len(rev) == 256/4 {
   424  		return rev[:12]
   425  	}
   426  	return rev
   427  }
   428  
// minHashDigits is the minimum number of digits to require
// before accepting a hex digit sequence as potentially identifying
// a specific commit in a git repo. (Of course, users can always
// specify more digits, and many will paste in a full hash,
// 40 digits for SHA-1 or 64 for SHA-256,
// but many of git's commands default to printing short hashes
// as 7 digits.)
const minHashDigits = 7
   436  
   437  // stat stats the given rev in the local repository,
   438  // or else it fetches more info from the remote repository and tries again.
   439  func (r *gitRepo) stat(ctx context.Context, rev string) (info *RevInfo, err error) {
   440  	// Fast path: maybe rev is a hash we already have locally.
   441  	didStatLocal := false
   442  	if len(rev) >= minHashDigits && len(rev) <= r.hexHashLen() && AllHex(rev) {
   443  		if info, err := r.statLocal(ctx, rev, rev); err == nil {
   444  			return info, nil
   445  		}
   446  		didStatLocal = true
   447  	}
   448  
   449  	// Maybe rev is a tag we already have locally.
   450  	// (Note that we're excluding branches, which can be stale.)
   451  	r.localTagsOnce.Do(func() { r.loadLocalTags(ctx) })
   452  	if _, ok := r.localTags.Load(rev); ok {
   453  		return r.statLocal(ctx, rev, "refs/tags/"+rev)
   454  	}
   455  
   456  	// Maybe rev is the name of a tag or branch on the remote server.
   457  	// Or maybe it's the prefix of a hash of a named ref.
   458  	// Try to resolve to both a ref (git name) and full (40-hex-digit for
   459  	// sha1 64 for sha256) commit hash.
   460  	refs, err := r.loadRefs(ctx)
   461  	if err != nil {
   462  		return nil, err
   463  	}
   464  	// loadRefs may return an error if git fails, for example segfaults, or
   465  	// could not load a private repo, but defer checking to the else block
   466  	// below, in case we already have the rev in question in the local cache.
   467  	var ref, hash string
   468  	if refs["refs/tags/"+rev] != "" {
   469  		ref = "refs/tags/" + rev
   470  		hash = refs[ref]
   471  		// Keep rev as is: tags are assumed not to change meaning.
   472  	} else if refs["refs/heads/"+rev] != "" {
   473  		ref = "refs/heads/" + rev
   474  		hash = refs[ref]
   475  		rev = hash // Replace rev, because meaning of refs/heads/foo can change.
   476  	} else if rev == "HEAD" && refs["HEAD"] != "" {
   477  		ref = "HEAD"
   478  		hash = refs[ref]
   479  		rev = hash // Replace rev, because meaning of HEAD can change.
   480  	} else if len(rev) >= minHashDigits && len(rev) <= r.hexHashLen() && AllHex(rev) {
   481  		// At the least, we have a hash prefix we can look up after the fetch below.
   482  		// Maybe we can map it to a full hash using the known refs.
   483  		prefix := rev
   484  		// Check whether rev is prefix of known ref hash.
   485  		for k, h := range refs {
   486  			if strings.HasPrefix(h, prefix) {
   487  				if hash != "" && hash != h {
   488  					// Hash is an ambiguous hash prefix.
   489  					// More information will not change that.
   490  					return nil, fmt.Errorf("ambiguous revision %s", rev)
   491  				}
   492  				if ref == "" || ref > k { // Break ties deterministically when multiple refs point at same hash.
   493  					ref = k
   494  				}
   495  				rev = h
   496  				hash = h
   497  			}
   498  		}
   499  		if hash == "" && len(rev) == r.hexHashLen() { // Didn't find a ref, but rev is a full hash.
   500  			hash = rev
   501  		}
   502  	} else {
   503  		return r.unknownRevisionInfo(refs), &UnknownRevisionError{Rev: rev}
   504  	}
   505  
   506  	defer func() {
   507  		if info != nil {
   508  			info.Origin.Hash = info.Name
   509  			// There's a ref = hash below; don't write that hash down as Origin.Ref.
   510  			if ref != info.Origin.Hash {
   511  				info.Origin.Ref = ref
   512  			}
   513  		}
   514  	}()
   515  
   516  	// Protect r.fetchLevel and the "fetch more and more" sequence.
   517  	unlock, err := r.mu.Lock()
   518  	if err != nil {
   519  		return nil, err
   520  	}
   521  	defer unlock()
   522  
   523  	// Perhaps r.localTags did not have the ref when we loaded local tags,
   524  	// but we've since done fetches that pulled down the hash we need
   525  	// (or already have the hash we need, just without its tag).
   526  	// Either way, try a local stat before falling back to network I/O.
   527  	if !didStatLocal {
   528  		if info, err := r.statLocal(ctx, rev, hash); err == nil {
   529  			tag, fromTag := strings.CutPrefix(ref, "refs/tags/")
   530  			if fromTag && !slices.Contains(info.Tags, tag) {
   531  				// The local repo includes the commit hash we want, but it is missing
   532  				// the corresponding tag. Add that tag and try again.
   533  				_, err := r.runGit(ctx, "git", "tag", tag, hash)
   534  				if err != nil {
   535  					return nil, err
   536  				}
   537  				r.localTags.Store(tag, true)
   538  				return r.statLocal(ctx, rev, ref)
   539  			}
   540  			return info, err
   541  		}
   542  	}
   543  
   544  	if r.local { // at this point, we have determined that we need to fetch rev, fail early if local only mode.
   545  		return nil, fmt.Errorf("revision does not exist locally: %s", rev)
   546  	}
   547  
   548  	// If we know a specific commit we need and its ref, fetch it.
   549  	// We do NOT fetch arbitrary hashes (when we don't know the ref)
   550  	// because we want to avoid ever importing a commit that isn't
   551  	// reachable from refs/tags/* or refs/heads/* or HEAD.
   552  	// Both Gerrit and GitHub expose every CL/PR as a named ref,
   553  	// and we don't want those commits masquerading as being real
   554  	// pseudo-versions in the main repo.
   555  	if r.fetchLevel <= fetchSome && ref != "" && hash != "" {
   556  		r.fetchLevel = fetchSome
   557  		var refspec string
   558  		if ref == "HEAD" {
   559  			// Fetch the hash but give it a local name (refs/dummy),
   560  			// because that triggers the fetch behavior of creating any
   561  			// other known remote tags for the hash. We never use
   562  			// refs/dummy (it's not refs/tags/dummy) and it will be
   563  			// overwritten in the next command, and that's fine.
   564  			ref = hash
   565  			refspec = hash + ":refs/dummy"
   566  		} else {
   567  			// If we do know the ref name, save the mapping locally
   568  			// so that (if it is a tag) it can show up in localTags
   569  			// on a future call. Also, some servers refuse to allow
   570  			// full hashes in ref specs, so prefer a ref name if known.
   571  			refspec = ref + ":" + ref
   572  		}
   573  
   574  		release, err := base.AcquireNet()
   575  		if err != nil {
   576  			return nil, err
   577  		}
   578  		// We explicitly set protocol.version=2 for this command to work around
   579  		// an apparent Git bug introduced in Git 2.21 (commit 61c771),
   580  		// which causes the handler for protocol version 1 to sometimes miss
   581  		// tags that point to the requested commit (see https://go.dev/issue/56881).
   582  		_, err = r.runGit(ctx, "git", "-c", "protocol.version=2", "fetch", "-f", "--depth=1", r.remote, refspec)
   583  		release()
   584  
   585  		if err == nil {
   586  			return r.statLocal(ctx, rev, ref)
   587  		}
   588  		// Don't try to be smart about parsing the error.
   589  		// It's too complex and varies too much by git version.
   590  		// No matter what went wrong, fall back to a complete fetch.
   591  	}
   592  
   593  	// Last resort.
   594  	// Fetch all heads and tags and hope the hash we want is in the history.
   595  	if err := r.fetchRefsLocked(ctx); err != nil {
   596  		return nil, err
   597  	}
   598  
   599  	return r.statLocal(ctx, rev, rev)
   600  }
   601  
   602  // fetchRefsLocked fetches all heads and tags from the origin, along with the
   603  // ancestors of those commits.
   604  //
   605  // We only fetch heads and tags, not arbitrary other commits: we don't want to
   606  // pull in off-branch commits (such as rejected GitHub pull requests) that the
   607  // server may be willing to provide. (See the comments within the stat method
   608  // for more detail.)
   609  //
   610  // fetchRefsLocked requires that r.mu remain locked for the duration of the call.
   611  func (r *gitRepo) fetchRefsLocked(ctx context.Context) error {
   612  	if r.local {
   613  		panic("go: fetchRefsLocked called in local only mode.")
   614  	}
   615  	if r.fetchLevel < fetchAll {
   616  		// NOTE: To work around a bug affecting Git clients up to at least 2.23.0
   617  		// (2019-08-16), we must first expand the set of local refs, and only then
   618  		// unshallow the repository as a separate fetch operation. (See
   619  		// golang.org/issue/34266 and
   620  		// https://github.com/git/git/blob/4c86140027f4a0d2caaa3ab4bd8bfc5ce3c11c8a/transport.c#L1303-L1309.)
   621  
   622  		release, err := base.AcquireNet()
   623  		if err != nil {
   624  			return err
   625  		}
   626  		defer release()
   627  
   628  		if _, err := r.runGit(ctx, "git", "fetch", "-f", r.remote, "refs/heads/*:refs/heads/*", "refs/tags/*:refs/tags/*"); err != nil {
   629  			return err
   630  		}
   631  
   632  		if _, err := os.Stat(filepath.Join(r.dir, "shallow")); err == nil {
   633  			if _, err := r.runGit(ctx, "git", "fetch", "--unshallow", "-f", r.remote); err != nil {
   634  				return err
   635  			}
   636  		}
   637  
   638  		r.fetchLevel = fetchAll
   639  	}
   640  	return nil
   641  }
   642  
   643  // statLocal returns a new RevInfo describing rev in the local git repository.
   644  // It uses version as info.Version.
   645  func (r *gitRepo) statLocal(ctx context.Context, version, rev string) (*RevInfo, error) {
   646  	out, err := r.runGit(ctx, "git", "-c", "log.showsignature=false", "log", "--no-decorate", "-n1", "--format=format:%H %ct %D", rev, "--")
   647  	if err != nil {
   648  		// Return info with Origin.RepoSum if possible to allow caching of negative lookup.
   649  		var info *RevInfo
   650  		if refs, err := r.loadRefs(ctx); err == nil {
   651  			info = r.unknownRevisionInfo(refs)
   652  		}
   653  		return info, &UnknownRevisionError{Rev: rev}
   654  	}
   655  	f := strings.Fields(string(out))
   656  	if len(f) < 2 {
   657  		return nil, fmt.Errorf("unexpected response from git log: %q", out)
   658  	}
   659  	hash := f[0]
   660  	if strings.HasPrefix(hash, version) {
   661  		version = hash // extend to full hash
   662  	}
   663  	t, err := strconv.ParseInt(f[1], 10, 64)
   664  	if err != nil {
   665  		return nil, fmt.Errorf("invalid time from git log: %q", out)
   666  	}
   667  
   668  	info := &RevInfo{
   669  		Origin: &Origin{
   670  			VCS:  "git",
   671  			URL:  r.remoteURL,
   672  			Hash: hash,
   673  		},
   674  		Name:    hash,
   675  		Short:   r.shortenObjectHash(hash),
   676  		Time:    time.Unix(t, 0).UTC(),
   677  		Version: hash,
   678  	}
   679  	if !strings.HasPrefix(hash, rev) {
   680  		info.Origin.Ref = rev
   681  	}
   682  
   683  	// Add tags. Output looks like:
   684  	//	ede458df7cd0fdca520df19a33158086a8a68e81 1523994202 HEAD -> master, tag: v1.2.4-annotated, tag: v1.2.3, origin/master, origin/HEAD
   685  	for i := 2; i < len(f); i++ {
   686  		if f[i] == "tag:" {
   687  			i++
   688  			if i < len(f) {
   689  				info.Tags = append(info.Tags, strings.TrimSuffix(f[i], ","))
   690  			}
   691  		}
   692  	}
   693  
   694  	// Git 2.47.1 does not send the tags during shallow clone anymore
   695  	// (perhaps the exact version that changed behavior is an earlier one),
   696  	// so we have to also add tags from the refs list we fetched with ls-remote.
   697  	if refs, err := r.loadRefs(ctx); err == nil {
   698  		for ref, h := range refs {
   699  			if h == hash {
   700  				if tag, found := strings.CutPrefix(ref, "refs/tags/"); found {
   701  					info.Tags = append(info.Tags, tag)
   702  				}
   703  			}
   704  		}
   705  	}
   706  	slices.Sort(info.Tags)
   707  	info.Tags = slices.Compact(info.Tags)
   708  
   709  	// Used hash as info.Version above.
   710  	// Use caller's suggested version if it appears in the tag list
   711  	// (filters out branch names, HEAD).
   712  	for _, tag := range info.Tags {
   713  		if version == tag {
   714  			info.Version = version
   715  		}
   716  	}
   717  
   718  	return info, nil
   719  }
   720  
   721  func (r *gitRepo) Stat(ctx context.Context, rev string) (*RevInfo, error) {
   722  	if rev == "latest" {
   723  		return r.Latest(ctx)
   724  	}
   725  	return r.statCache.Do(rev, func() (*RevInfo, error) {
   726  		return r.stat(ctx, rev)
   727  	})
   728  }
   729  
   730  func (r *gitRepo) ReadFile(ctx context.Context, rev, file string, maxSize int64) ([]byte, error) {
   731  	// TODO: Could use git cat-file --batch.
   732  	info, err := r.Stat(ctx, rev) // download rev into local git repo
   733  	if err != nil {
   734  		return nil, err
   735  	}
   736  	out, err := r.runGit(ctx, "git", "cat-file", "blob", info.Name+":"+file)
   737  	if err != nil {
   738  		return nil, fs.ErrNotExist
   739  	}
   740  	return out, nil
   741  }
   742  
   743  func (r *gitRepo) RecentTag(ctx context.Context, rev, prefix string, allowed func(tag string) bool) (tag string, err error) {
   744  	info, err := r.Stat(ctx, rev)
   745  	if err != nil {
   746  		return "", err
   747  	}
   748  	rev = info.Name // expand hash prefixes
   749  
   750  	// describe sets tag and err using 'git for-each-ref' and reports whether the
   751  	// result is definitive.
   752  	describe := func() (definitive bool) {
   753  		var out []byte
   754  		out, err = r.runGit(ctx, "git", "for-each-ref", "--format", "%(refname)", "refs/tags", "--merged", rev)
   755  		if err != nil {
   756  			return true
   757  		}
   758  
   759  		// prefixed tags aren't valid semver tags so compare without prefix, but only tags with correct prefix
   760  		var highest string
   761  		for line := range strings.SplitSeq(string(out), "\n") {
   762  			line = strings.TrimSpace(line)
   763  			// git do support lstrip in for-each-ref format, but it was added in v2.13.0. Stripping here
   764  			// instead gives support for git v2.7.0.
   765  			if !strings.HasPrefix(line, "refs/tags/") {
   766  				continue
   767  			}
   768  			line = line[len("refs/tags/"):]
   769  
   770  			if !strings.HasPrefix(line, prefix) {
   771  				continue
   772  			}
   773  			if !allowed(line) {
   774  				continue
   775  			}
   776  
   777  			semtag := line[len(prefix):]
   778  			if semver.Compare(semtag, highest) > 0 {
   779  				highest = semtag
   780  			}
   781  		}
   782  
   783  		if highest != "" {
   784  			tag = prefix + highest
   785  		}
   786  
   787  		return tag != "" && !AllHex(tag)
   788  	}
   789  
   790  	if describe() {
   791  		return tag, err
   792  	}
   793  
   794  	// Git didn't find a version tag preceding the requested rev.
   795  	// See whether any plausible tag exists.
   796  	tags, err := r.Tags(ctx, prefix+"v")
   797  	if err != nil {
   798  		return "", err
   799  	}
   800  	if len(tags.List) == 0 {
   801  		return "", nil
   802  	}
   803  
   804  	if r.local { // at this point, we have determined that we need to fetch rev, fail early if local only mode.
   805  		return "", fmt.Errorf("revision does not exist locally: %s", rev)
   806  	}
   807  	// There are plausible tags, but we don't know if rev is a descendent of any of them.
   808  	// Fetch the history to find out.
   809  
   810  	// Note: do not use defer unlock, because describe calls allowed,
   811  	// which uses retracted, which calls ReadFile, which may end up
   812  	// back at a method that acquires r.mu.
   813  	unlock, err := r.mu.Lock()
   814  	if err != nil {
   815  		return "", err
   816  	}
   817  	if err := r.fetchRefsLocked(ctx); err != nil {
   818  		unlock()
   819  		return "", err
   820  	}
   821  	unlock()
   822  
   823  	// If we've reached this point, we have all of the commits that are reachable
   824  	// from all heads and tags.
   825  	//
   826  	// The only refs we should be missing are those that are no longer reachable
   827  	// (or never were reachable) from any branch or tag, including the master
   828  	// branch, and we don't want to resolve them anyway (they're probably
   829  	// unreachable for a reason).
   830  	//
   831  	// Try one last time in case some other goroutine fetched rev while we were
   832  	// waiting on the lock.
   833  	describe()
   834  	return tag, err
   835  }
   836  
   837  func (r *gitRepo) DescendsFrom(ctx context.Context, rev, tag string) (bool, error) {
   838  	// The "--is-ancestor" flag was added to "git merge-base" in version 1.8.0, so
   839  	// this won't work with Git 1.7.1. According to golang.org/issue/28550, cmd/go
   840  	// already doesn't work with Git 1.7.1, so at least it's not a regression.
   841  	//
   842  	// git merge-base --is-ancestor exits with status 0 if rev is an ancestor, or
   843  	// 1 if not.
   844  	_, err := r.runGit(ctx, "git", "merge-base", "--is-ancestor", "--", tag, rev)
   845  
   846  	// Git reports "is an ancestor" with exit code 0 and "not an ancestor" with
   847  	// exit code 1.
   848  	// Unfortunately, if we've already fetched rev with a shallow history, git
   849  	// merge-base has been observed to report a false-negative, so don't stop yet
   850  	// even if the exit code is 1!
   851  	if err == nil {
   852  		return true, nil
   853  	}
   854  
   855  	// See whether the tag and rev even exist.
   856  	tags, err := r.Tags(ctx, tag)
   857  	if err != nil {
   858  		return false, err
   859  	}
   860  	if len(tags.List) == 0 {
   861  		return false, nil
   862  	}
   863  
   864  	// NOTE: r.stat is very careful not to fetch commits that we shouldn't know
   865  	// about, like rejected GitHub pull requests, so don't try to short-circuit
   866  	// that here.
   867  	if _, err = r.stat(ctx, rev); err != nil {
   868  		return false, err
   869  	}
   870  
   871  	if r.local { // at this point, we have determined that we need to fetch rev, fail early if local only mode.
   872  		return false, fmt.Errorf("revision does not exist locally: %s", rev)
   873  	}
   874  
   875  	// Now fetch history so that git can search for a path.
   876  	unlock, err := r.mu.Lock()
   877  	if err != nil {
   878  		return false, err
   879  	}
   880  	defer unlock()
   881  
   882  	if r.fetchLevel < fetchAll {
   883  		// Fetch the complete history for all refs and heads. It would be more
   884  		// efficient to only fetch the history from rev to tag, but that's much more
   885  		// complicated, and any kind of shallow fetch is fairly likely to trigger
   886  		// bugs in JGit servers and/or the go command anyway.
   887  		if err := r.fetchRefsLocked(ctx); err != nil {
   888  			return false, err
   889  		}
   890  	}
   891  
   892  	_, err = r.runGit(ctx, "git", "merge-base", "--is-ancestor", "--", tag, rev)
   893  	if err == nil {
   894  		return true, nil
   895  	}
   896  	if ee, ok := err.(*RunError).Err.(*exec.ExitError); ok && ee.ExitCode() == 1 {
   897  		return false, nil
   898  	}
   899  	return false, err
   900  }
   901  
   902  func (r *gitRepo) ReadZip(ctx context.Context, rev, subdir string, maxSize int64) (zip io.ReadCloser, err error) {
   903  	// TODO: Use maxSize or drop it.
   904  	args := []string{}
   905  	if subdir != "" {
   906  		args = append(args, "--", subdir)
   907  	}
   908  	info, err := r.Stat(ctx, rev) // download rev into local git repo
   909  	if err != nil {
   910  		return nil, err
   911  	}
   912  
   913  	unlock, err := r.mu.Lock()
   914  	if err != nil {
   915  		return nil, err
   916  	}
   917  	defer unlock()
   918  
   919  	if err := ensureGitAttributes(r.dir); err != nil {
   920  		return nil, err
   921  	}
   922  
   923  	// Incredibly, git produces different archives depending on whether
   924  	// it is running on a Windows system or not, in an attempt to normalize
   925  	// text file line endings. Setting -c core.autocrlf=input means only
   926  	// translate files on the way into the repo, not on the way out (archive).
   927  	// The -c core.eol=lf should be unnecessary but set it anyway.
   928  	archive, err := r.runGit(ctx, "git", "-c", "core.autocrlf=input", "-c", "core.eol=lf", "archive", "--format=zip", "--prefix=prefix/", info.Name, args)
   929  	if err != nil {
   930  		if bytes.Contains(err.(*RunError).Stderr, []byte("did not match any files")) {
   931  			return nil, fs.ErrNotExist
   932  		}
   933  		return nil, err
   934  	}
   935  
   936  	return io.NopCloser(bytes.NewReader(archive)), nil
   937  }
   938  
   939  // ensureGitAttributes makes sure export-subst and export-ignore features are
   940  // disabled for this repo. This is intended to be run prior to running git
   941  // archive so that zip files are generated that produce consistent ziphashes
   942  // for a given revision, independent of variables such as git version and the
   943  // size of the repo.
   944  //
   945  // See: https://github.com/golang/go/issues/27153
   946  func ensureGitAttributes(repoDir string) (err error) {
   947  	const attr = "\n* -export-subst -export-ignore\n"
   948  
   949  	d := repoDir + "/info"
   950  	p := d + "/attributes"
   951  
   952  	if err := os.MkdirAll(d, 0755); err != nil {
   953  		return err
   954  	}
   955  
   956  	f, err := os.OpenFile(p, os.O_CREATE|os.O_APPEND|os.O_RDWR, 0666)
   957  	if err != nil {
   958  		return err
   959  	}
   960  	defer func() {
   961  		closeErr := f.Close()
   962  		if closeErr != nil {
   963  			err = closeErr
   964  		}
   965  	}()
   966  
   967  	b, err := io.ReadAll(f)
   968  	if err != nil {
   969  		return err
   970  	}
   971  	if !bytes.HasSuffix(b, []byte(attr)) {
   972  		_, err := f.WriteString(attr)
   973  		return err
   974  	}
   975  
   976  	return nil
   977  }
   978  
   979  func (r *gitRepo) runGit(ctx context.Context, cmdline ...any) ([]byte, error) {
   980  	args := RunArgs{cmdline: cmdline, dir: r.dir, local: r.local}
   981  	if !r.local {
   982  		// Manually supply GIT_DIR so Git works with safe.bareRepository=explicit set.
   983  		// This is necessary only for remote repositories as they are initialized with git init --bare.
   984  		args.env = []string{"GIT_DIR=" + r.dir}
   985  	}
   986  	return RunWithArgs(ctx, args)
   987  }
   988  
   989  // Capture the major, minor and (optionally) patch version, but ignore anything later
   990  var gitVersLineExtract = regexp.MustCompile(`git version\s+(\d+\.\d+(?:\.\d+)?)`)
   991  
   992  func gitVersion() (string, error) {
   993  	gitOut, runErr := exec.Command("git", "version").CombinedOutput()
   994  	if runErr != nil {
   995  		return "v0", fmt.Errorf("failed to execute git version: %w", runErr)
   996  	}
   997  	return extractGitVersion(gitOut)
   998  }
   999  
  1000  func extractGitVersion(gitOut []byte) (string, error) {
  1001  	matches := gitVersLineExtract.FindSubmatch(gitOut)
  1002  	if len(matches) < 2 {
  1003  		return "v0", fmt.Errorf("git version extraction regexp did not match version line: %q", gitOut)
  1004  	}
  1005  	return "v" + string(matches[1]), nil
  1006  }
  1007  
  1008  func hasAtLeastGitVersion(minVers string) (bool, error) {
  1009  	gitVers, gitVersErr := gitVersion()
  1010  	if gitVersErr != nil {
  1011  		return false, gitVersErr
  1012  	}
  1013  	return semver.Compare(minVers, gitVers) <= 0, nil
  1014  }
  1015  
  1016  const minGitSHA256Vers = "v2.29"
  1017  
  1018  func gitSupportsSHA256() (bool, error) {
  1019  	return hasAtLeastGitVersion(minGitSHA256Vers)
  1020  }
  1021  

View as plain text